utf8proc

diff ruby/utf8proc.rb @ 2:aaad485d5335

Version 0.3

- changed normalization from NFC to NFKC for postgresql unifold function
- added support to mark the beginning of a grapheme cluster with 0xFF (option: CHARBOUND)
- added the ruby method String#chars, which returns an array of UTF-8 encoded grapheme clusters
- added NLF2LF transformation in postgresql unifold function
- added the DECOMPOSE option; if you use neither COMPOSE nor DECOMPOSE, no normalization will be performed (different from previous versions)
- using integer constants rather than C-strings for character properties
- fixed (hopefully) a problem with the ruby library on Mac OS X, which occurred when compiler optimization was switched on
author jbe
date Fri Aug 04 12:00:00 2006 +0200 (2006-08-04)
parents 61a89ecc2fb9
children 4ee0d5f54af1
line diff
     1.1 --- a/ruby/utf8proc.rb	Tue Jun 20 12:00:00 2006 +0200
     1.2 +++ b/ruby/utf8proc.rb	Fri Aug 04 12:00:00 2006 +0200
     1.3 @@ -33,8 +33,8 @@
     1.4  
     1.5  ##
     1.6   #  File name:    ruby/utf8proc.rb
     1.7 - #  Version:      0.2
     1.8 - #  Last changed: 2006-05-31
     1.9 + #  Version:      0.3
    1.10 + #  Last changed: 2006-08-04
    1.11   #
    1.12   #  Description:
    1.13   #  Part of the ruby wrapper for libutf8proc, which is written in ruby.
    1.14 @@ -43,7 +43,9 @@
    1.15  
    1.16  require 'utf8proc_native'
    1.17  
    1.18 +
    1.19  module Utf8Proc
    1.20 +
    1.21    SpecialChars = {
    1.22      :HT => "\x09",
    1.23      :LF => "\x0A",
    1.24 @@ -57,36 +59,50 @@
    1.25      :LS => "\xE2\x80\xA8",
    1.26      :PS => "\xE2\x80\xA9",
    1.27    }
    1.28 +
    1.29 +  module StringExtensions
    1.30 +    def utf8map(*option_array)
    1.31 +      options = 0
    1.32 +      option_array.each do |option|
    1.33 +        flag = Utf8Proc::Options[option]
    1.34 +        raise ArgumentError, "Unknown argument given to String#utf8map." unless
    1.35 +          flag
    1.36 +        options |= flag
    1.37 +      end
    1.38 +      return Utf8Proc::utf8map(self, options)
    1.39 +    end
    1.40 +    def utf8map!(*option_array)
    1.41 +      self.replace(self.utf8map(*option_array))
    1.42 +    end
    1.43 +    def utf8nfd;   utf8map( :stable, :decompose); end
    1.44 +    def utf8nfd!;  utf8map!(:stable, :decompose); end
    1.45 +    def utf8nfc;   utf8map( :stable, :compose); end
    1.46 +    def utf8nfc!;  utf8map!(:stable, :compose); end
    1.47 +    def utf8nfkd;  utf8map( :stable, :decompose, :compat); end
    1.48 +    def utf8nfkd!; utf8map!(:stable, :decompose, :compat); end
    1.49 +    def utf8nfkc;  utf8map( :stable, :compose, :compat); end
    1.50 +    def utf8nfkc!; utf8map!(:stable, :compose, :compat); end
    1.51 +    def char_ary
    1.52 +      char_ary = self.utf8map(:charbound).split("\377")
    1.53 +      char_ary.shift if char_ary.first == ''
    1.54 +      char_ary
    1.55 +    end
    1.56 +  end
    1.57 +
    1.58 +  module IntegerExtensions
    1.59 +    def utf8
    1.60 +      return Utf8Proc::utf8char(self)
    1.61 +    end
    1.62 +  end
    1.63 +
    1.64  end
    1.65  
    1.66 +
    1.67  class String
    1.68 -  def utf8map(*option_array)
    1.69 -    options = 0
    1.70 -    option_array.each do |option|
    1.71 -      flag = Utf8Proc::Options[option]
    1.72 -      raise ArgumentError, "Unknown argument given to String#utf8map." unless
    1.73 -        flag
    1.74 -      options |= flag
    1.75 -    end
    1.76 -    return Utf8Proc::utf8map(self, options)
    1.77 -  end
    1.78 -  def utf8map!(*option_array)
    1.79 -    self.replace(self.utf8map(*option_array))
    1.80 -  end
    1.81 -  def utf8nfd;   utf8map( :stable); end
    1.82 -  def utf8nfd!;  utf8map!(:stable); end
    1.83 -  def utf8nfc;   utf8map( :stable, :compose); end
    1.84 -  def utf8nfc!;  utf8map!(:stable, :compose); end
    1.85 -  def utf8nfkd;  utf8map( :stable, :compat); end
    1.86 -  def utf8nfkd!; utf8map!(:stable, :compat); end
    1.87 -  def utf8nfkc;  utf8map( :stable, :compose, :compat); end
    1.88 -  def utf8nfkc!; utf8map!(:stable, :compose, :compat); end
    1.89 +  include(Utf8Proc::StringExtensions)
    1.90  end
    1.91  
    1.92  class Integer
    1.93 -  def utf8
    1.94 -    return Utf8Proc::utf8char(self)
    1.95 -  end
    1.96 +  include(Utf8Proc::IntegerExtensions)
    1.97  end
    1.98  
    1.99 -

Impressum / About Us