utf8proc
diff ruby/utf8proc.rb @ 2:aaad485d5335
Version 0.3
- changed normalization from NFC to NFKC for postgresql unifold function
- added support to mark the beginning of a grapheme cluster with 0xFF (option: CHARBOUND)
- added the Ruby method String#chars, which returns an array of UTF-8 encoded grapheme clusters
- added NLF2LF transformation in postgresql unifold function
- added the DECOMPOSE option; if you use neither COMPOSE nor DECOMPOSE, no normalization will be performed (different from previous versions)
- using integer constants rather than C-strings for character properties
- fixed (hopefully) a problem with the Ruby library on Mac OS X, which occurred when compiler optimization was switched on
- changed normalization from NFC to NFKC for postgresql unifold function
- added support to mark the beginning of a grapheme cluster with 0xFF (option: CHARBOUND)
- added the Ruby method String#chars, which returns an array of UTF-8 encoded grapheme clusters
- added NLF2LF transformation in postgresql unifold function
- added the DECOMPOSE option; if you use neither COMPOSE nor DECOMPOSE, no normalization will be performed (different from previous versions)
- using integer constants rather than C-strings for character properties
- fixed (hopefully) a problem with the Ruby library on Mac OS X, which occurred when compiler optimization was switched on
author | jbe |
---|---|
date | Fri Aug 04 12:00:00 2006 +0200 (2006-08-04) |
parents | 61a89ecc2fb9 |
children | 4ee0d5f54af1 |
line diff
1.1 --- a/ruby/utf8proc.rb Tue Jun 20 12:00:00 2006 +0200 1.2 +++ b/ruby/utf8proc.rb Fri Aug 04 12:00:00 2006 +0200 1.3 @@ -33,8 +33,8 @@ 1.4 1.5 ## 1.6 # File name: ruby/utf8proc.rb 1.7 - # Version: 0.2 1.8 - # Last changed: 2006-05-31 1.9 + # Version: 0.3 1.10 + # Last changed: 2006-08-04 1.11 # 1.12 # Description: 1.13 # Part of the ruby wrapper for libutf8proc, which is written in ruby. 1.14 @@ -43,7 +43,9 @@ 1.15 1.16 require 'utf8proc_native' 1.17 1.18 + 1.19 module Utf8Proc 1.20 + 1.21 SpecialChars = { 1.22 :HT => "\x09", 1.23 :LF => "\x0A", 1.24 @@ -57,36 +59,50 @@ 1.25 :LS => "\xE2\x80\xA8", 1.26 :PS => "\xE2\x80\xA9", 1.27 } 1.28 + 1.29 + module StringExtensions 1.30 + def utf8map(*option_array) 1.31 + options = 0 1.32 + option_array.each do |option| 1.33 + flag = Utf8Proc::Options[option] 1.34 + raise ArgumentError, "Unknown argument given to String#utf8map." unless 1.35 + flag 1.36 + options |= flag 1.37 + end 1.38 + return Utf8Proc::utf8map(self, options) 1.39 + end 1.40 + def utf8map!(*option_array) 1.41 + self.replace(self.utf8map(*option_array)) 1.42 + end 1.43 + def utf8nfd; utf8map( :stable, :decompose); end 1.44 + def utf8nfd!; utf8map!(:stable, :decompose); end 1.45 + def utf8nfc; utf8map( :stable, :compose); end 1.46 + def utf8nfc!; utf8map!(:stable, :compose); end 1.47 + def utf8nfkd; utf8map( :stable, :decompose, :compat); end 1.48 + def utf8nfkd!; utf8map!(:stable, :decompose, :compat); end 1.49 + def utf8nfkc; utf8map( :stable, :compose, :compat); end 1.50 + def utf8nfkc!; utf8map!(:stable, :compose, :compat); end 1.51 + def char_ary 1.52 + char_ary = self.utf8map(:charbound).split("\377") 1.53 + char_ary.shift if char_ary.first == '' 1.54 + char_ary 1.55 + end 1.56 + end 1.57 + 1.58 + module IntegerExtensions 1.59 + def utf8 1.60 + return Utf8Proc::utf8char(self) 1.61 + end 1.62 + end 1.63 + 1.64 end 1.65 1.66 + 1.67 class String 1.68 - def utf8map(*option_array) 1.69 - options = 0 1.70 - option_array.each do |option| 1.71 - flag = 
Utf8Proc::Options[option] 1.72 - raise ArgumentError, "Unknown argument given to String#utf8map." unless 1.73 - flag 1.74 - options |= flag 1.75 - end 1.76 - return Utf8Proc::utf8map(self, options) 1.77 - end 1.78 - def utf8map!(*option_array) 1.79 - self.replace(self.utf8map(*option_array)) 1.80 - end 1.81 - def utf8nfd; utf8map( :stable); end 1.82 - def utf8nfd!; utf8map!(:stable); end 1.83 - def utf8nfc; utf8map( :stable, :compose); end 1.84 - def utf8nfc!; utf8map!(:stable, :compose); end 1.85 - def utf8nfkd; utf8map( :stable, :compat); end 1.86 - def utf8nfkd!; utf8map!(:stable, :compat); end 1.87 - def utf8nfkc; utf8map( :stable, :compose, :compat); end 1.88 - def utf8nfkc!; utf8map!(:stable, :compose, :compat); end 1.89 + include(Utf8Proc::StringExtensions) 1.90 end 1.91 1.92 class Integer 1.93 - def utf8 1.94 - return Utf8Proc::utf8char(self) 1.95 - end 1.96 + include(Utf8Proc::IntegerExtensions) 1.97 end 1.98 1.99 -