utf8proc

diff ruby/utf8proc.rb @ 3:4ee0d5f54af1
Version 1.0

- added the LUMP option, which lumps certain characters together (see lump.txt; also used for the PostgreSQL "unifold" function)
- added the STRIPMARK option, which strips marking characters (or marks of composed characters)
- deprecated ruby method String#char_ary in favour of String#utf8chars
author: jbe
date: Sun Sep 17 12:00:00 2006 +0200 (2006-09-17)
parents: aaad485d5335
children: d04d3a9b486e
     1.1 --- a/ruby/utf8proc.rb	Fri Aug 04 12:00:00 2006 +0200
     1.2 +++ b/ruby/utf8proc.rb	Sun Sep 17 12:00:00 2006 +0200
     1.3 @@ -33,8 +33,8 @@
     1.4  
     1.5  ##
     1.6   #  File name:    ruby/utf8proc.rb
     1.7 - #  Version:      0.3
     1.8 - #  Last changed: 2006-08-04
     1.9 + #  Version:      1.0
    1.10 + #  Last changed: 2006-09-17
    1.11   #
    1.12   #  Description:
    1.13   #  Part of the ruby wrapper for libutf8proc, which is written in ruby.
    1.14 @@ -82,10 +82,14 @@
    1.15      def utf8nfkd!; utf8map!(:stable, :decompose, :compat); end
    1.16      def utf8nfkc;  utf8map( :stable, :compose, :compat); end
    1.17      def utf8nfkc!; utf8map!(:stable, :compose, :compat); end
    1.18 +    def utf8chars
    1.19 +      result = self.utf8map(:charbound).split("\377")
    1.20 +      result.shift if result.first.empty?
    1.21 +      result
    1.22 +    end
    1.23      def char_ary
    1.24 -      char_ary = self.utf8map(:charbound).split("\377")
    1.25 -      char_ary.shift if char_ary.first == ''
    1.26 -      char_ary
    1.27 +      # deprecated, use String#utf8chars instead
    1.28 +      utf8chars
    1.29      end
    1.30    end
    1.31
author	jbe
date	Sun Sep 17 12:00:00 2006 +0200 (2006-09-17)
parents	aaad485d5335
children	d04d3a9b486e