utf8proc

view ruby/utf8proc.rb @ 2:aaad485d5335

Version 0.3

- changed normalization from NFC to NFKC for postgresql unifold function
- added support to mark the beginning of a grapheme cluster with 0xFF (option: CHARBOUND)
- added the ruby method String#chars, which returns an array of UTF-8 encoded grapheme clusters
- added NLF2LF transformation in postgresql unifold function
- added the DECOMPOSE option; if you use neither COMPOSE nor DECOMPOSE, no normalization will be performed (unlike previous versions)
- using integer constants rather than C-strings for character properties
- fixed (hopefully) a problem with the ruby library on Mac OS X, which occurred when compiler optimization was switched on
author jbe
date Fri Aug 04 12:00:00 2006 +0200 (2006-08-04)
parents 61a89ecc2fb9
children 4ee0d5f54af1
line source
1 ##
2 # Copyright (c) 2006, FlexiGuided GmbH, Berlin, Germany
3 # Author: Jan Behrens <jan.behrens@flexiguided.de>
4 # All rights reserved.
5 #
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions are
8 # met:
9 #
10 # 1. Redistributions of source code must retain the above copyright
11 # notice, this list of conditions and the following disclaimer.
12 # 2. Redistributions in binary form must reproduce the above copyright
13 # notice, this list of conditions and the following disclaimer in the
14 # documentation and/or other materials provided with the distribution.
15 # 3. Neither the name of the FlexiGuided GmbH nor the names of its
16 # contributors may be used to endorse or promote products derived from
17 # this software without specific prior written permission.
18 #
19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
22 # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
23 # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 #
31 ##
34 ##
35 # File name: ruby/utf8proc.rb
36 # Version: 0.3
37 # Last changed: 2006-08-04
38 #
39 # Description:
40 # Part of the ruby wrapper for libutf8proc, which is written in ruby.
41 ##
44 require 'utf8proc_native'
##
# Ruby-level part of the utf8proc wrapper.
#
# The methods below call Utf8Proc::Options, Utf8Proc::utf8map and
# Utf8Proc::utf8char, none of which is defined in this file; they are
# presumably provided by the 'utf8proc_native' extension required above.
module Utf8Proc

  # Control and separator characters as UTF-8 encoded strings, keyed by
  # their usual abbreviation. The hash and its values are frozen so that no
  # caller can accidentally alter these shared constants.
  SpecialChars = {
    :HT => "\x09",          # horizontal tab
    :LF => "\x0A",          # line feed
    :VT => "\x0B",          # vertical tab
    :FF => "\x0C",          # form feed
    :CR => "\x0D",          # carriage return
    :FS => "\x1C",          # file separator
    :GS => "\x1D",          # group separator
    :RS => "\x1E",          # record separator
    :US => "\x1F",          # unit separator
    :LS => "\xE2\x80\xA8",  # U+2028 line separator
    :PS => "\xE2\x80\xA9",  # U+2029 paragraph separator
  }
  SpecialChars.each_value { |char| char.freeze }
  SpecialChars.freeze

  # Methods that get mixed into String.
  module StringExtensions

    # Maps the string through Utf8Proc::utf8map and returns the result.
    #
    # option_array:: any number of keys of Utf8Proc::Options (for example
    #                :stable, :compose, :decompose, :compat, :charbound);
    #                their flag values are OR-ed together.
    #
    # Raises ArgumentError if a key has no entry in Utf8Proc::Options.
    def utf8map(*option_array)
      options = option_array.inject(0) do |bits, option|
        flag = Utf8Proc::Options[option]
        raise ArgumentError, "Unknown argument given to String#utf8map." unless
          flag
        bits | flag
      end
      Utf8Proc::utf8map(self, options)
    end

    # In-place variant of #utf8map: replaces the receiver's content with the
    # mapped string and returns the receiver.
    def utf8map!(*option_array)
      replace(utf8map(*option_array))
    end

    # Shortcuts for the option combinations named after the four Unicode
    # normalization forms; the bang variants modify the receiver in place.
    def utf8nfd;   utf8map( :stable, :decompose); end
    def utf8nfd!;  utf8map!(:stable, :decompose); end
    def utf8nfc;   utf8map( :stable, :compose); end
    def utf8nfc!;  utf8map!(:stable, :compose); end
    def utf8nfkd;  utf8map( :stable, :decompose, :compat); end
    def utf8nfkd!; utf8map!(:stable, :decompose, :compat); end
    def utf8nfkc;  utf8map( :stable, :compose, :compat); end
    def utf8nfkc!; utf8map!(:stable, :compose, :compat); end

    # Returns an array with the pieces of the string obtained by mapping it
    # with the :charbound option and cutting the result at every 0xFF
    # boundary marker.
    def char_ary
      marked   = utf8map(:charbound)
      boundary = "\377"
      original_encoding = nil
      # A lone 0xFF byte is never valid UTF-8, and encoding-aware Rubies may
      # refuse to split on (or within) such a string. Do the split on raw
      # bytes and restore the encoding of the mapped string afterwards.
      # Rubies without String#force_encoding take the plain path.
      if marked.respond_to?(:force_encoding)
        original_encoding = marked.encoding
        marked   = marked.dup.force_encoding('BINARY')
        boundary = boundary.dup.force_encoding('BINARY')
      end
      clusters = marked.split(boundary)
      # The mapped string starts with a marker, which yields a leading ''.
      clusters.shift if clusters.first == ''
      if original_encoding
        clusters.each { |cluster| cluster.force_encoding(original_encoding) }
      end
      clusters
    end

  end

  # Methods that get mixed into Integer.
  module IntegerExtensions

    # Returns the result of Utf8Proc::utf8char for this integer
    # (presumably the UTF-8 encoding of the code point -- confirm against
    # the native extension).
    def utf8
      Utf8Proc::utf8char(self)
    end

  end

end
# Make the utf8proc helpers available on every String.
class String
  include Utf8Proc::StringExtensions
end
# Make the utf8proc helpers available on every Integer.
class Integer
  include Utf8Proc::IntegerExtensions
end

Impressum / About Us