utf8proc

view ruby/utf8proc.rb @ 7:fcfd8c836c64

Version 1.1.1

- Added a new PostgreSQL function 'unistrip', which behaves like 'unifold', but also removes all character marks (e.g. accents).
- Changed license from BSD to MIT style.
- Added a new function 'utf8proc_codepoint_valid' to the C library.
- Changed compiler flags in Makefile from -g -O0 to -O2
- The ruby script, which was used to build the utf8proc_data.c file, is now included in the distribution.
author jbe
date Sun Jul 22 12:00:00 2007 +0200 (2007-07-22)
parents d04d3a9b486e
children 00d2bcbdc945
line source
1 # Copyright (c) 2006-2007 Jan Behrens, FlexiGuided GmbH, Berlin
2 #
3 # Permission is hereby granted, free of charge, to any person obtaining a
4 # copy of this software and associated documentation files (the "Software"),
5 # to deal in the Software without restriction, including without limitation
6 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
7 # and/or sell copies of the Software, and to permit persons to whom the
8 # Software is furnished to do so, subject to the following conditions:
9 #
10 # The above copyright notice and this permission notice shall be included in
11 # all copies or substantial portions of the Software.
12 #
13 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
19 # DEALINGS IN THE SOFTWARE.
22 #
23 # File name: ruby/utf8proc.rb
24 # Version: 1.1.1
25 # Last changed: 2006-09-17
26 #
27 # Description:
28 # The part of the Ruby wrapper for libutf8proc that is itself written in Ruby.
29 #
32 require 'utf8proc_native'
# Ruby-level part of the utf8proc wrapper: a table of special characters plus
# mixin modules that this file includes into String and Integer.
#
# Utf8Proc::Options, Utf8Proc::utf8map and Utf8Proc::utf8char are used here but
# not defined in this file; presumably they are provided by the
# 'utf8proc_native' extension required at the top of the file.
module Utf8Proc

  # Control and separator characters by their conventional short names.
  # Values are raw byte sequences; :LS and :PS are the UTF-8 encodings of
  # U+2028 (LINE SEPARATOR) and U+2029 (PARAGRAPH SEPARATOR).
  SpecialChars = {
    :HT => "\x09",
    :LF => "\x0A",
    :VT => "\x0B",
    :FF => "\x0C",
    :CR => "\x0D",
    :FS => "\x1C",
    :GS => "\x1D",
    :RS => "\x1E",
    :US => "\x1F",
    :LS => "\xE2\x80\xA8",
    :PS => "\xE2\x80\xA9",
  }

  # Methods mixed into String.
  module StringExtensions

    # Maps the receiver through Utf8Proc::utf8map.
    #
    # option_array:: zero or more keys of Utf8Proc::Options (e.g. :stable,
    #                :compose); their flag values are OR-ed together.
    # Returns whatever Utf8Proc::utf8map returns for the combined flags.
    # Raises ArgumentError if a key has no entry in Utf8Proc::Options.
    def utf8map(*option_array)
      options = option_array.inject(0) do |flags, option|
        flag = Utf8Proc::Options[option]
        raise ArgumentError, "Unknown argument given to String#utf8map." unless flag
        flags | flag
      end
      Utf8Proc::utf8map(self, options)
    end

    # In-place variant of #utf8map: replaces the receiver's content with the
    # mapped string and returns the receiver.
    def utf8map!(*option_array)
      replace(utf8map(*option_array))
    end

    # Shorthands for common flag combinations (by their names: the Unicode
    # normalization forms NFD, NFC, NFKD and NFKC). The bang variants modify
    # the receiver in place.
    def utf8nfd;   utf8map( :stable, :decompose); end
    def utf8nfd!;  utf8map!(:stable, :decompose); end
    def utf8nfc;   utf8map( :stable, :compose); end
    def utf8nfc!;  utf8map!(:stable, :compose); end
    def utf8nfkd;  utf8map( :stable, :decompose, :compat); end
    def utf8nfkd!; utf8map!(:stable, :decompose, :compat); end
    def utf8nfkc;  utf8map( :stable, :compose, :compat); end
    def utf8nfkc!; utf8map!(:stable, :compose, :compat); end

    # Splits the receiver into an Array of character strings.
    #
    # The receiver is mapped with :charbound and the result is cut at every
    # 0xFF byte (the marker this method expects between characters). A leading
    # empty piece, produced when the mapped string starts with a marker, is
    # dropped.
    def utf8chars
      marked = utf8map(:charbound)
      boundary = "\377"
      encoding = nil
      if marked.respond_to?(:force_encoding)
        # Ruby >= 1.9: a lone 0xFF byte is never valid UTF-8, so neither the
        # marked string nor the separator is a valid encoded string and
        # String#split would raise. Split on binary-tagged copies instead and
        # restore the original encoding on each piece afterwards.
        encoding = marked.encoding
        marked = marked.dup.force_encoding('BINARY')
        boundary = boundary.dup.force_encoding('BINARY')
      end
      chars = marked.split(boundary)
      chars.shift if chars.first == ""
      chars.each { |char| char.force_encoding(encoding) } if encoding
      chars
    end

    # Deprecated, use String#utf8chars instead.
    def char_ary
      utf8chars
    end
  end

  # Methods mixed into Integer.
  module IntegerExtensions

    # Returns the result of Utf8Proc::utf8char for this integer (by its name,
    # the UTF-8 string for the code point -- confirm against the native
    # extension).
    def utf8
      Utf8Proc::utf8char(self)
    end
  end

end
# Reopen the core String class so every string gains the methods defined in
# Utf8Proc::StringExtensions (utf8map, utf8nfc, utf8chars, ...).
class String
  include Utf8Proc::StringExtensions
end
# Reopen the core Integer class so every integer gains the methods defined in
# Utf8Proc::IntegerExtensions (currently just #utf8).
class Integer
  include Utf8Proc::IntegerExtensions
end

Impressum / About Us