utf8proc

annotate ruby/utf8proc.rb @ 3:4ee0d5f54af1

Version 1.0

- added the LUMP option, which lumps certain characters together (see lump.txt) (also used for the PostgreSQL "unifold" function)
- added the STRIPMARK option, which strips marking characters (or marks of composed characters)
- deprecated ruby method String#char_ary in favour of String#utf8chars
author jbe
date Sun Sep 17 12:00:00 2006 +0200 (2006-09-17)
parents aaad485d5335
children d04d3a9b486e
rev   line source
jbe@0 1 ##
jbe@0 2 # Copyright (c) 2006, FlexiGuided GmbH, Berlin, Germany
jbe@0 3 # Author: Jan Behrens <jan.behrens@flexiguided.de>
jbe@0 4 # All rights reserved.
jbe@0 5 #
jbe@0 6 # Redistribution and use in source and binary forms, with or without
jbe@0 7 # modification, are permitted provided that the following conditions are
jbe@0 8 # met:
jbe@0 9 #
jbe@0 10 # 1. Redistributions of source code must retain the above copyright
jbe@0 11 # notice, this list of conditions and the following disclaimer.
jbe@0 12 # 2. Redistributions in binary form must reproduce the above copyright
jbe@0 13 # notice, this list of conditions and the following disclaimer in the
jbe@0 14 # documentation and/or other materials provided with the distribution.
jbe@0 15 # 3. Neither the name of the FlexiGuided GmbH nor the names of its
jbe@0 16 # contributors may be used to endorse or promote products derived from
jbe@0 17 # this software without specific prior written permission.
jbe@0 18 #
jbe@0 19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
jbe@0 20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
jbe@0 21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
jbe@0 22 # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
jbe@0 23 # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
jbe@0 24 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
jbe@0 25 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
jbe@0 26 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
jbe@0 27 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
jbe@0 28 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
jbe@0 29 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
jbe@0 30 #
jbe@0 31 ##
jbe@0 32
jbe@0 33
jbe@0 34 ##
jbe@0 35 # File name: ruby/utf8proc.rb
jbe@3 36 # Version: 1.0
jbe@3 37 # Last changed: 2006-09-17
jbe@0 38 #
jbe@0 39 # Description:
jbe@0 40 # The part of the ruby wrapper for libutf8proc that is written in ruby.
jbe@0 41 ##
jbe@0 42
jbe@0 43
jbe@0 44 require 'utf8proc_native'
jbe@0 45
jbe@2 46
##
# Ruby-side part of the utf8proc wrapper. The heavy lifting is delegated to
# Utf8Proc.utf8map, Utf8Proc.utf8char and the Utf8Proc::Options table, none
# of which are defined in this file (presumably they come from the
# 'utf8proc_native' extension required by this file -- confirm).
##
module Utf8Proc

  # Symbolic names for control and separator characters, mapped to their
  # byte sequences. :HT .. :US are single ASCII control bytes; :LS and :PS
  # are the UTF-8 encodings of U+2028 and U+2029.
  SpecialChars = {
    :HT => "\x09",
    :LF => "\x0A",
    :VT => "\x0B",
    :FF => "\x0C",
    :CR => "\x0D",
    :FS => "\x1C",
    :GS => "\x1D",
    :RS => "\x1E",
    :US => "\x1F",
    :LS => "\xE2\x80\xA8",
    :PS => "\xE2\x80\xA9",
  }

  # Methods mixed into String.
  module StringExtensions
    # Returns the result of Utf8Proc.utf8map for this string.
    #
    # option_array:: keys of Utf8Proc::Options (e.g. :stable, :compose);
    #                their flag values are OR-ed together into one bit mask.
    #
    # Raises ArgumentError if any given option is not a key of
    # Utf8Proc::Options.
    def utf8map(*option_array)
      options = option_array.inject(0) do |flags, option|
        flag = Utf8Proc::Options[option]
        unless flag
          raise ArgumentError, "Unknown argument given to String#utf8map."
        end
        flags | flag
      end
      Utf8Proc.utf8map(self, options)
    end

    # In-place variant of #utf8map: replaces the receiver's content with the
    # mapped string and returns the receiver.
    def utf8map!(*option_array)
      replace(utf8map(*option_array))
    end

    # Shortcuts named after the Unicode normalization forms; each one simply
    # calls #utf8map (or #utf8map! for the bang variants) with a fixed set
    # of options.
    def utf8nfd;   utf8map( :stable, :decompose); end
    def utf8nfd!;  utf8map!(:stable, :decompose); end
    def utf8nfc;   utf8map( :stable, :compose); end
    def utf8nfc!;  utf8map!(:stable, :compose); end
    def utf8nfkd;  utf8map( :stable, :decompose, :compat); end
    def utf8nfkd!; utf8map!(:stable, :decompose, :compat); end
    def utf8nfkc;  utf8map( :stable, :compose, :compat); end
    def utf8nfkc!; utf8map!(:stable, :compose, :compat); end

    # Splits the string into an array of strings, one per boundary reported
    # by the :charbound mapping. This relies on the :charbound result
    # separating the pieces with a single 0xFF byte.
    #
    # Returns an empty array for an empty string.
    def utf8chars
      marked = utf8map(:charbound)
      boundary = "\377"
      piece_encoding = nil
      if marked.respond_to?(:force_encoding)
        # Ruby >= 1.9: 0xFF can never be part of valid UTF-8, and
        # String#split rejects strings with broken encoding. Split on raw
        # bytes instead and give the pieces their encoding back afterwards.
        piece_encoding = marked.encoding
        marked = marked.dup.force_encoding(Encoding::BINARY)
        boundary = boundary.dup.force_encoding(Encoding::BINARY)
      end
      result = marked.split(boundary)
      # A marker in front of the first character yields an empty leading
      # element; an empty input yields no elements at all.
      leading = result.first
      result.shift if leading && leading.empty?
      if piece_encoding
        result.each { |char| char.force_encoding(piece_encoding) }
      end
      result
    end

    # Deprecated, use String#utf8chars instead.
    def char_ary
      utf8chars
    end
  end

  # Methods mixed into Integer.
  module IntegerExtensions
    # Returns the result of Utf8Proc.utf8char for this integer
    # (presumably the UTF-8 encoded character for this code point -- the
    # conversion itself is not defined in this file).
    def utf8
      Utf8Proc.utf8char(self)
    end
  end

end
jbe@0 103
jbe@2 104
# Mix the utf8proc helper methods (utf8map, utf8chars, ...) into String.
String.send(:include, Utf8Proc::StringExtensions)
jbe@0 108
# Mix the utf8proc helper method (utf8) into Integer.
Integer.send(:include, Utf8Proc::IntegerExtensions)
jbe@0 112

Impressum / About Us