utf8proc
changeset 9:951e73a98021 v1.1.3
Version 1.1.3
- Added a function utf8proc_version returning a string containing the version number of the library.
- Included a target libutf8proc.dylib for MacOSX.
- PostgreSQL 8.3 compatibility (use of SET_VARSIZE macro)
author | jbe |
---|---|
date | Fri May 01 12:00:00 2009 +0200 (2009-05-01) |
parents | 6921ee309940 |
children | 00d2bcbdc945 |
files | Changelog LICENSE Makefile README pgsql/utf8proc_pgsql.c ruby/gem/LICENSE ruby/gem/lib/utf8proc.rb ruby/gem/utf8proc.gemspec utf8proc-1.1.2.gem utf8proc-1.1.3.gem utf8proc.c utf8proc.h |
line diff
1.1 --- a/Changelog Wed Jul 25 12:00:00 2007 +0200 1.2 +++ b/Changelog Fri May 01 12:00:00 2009 +0200 1.3 @@ -16,6 +16,7 @@ 1.4 1.5 Release of version 0.2 1.6 1.7 + 1.8 2006-07-18: 1.9 - changed normalization from NFC to NFKC for postgresql unifold function 1.10 1.11 @@ -33,6 +34,7 @@ 1.12 1.13 Release of version 0.3 1.14 1.15 + 1.16 2006-09-17: 1.17 - added the LUMP option, which lumps certain characters together 1.18 (see lump.txt) (also used for the PostgreSQL "unifold" function) 1.19 @@ -42,11 +44,13 @@ 1.20 1.21 Release of version 1.0 1.22 1.23 + 1.24 2006-09-20: 1.25 - included a gem file for the ruby version of the library 1.26 1.27 Release of version 1.0.1 1.28 1.29 + 1.30 2006-09-21: 1.31 - included a check in Integer#utf8, which raises an exception, if the given 1.32 code-point is invalid because of being too high (this was missing yet) 1.33 @@ -56,12 +60,14 @@ 1.34 1.35 Release of version 1.0.2 1.36 1.37 + 1.38 2007-03-16: 1.39 - Fixed a bug in the ruby library, which caused an error, when splitting an 1.40 empty string at grapheme cluster boundaries (method String#utf8chars). 1.41 1.42 Release of version 1.0.3 1.43 1.44 + 1.45 2007-06-25: 1.46 - Added a new PostgreSQL function 'unistrip', which behaves like 'unifold', 1.47 but also removes all character marks (e.g. accents). 1.48 @@ -75,8 +81,22 @@ 1.49 1.50 Release of version 1.1.1 1.51 1.52 + 1.53 2007-07-25: 1.54 - Fixed a serious bug in the data file generator, which caused characters 1.55 being treated incorrectly, when stripping default ignorable characters or 1.56 calculating grapheme cluster boundaries. 1.57 1.58 +Release of version 1.1.2 1.59 + 1.60 + 1.61 +2008-10-04: 1.62 +- Added a function utf8proc_version returning a string containing the version 1.63 + number of the library. 1.64 +- Included a target libutf8proc.dylib for MacOSX. 1.65 + 1.66 +2009-05-01: 1.67 +- PostgreSQL 8.3 compatibility (use of SET_VARSIZE macro) 1.68 + 1.69 +Release of version 1.1.3 1.70 +
2.1 --- a/LICENSE Wed Jul 25 12:00:00 2007 +0200 2.2 +++ b/LICENSE Fri May 01 12:00:00 2009 +0200 2.3 @@ -1,5 +1,5 @@ 2.4 2.5 -Copyright (c) 2006-2007 Jan Behrens, FlexiGuided GmbH, Berlin 2.6 +Copyright (c) 2006-2009 Jan Behrens, FlexiGuided GmbH, Berlin 2.7 2.8 Permission is hereby granted, free of charge, to any person obtaining a 2.9 copy of this software and associated documentation files (the "Software"),
3.1 --- a/Makefile Wed Jul 25 12:00:00 2007 +0200 3.2 +++ b/Makefile Fri May 01 12:00:00 2009 +0200 3.3 @@ -4,7 +4,7 @@ 3.4 # settings 3.5 3.6 cflags = -O2 -std=c99 -pedantic -Wall -fpic $(CFLAGS) 3.7 -cc = gcc $(cflags) 3.8 +cc = $(CC) $(cflags) 3.9 3.10 3.11 # meta targets 3.12 @@ -35,6 +35,9 @@ 3.13 $(cc) -shared -o libutf8proc.so utf8proc.o 3.14 chmod a-x libutf8proc.so 3.15 3.16 +libutf8proc.dylib: utf8proc.o 3.17 + $(cc) -dynamiclib -o $@ $^ -install_name $(libdir)/$@ 3.18 + 3.19 ruby/Makefile: ruby/extconf.rb 3.20 cd ruby && ruby extconf.rb 3.21
4.1 --- a/README Wed Jul 25 12:00:00 2007 +0200 4.2 +++ b/README Fri May 01 12:00:00 2009 +0200 4.3 @@ -11,7 +11,7 @@ 4.4 "make all" can be used to build everything, but both ruby and PostgreSQL 4.5 installations are required in this case. 4.6 4.7 -For ruby there is alternatively provided a gem-file "utf8proc-1.1.1.gem". 4.8 +For ruby there is alternatively provided a gem-file "utf8proc-1.1.3.gem". 4.9 4.10 4.11 *** GENERAL INFORMATION *** 4.12 @@ -110,6 +110,8 @@ 4.13 If you find any bugs or experience difficulties in compiling this software, 4.14 please contact me: 4.15 4.16 -Jan Behrens <jan.behrens.n4272.expires-2008-06@flexiguided.de> 4.17 -http://www.flexiguided.de/publications.utf8proc.en.html 4.18 +Project page: http://www.flexiguided.de/publications.utf8proc.en.html 4.19 +Contact form: http://www.flexiguided.de/contactform.en.html 4.20 4.21 +Jan Behrens 4.22 +
5.1 --- a/pgsql/utf8proc_pgsql.c Wed Jul 25 12:00:00 2007 +0200 5.2 +++ b/pgsql/utf8proc_pgsql.c Fri May 01 12:00:00 2009 +0200 5.3 @@ -1,5 +1,5 @@ 5.4 /* 5.5 - * Copyright (c) 2006-2007 Jan Behrens, FlexiGuided GmbH, Berlin 5.6 + * Copyright (c) 2006-2009 Jan Behrens, FlexiGuided GmbH, Berlin 5.7 * 5.8 * Permission is hereby granted, free of charge, to any person obtaining a 5.9 * copy of this software and associated documentation files (the "Software"), 5.10 @@ -23,8 +23,8 @@ 5.11 5.12 /* 5.13 * File name: pgsql/utf8proc_pgsql.c 5.14 - * Version: 1.1.1 5.15 - * Last changed: 2007-07-22 5.16 + * Version: 1.1.3 5.17 + * Last changed: 2009-05-01 5.18 * 5.19 * Description: 5.20 * PostgreSQL extension to provide two functions 'unifold' and 'unistrip', 5.21 @@ -77,7 +77,7 @@ 5.22 result = utf8proc_reencode( 5.23 (int32_t *)VARDATA(output_string), result, options 5.24 ); 5.25 - if (result >= 0) VARATT_SIZEP(output_string) = result + VARHDRSZ; 5.26 + if (result >= 0) SET_VARSIZE(output_string, result + VARHDRSZ); 5.27 return result; 5.28 } 5.29
6.1 --- a/ruby/gem/LICENSE Wed Jul 25 12:00:00 2007 +0200 6.2 +++ b/ruby/gem/LICENSE Fri May 01 12:00:00 2009 +0200 6.3 @@ -1,1 +1,64 @@ 6.4 -../../LICENSE 6.5 \ No newline at end of file 6.6 + 6.7 +Copyright (c) 2006-2009 Jan Behrens, FlexiGuided GmbH, Berlin 6.8 + 6.9 +Permission is hereby granted, free of charge, to any person obtaining a 6.10 +copy of this software and associated documentation files (the "Software"), 6.11 +to deal in the Software without restriction, including without limitation 6.12 +the rights to use, copy, modify, merge, publish, distribute, sublicense, 6.13 +and/or sell copies of the Software, and to permit persons to whom the 6.14 +Software is furnished to do so, subject to the following conditions: 6.15 + 6.16 +The above copyright notice and this permission notice shall be included in 6.17 +all copies or substantial portions of the Software. 6.18 + 6.19 +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 6.20 +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 6.21 +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 6.22 +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 6.23 +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 6.24 +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 6.25 +DEALINGS IN THE SOFTWARE. 6.26 + 6.27 + 6.28 +This software distribution contains derived data from a modified version of 6.29 +the Unicode data files. The following license applies to that data: 6.30 + 6.31 +COPYRIGHT AND PERMISSION NOTICE 6.32 + 6.33 +Copyright (c) 1991-2007 Unicode, Inc. All rights reserved. Distributed 6.34 +under the Terms of Use in http://www.unicode.org/copyright.html. 
6.35 + 6.36 +Permission is hereby granted, free of charge, to any person obtaining a 6.37 +copy of the Unicode data files and any associated documentation (the "Data 6.38 +Files") or Unicode software and any associated documentation (the 6.39 +"Software") to deal in the Data Files or Software without restriction, 6.40 +including without limitation the rights to use, copy, modify, merge, 6.41 +publish, distribute, and/or sell copies of the Data Files or Software, and 6.42 +to permit persons to whom the Data Files or Software are furnished to do 6.43 +so, provided that (a) the above copyright notice(s) and this permission 6.44 +notice appear with all copies of the Data Files or Software, (b) both the 6.45 +above copyright notice(s) and this permission notice appear in associated 6.46 +documentation, and (c) there is clear notice in each modified Data File or 6.47 +in the Software as well as in the documentation associated with the Data 6.48 +File(s) or Software that the data or software has been modified. 6.49 + 6.50 +THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 6.51 +KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 6.52 +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF 6.53 +THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS 6.54 +INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR 6.55 +CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF 6.56 +USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER 6.57 +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 6.58 +PERFORMANCE OF THE DATA FILES OR SOFTWARE. 6.59 + 6.60 +Except as contained in this notice, the name of a copyright holder shall 6.61 +not be used in advertising or otherwise to promote the sale, use or other 6.62 +dealings in these Data Files or Software without prior written 6.63 +authorization of the copyright holder. 
6.64 + 6.65 + 6.66 +Unicode and the Unicode logo are trademarks of Unicode, Inc., and may be 6.67 +registered in some jurisdictions. All other trademarks and registered 6.68 +trademarks mentioned herein are the property of their respective owners. 6.69 +
7.1 --- a/ruby/gem/lib/utf8proc.rb Wed Jul 25 12:00:00 2007 +0200 7.2 +++ b/ruby/gem/lib/utf8proc.rb Fri May 01 12:00:00 2009 +0200 7.3 @@ -1,1 +1,100 @@ 7.4 -../../utf8proc.rb 7.5 \ No newline at end of file 7.6 +# Copyright (c) 2006-2007 Jan Behrens, FlexiGuided GmbH, Berlin 7.7 +# 7.8 +# Permission is hereby granted, free of charge, to any person obtaining a 7.9 +# copy of this software and associated documentation files (the "Software"), 7.10 +# to deal in the Software without restriction, including without limitation 7.11 +# the rights to use, copy, modify, merge, publish, distribute, sublicense, 7.12 +# and/or sell copies of the Software, and to permit persons to whom the 7.13 +# Software is furnished to do so, subject to the following conditions: 7.14 +# 7.15 +# The above copyright notice and this permission notice shall be included in 7.16 +# all copies or substantial portions of the Software. 7.17 +# 7.18 +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 7.19 +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 7.20 +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 7.21 +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 7.22 +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 7.23 +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 7.24 +# DEALINGS IN THE SOFTWARE. 7.25 + 7.26 + 7.27 +# 7.28 +# File name: ruby/utf8proc.rb 7.29 +# Version: 1.1.1 7.30 +# Last changed: 2006-09-17 7.31 +# 7.32 +# Description: 7.33 +# Part of the ruby wrapper for libutf8proc, which is written in ruby. 
7.34 +# 7.35 + 7.36 + 7.37 +require 'utf8proc_native' 7.38 + 7.39 + 7.40 +module Utf8Proc 7.41 + 7.42 + SpecialChars = { 7.43 + :HT => "\x09", 7.44 + :LF => "\x0A", 7.45 + :VT => "\x0B", 7.46 + :FF => "\x0C", 7.47 + :CR => "\x0D", 7.48 + :FS => "\x1C", 7.49 + :GS => "\x1D", 7.50 + :RS => "\x1E", 7.51 + :US => "\x1F", 7.52 + :LS => "\xE2\x80\xA8", 7.53 + :PS => "\xE2\x80\xA9", 7.54 + } 7.55 + 7.56 + module StringExtensions 7.57 + def utf8map(*option_array) 7.58 + options = 0 7.59 + option_array.each do |option| 7.60 + flag = Utf8Proc::Options[option] 7.61 + raise ArgumentError, "Unknown argument given to String#utf8map." unless 7.62 + flag 7.63 + options |= flag 7.64 + end 7.65 + return Utf8Proc::utf8map(self, options) 7.66 + end 7.67 + def utf8map!(*option_array) 7.68 + self.replace(self.utf8map(*option_array)) 7.69 + end 7.70 + def utf8nfd; utf8map( :stable, :decompose); end 7.71 + def utf8nfd!; utf8map!(:stable, :decompose); end 7.72 + def utf8nfc; utf8map( :stable, :compose); end 7.73 + def utf8nfc!; utf8map!(:stable, :compose); end 7.74 + def utf8nfkd; utf8map( :stable, :decompose, :compat); end 7.75 + def utf8nfkd!; utf8map!(:stable, :decompose, :compat); end 7.76 + def utf8nfkc; utf8map( :stable, :compose, :compat); end 7.77 + def utf8nfkc!; utf8map!(:stable, :compose, :compat); end 7.78 + def utf8chars 7.79 + result = self.utf8map(:charbound).split("\377") 7.80 + result.shift if result.first == "" 7.81 + result 7.82 + end 7.83 + def char_ary 7.84 + # depecated, use String#utf8chars instead 7.85 + utf8chars 7.86 + end 7.87 + end 7.88 + 7.89 + module IntegerExtensions 7.90 + def utf8 7.91 + return Utf8Proc::utf8char(self) 7.92 + end 7.93 + end 7.94 + 7.95 +end 7.96 + 7.97 + 7.98 +class String 7.99 + include(Utf8Proc::StringExtensions) 7.100 +end 7.101 + 7.102 +class Integer 7.103 + include(Utf8Proc::IntegerExtensions) 7.104 +end 7.105 +
8.1 --- a/ruby/gem/utf8proc.gemspec Wed Jul 25 12:00:00 2007 +0200 8.2 +++ b/ruby/gem/utf8proc.gemspec Fri May 01 12:00:00 2009 +0200 8.3 @@ -1,9 +1,8 @@ 8.4 require 'rubygems' 8.5 SPEC = Gem::Specification.new do |s| 8.6 s.name = 'utf8proc' 8.7 - s.version = '1.1.2' 8.8 + s.version = '1.1.3' 8.9 s.author = 'Jan Behrens' 8.10 - s.email = 'jan.behrens.n4272.expires-2008-06@flexiguided.de' 8.11 s.homepage = 'http://www.flexiguided.de/publications.utf8proc.en.html' 8.12 s.summary = 'UTF-8 Unicode string processing' 8.13 s.files = ['LICENSE', 'lib/utf8proc.rb', 'ext/utf8proc_native.c']
9.1 Binary file utf8proc-1.1.2.gem has changed
10.1 Binary file utf8proc-1.1.3.gem has changed
11.1 --- a/utf8proc.c Wed Jul 25 12:00:00 2007 +0200 11.2 +++ b/utf8proc.c Fri May 01 12:00:00 2009 +0200 11.3 @@ -97,6 +97,10 @@ 11.4 #define UTF8PROC_BOUNDCLASS_LVT 10 11.5 11.6 11.7 +const char *utf8proc_version(void) { 11.8 + return "1.1.3"; 11.9 +} 11.10 + 11.11 const char *utf8proc_errmsg(ssize_t errcode) { 11.12 switch (errcode) { 11.13 case UTF8PROC_ERROR_NOMEM:
12.1 --- a/utf8proc.h Wed Jul 25 12:00:00 2007 +0200 12.2 +++ b/utf8proc.h Fri May 01 12:00:00 2009 +0200 12.3 @@ -220,6 +220,8 @@ 12.4 12.5 extern const int8_t utf8proc_utf8class[256]; 12.6 12.7 +const char *utf8proc_version(void); 12.8 + 12.9 const char *utf8proc_errmsg(ssize_t errcode); 12.10 /* 12.11 * Returns a static error string for the given error code.