# HG changeset patch # User jbe # Date 1241172000 -7200 # Node ID 951e73a98021127e94c86cf075bd5b0e0cb08bcc # Parent 6921ee309940221c49179c0d2b2bf34cc70ae268 Version 1.1.3 - Added a function utf8proc_version returning a string containing the version number of the library. - Included a target libutf8proc.dylib for MacOSX. - PostgreSQL 8.3 compatibility (use of SET_VARSIZE macro) diff -r 6921ee309940 -r 951e73a98021 Changelog --- a/Changelog Wed Jul 25 12:00:00 2007 +0200 +++ b/Changelog Fri May 01 12:00:00 2009 +0200 @@ -16,6 +16,7 @@ Release of version 0.2 + 2006-07-18: - changed normalization from NFC to NFKC for postgresql unifold function @@ -33,6 +34,7 @@ Release of version 0.3 + 2006-09-17: - added the LUMP option, which lumps certain characters together (see lump.txt) (also used for the PostgreSQL "unifold" function) @@ -42,11 +44,13 @@ Release of version 1.0 + 2006-09-20: - included a gem file for the ruby version of the library Release of version 1.0.1 + 2006-09-21: - included a check in Integer#utf8, which raises an exception, if the given code-point is invalid because of being too high (this was missing yet) @@ -56,12 +60,14 @@ Release of version 1.0.2 + 2007-03-16: - Fixed a bug in the ruby library, which caused an error, when splitting an empty string at grapheme cluster boundaries (method String#utf8chars). Release of version 1.0.3 + 2007-06-25: - Added a new PostgreSQL function 'unistrip', which behaves like 'unifold', but also removes all character marks (e.g. accents). @@ -75,8 +81,22 @@ Release of version 1.1.1 + 2007-07-25: - Fixed a serious bug in the data file generator, which caused characters being treated incorrectly, when stripping default ignorable characters or calculating grapheme cluster boundaries. +Release of version 1.1.2 + + +2008-10-04: +- Added a function utf8proc_version returning a string containing the version + number of the library. +- Included a target libutf8proc.dylib for MacOSX. + +2009-05-01: +- PostgreSQL 8.3 compatibility (use of SET_VARSIZE macro) + +Release of version 1.1.3 + diff -r 6921ee309940 -r 951e73a98021 LICENSE --- a/LICENSE Wed Jul 25 12:00:00 2007 +0200 +++ b/LICENSE Fri May 01 12:00:00 2009 +0200 @@ -1,5 +1,5 @@ -Copyright (c) 2006-2007 Jan Behrens, FlexiGuided GmbH, Berlin +Copyright (c) 2006-2009 Jan Behrens, FlexiGuided GmbH, Berlin Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), diff -r 6921ee309940 -r 951e73a98021 Makefile --- a/Makefile Wed Jul 25 12:00:00 2007 +0200 +++ b/Makefile Fri May 01 12:00:00 2009 +0200 @@ -4,7 +4,7 @@ # settings cflags = -O2 -std=c99 -pedantic -Wall -fpic $(CFLAGS) -cc = gcc $(cflags) +cc = $(CC) $(cflags) # meta targets @@ -35,6 +35,9 @@ $(cc) -shared -o libutf8proc.so utf8proc.o chmod a-x libutf8proc.so +libutf8proc.dylib: utf8proc.o + $(cc) -dynamiclib -o $@ $^ -install_name $(libdir)/$@ + ruby/Makefile: ruby/extconf.rb cd ruby && ruby extconf.rb diff -r 6921ee309940 -r 951e73a98021 README --- a/README Wed Jul 25 12:00:00 2007 +0200 +++ b/README Fri May 01 12:00:00 2009 +0200 @@ -11,7 +11,7 @@ "make all" can be used to build everything, but both ruby and PostgreSQL installations are required in this case. -For ruby there is alternatively provided a gem-file "utf8proc-1.1.1.gem". +For ruby there is alternatively provided a gem-file "utf8proc-1.1.3.gem". *** GENERAL INFORMATION *** @@ -110,6 +110,8 @@ If you find any bugs or experience difficulties in compiling this software, please contact me: -Jan Behrens -http://www.flexiguided.de/publications.utf8proc.en.html +Project page: http://www.flexiguided.de/publications.utf8proc.en.html +Contact form: http://www.flexiguided.de/contactform.en.html +Jan Behrens + diff -r 6921ee309940 -r 951e73a98021 pgsql/utf8proc_pgsql.c --- a/pgsql/utf8proc_pgsql.c Wed Jul 25 12:00:00 2007 +0200 +++ b/pgsql/utf8proc_pgsql.c Fri May 01 12:00:00 2009 +0200 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006-2007 Jan Behrens, FlexiGuided GmbH, Berlin + * Copyright (c) 2006-2009 Jan Behrens, FlexiGuided GmbH, Berlin * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -23,8 +23,8 @@ /* * File name: pgsql/utf8proc_pgsql.c - * Version: 1.1.1 - * Last changed: 2007-07-22 + * Version: 1.1.3 + * Last changed: 2009-05-01 * * Description: * PostgreSQL extension to provide two functions 'unifold' and 'unistrip', @@ -77,7 +77,7 @@ result = utf8proc_reencode( (int32_t *)VARDATA(output_string), result, options ); - if (result >= 0) VARATT_SIZEP(output_string) = result + VARHDRSZ; + if (result >= 0) SET_VARSIZE(output_string, result + VARHDRSZ); return result; } diff -r 6921ee309940 -r 951e73a98021 ruby/gem/LICENSE --- a/ruby/gem/LICENSE Wed Jul 25 12:00:00 2007 +0200 +++ b/ruby/gem/LICENSE Fri May 01 12:00:00 2009 +0200 @@ -1,1 +1,64 @@ -../../LICENSE \ No newline at end of file + +Copyright (c) 2006-2009 Jan Behrens, FlexiGuided GmbH, Berlin + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + + +This software distribution contains derived data from a modified version of +the Unicode data files. The following license applies to that data: + +COPYRIGHT AND PERMISSION NOTICE + +Copyright (c) 1991-2007 Unicode, Inc. All rights reserved. Distributed +under the Terms of Use in http://www.unicode.org/copyright.html. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of the Unicode data files and any associated documentation (the "Data +Files") or Unicode software and any associated documentation (the +"Software") to deal in the Data Files or Software without restriction, +including without limitation the rights to use, copy, modify, merge, +publish, distribute, and/or sell copies of the Data Files or Software, and +to permit persons to whom the Data Files or Software are furnished to do +so, provided that (a) the above copyright notice(s) and this permission +notice appear with all copies of the Data Files or Software, (b) both the +above copyright notice(s) and this permission notice appear in associated +documentation, and (c) there is clear notice in each modified Data File or +in the Software as well as in the documentation associated with the Data +File(s) or Software that the data or software has been modified. + +THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY +KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF +THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS +INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR +CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF +USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THE DATA FILES OR SOFTWARE. + +Except as contained in this notice, the name of a copyright holder shall +not be used in advertising or otherwise to promote the sale, use or other +dealings in these Data Files or Software without prior written +authorization of the copyright holder. + + +Unicode and the Unicode logo are trademarks of Unicode, Inc., and may be +registered in some jurisdictions. All other trademarks and registered +trademarks mentioned herein are the property of their respective owners. + diff -r 6921ee309940 -r 951e73a98021 ruby/gem/lib/utf8proc.rb --- a/ruby/gem/lib/utf8proc.rb Wed Jul 25 12:00:00 2007 +0200 +++ b/ruby/gem/lib/utf8proc.rb Fri May 01 12:00:00 2009 +0200 @@ -1,1 +1,100 @@ -../../utf8proc.rb \ No newline at end of file +# Copyright (c) 2006-2007 Jan Behrens, FlexiGuided GmbH, Berlin +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + + +# +# File name: ruby/utf8proc.rb +# Version: 1.1.1 +# Last changed: 2006-09-17 +# +# Description: +# Part of the ruby wrapper for libutf8proc, which is written in ruby. +# + + +require 'utf8proc_native' + + +module Utf8Proc + + SpecialChars = { + :HT => "\x09", + :LF => "\x0A", + :VT => "\x0B", + :FF => "\x0C", + :CR => "\x0D", + :FS => "\x1C", + :GS => "\x1D", + :RS => "\x1E", + :US => "\x1F", + :LS => "\xE2\x80\xA8", + :PS => "\xE2\x80\xA9", + } + + module StringExtensions + def utf8map(*option_array) + options = 0 + option_array.each do |option| + flag = Utf8Proc::Options[option] + raise ArgumentError, "Unknown argument given to String#utf8map." unless + flag + options |= flag + end + return Utf8Proc::utf8map(self, options) + end + def utf8map!(*option_array) + self.replace(self.utf8map(*option_array)) + end + def utf8nfd; utf8map( :stable, :decompose); end + def utf8nfd!; utf8map!(:stable, :decompose); end + def utf8nfc; utf8map( :stable, :compose); end + def utf8nfc!; utf8map!(:stable, :compose); end + def utf8nfkd; utf8map( :stable, :decompose, :compat); end + def utf8nfkd!; utf8map!(:stable, :decompose, :compat); end + def utf8nfkc; utf8map( :stable, :compose, :compat); end + def utf8nfkc!; utf8map!(:stable, :compose, :compat); end + def utf8chars + result = self.utf8map(:charbound).split("\377") + result.shift if result.first == "" + result + end + def char_ary + # depecated, use String#utf8chars instead + utf8chars + end + end + + module IntegerExtensions + def utf8 + return Utf8Proc::utf8char(self) + end + end + +end + + +class String + include(Utf8Proc::StringExtensions) +end + +class Integer + include(Utf8Proc::IntegerExtensions) +end + diff -r 6921ee309940 -r 951e73a98021 ruby/gem/utf8proc.gemspec --- a/ruby/gem/utf8proc.gemspec Wed Jul 25 12:00:00 2007 +0200 +++ b/ruby/gem/utf8proc.gemspec Fri May 01 12:00:00 2009 +0200 @@ -1,9 +1,8 @@ require 'rubygems' SPEC = Gem::Specification.new do |s| s.name = 'utf8proc' - s.version = '1.1.2' + s.version = '1.1.3' s.author = 'Jan Behrens' - s.email = 'jan.behrens.n4272.expires-2008-06@flexiguided.de' s.homepage = 'http://www.flexiguided.de/publications.utf8proc.en.html' s.summary = 'UTF-8 Unicode string processing' s.files = ['LICENSE', 'lib/utf8proc.rb', 'ext/utf8proc_native.c'] diff -r 6921ee309940 -r 951e73a98021 utf8proc-1.1.2.gem Binary file utf8proc-1.1.2.gem has changed diff -r 6921ee309940 -r 951e73a98021 utf8proc-1.1.3.gem Binary file utf8proc-1.1.3.gem has changed diff -r 6921ee309940 -r 951e73a98021 utf8proc.c --- a/utf8proc.c Wed Jul 25 12:00:00 2007 +0200 +++ b/utf8proc.c Fri May 01 12:00:00 2009 +0200 @@ -97,6 +97,10 @@ #define UTF8PROC_BOUNDCLASS_LVT 10 +const char *utf8proc_version(void) { + return "1.1.3"; +} + const char *utf8proc_errmsg(ssize_t errcode) { switch (errcode) { case UTF8PROC_ERROR_NOMEM: diff -r 6921ee309940 -r 951e73a98021 utf8proc.h --- a/utf8proc.h Wed Jul 25 12:00:00 2007 +0200 +++ b/utf8proc.h Fri May 01 12:00:00 2009 +0200 @@ -220,6 +220,8 @@ extern const int8_t utf8proc_utf8class[256]; +const char *utf8proc_version(void); + const char *utf8proc_errmsg(ssize_t errcode); /* * Returns a static error string for the given error code.