utf8proc

changeset 9:951e73a98021 v1.1.3

Version 1.1.3

- Added a function utf8proc_version returning a string containing the version number of the library.
- Included a target libutf8proc.dylib for MacOSX.
- PostgreSQL 8.3 compatibility (use of SET_VARSIZE macro)
author jbe
date Fri May 01 12:00:00 2009 +0200 (2009-05-01)
parents 6921ee309940
children 00d2bcbdc945
files Changelog LICENSE Makefile README pgsql/utf8proc_pgsql.c ruby/gem/LICENSE ruby/gem/lib/utf8proc.rb ruby/gem/utf8proc.gemspec utf8proc-1.1.2.gem utf8proc-1.1.3.gem utf8proc.c utf8proc.h
line diff
     1.1 --- a/Changelog	Wed Jul 25 12:00:00 2007 +0200
     1.2 +++ b/Changelog	Fri May 01 12:00:00 2009 +0200
     1.3 @@ -16,6 +16,7 @@
     1.4  
     1.5  Release of version 0.2
     1.6  
     1.7 +
     1.8  2006-07-18:
     1.9  - changed normalization from NFC to NFKC for postgresql unifold function
    1.10  
    1.11 @@ -33,6 +34,7 @@
    1.12  
    1.13  Release of version 0.3
    1.14  
    1.15 +
    1.16  2006-09-17:
    1.17  - added the LUMP option, which lumps certain characters together
    1.18    (see lump.txt) (also used for the PostgreSQL "unifold" function)
    1.19 @@ -42,11 +44,13 @@
    1.20  
    1.21  Release of version 1.0
    1.22  
    1.23 +
    1.24  2006-09-20:
    1.25  - included a gem file for the ruby version of the library
    1.26  
    1.27  Release of version 1.0.1
    1.28  
    1.29 +
    1.30  2006-09-21:
    1.31  - included a check in Integer#utf8, which raises an exception, if the given
    1.32    code-point is invalid because of being too high (this was missing yet)
    1.33 @@ -56,12 +60,14 @@
    1.34  
    1.35  Release of version 1.0.2
    1.36  
    1.37 +
    1.38  2007-03-16:
    1.39  - Fixed a bug in the ruby library, which caused an error, when splitting an
    1.40    empty string at grapheme cluster boundaries (method String#utf8chars).
    1.41  
    1.42  Release of version 1.0.3
    1.43  
    1.44 +
    1.45  2007-06-25:
    1.46  - Added a new PostgreSQL function 'unistrip', which behaves like 'unifold',
    1.47    but also removes all character marks (e.g. accents).
    1.48 @@ -75,8 +81,22 @@
    1.49  
    1.50  Release of version 1.1.1
    1.51  
    1.52 +
    1.53  2007-07-25:
    1.54  - Fixed a serious bug in the data file generator, which caused characters
    1.55    being treated incorrectly, when stripping default ignorable characters or
    1.56    calculating grapheme cluster boundaries.
    1.57  
    1.58 +Release of version 1.1.2
    1.59 +
    1.60 +
    1.61 +2008-10-04:
    1.62 +- Added a function utf8proc_version returning a string containing the version
    1.63 +  number of the library.
    1.64 +- Included a target libutf8proc.dylib for MacOSX.
    1.65 +
    1.66 +2009-05-01:
    1.67 +- PostgreSQL 8.3 compatibility (use of SET_VARSIZE macro)
    1.68 +
    1.69 +Release of version 1.1.3
    1.70 +
     2.1 --- a/LICENSE	Wed Jul 25 12:00:00 2007 +0200
     2.2 +++ b/LICENSE	Fri May 01 12:00:00 2009 +0200
     2.3 @@ -1,5 +1,5 @@
     2.4  
     2.5 -Copyright (c) 2006-2007 Jan Behrens, FlexiGuided GmbH, Berlin
     2.6 +Copyright (c) 2006-2009 Jan Behrens, FlexiGuided GmbH, Berlin
     2.7  
     2.8  Permission is hereby granted, free of charge, to any person obtaining a
     2.9  copy of this software and associated documentation files (the "Software"),
     3.1 --- a/Makefile	Wed Jul 25 12:00:00 2007 +0200
     3.2 +++ b/Makefile	Fri May 01 12:00:00 2009 +0200
     3.3 @@ -4,7 +4,7 @@
     3.4  # settings
     3.5  
     3.6  cflags = -O2 -std=c99 -pedantic -Wall -fpic $(CFLAGS)
     3.7 -cc = gcc $(cflags)
     3.8 +cc = $(CC) $(cflags)
     3.9  
    3.10  
    3.11  # meta targets
    3.12 @@ -35,6 +35,9 @@
    3.13  	$(cc) -shared -o libutf8proc.so utf8proc.o
    3.14  	chmod a-x libutf8proc.so
    3.15  
    3.16 +libutf8proc.dylib: utf8proc.o
    3.17 +	$(cc) -dynamiclib -o $@ $^ -install_name $(libdir)/$@
    3.18 +
    3.19  ruby/Makefile: ruby/extconf.rb
    3.20  	cd ruby && ruby extconf.rb
    3.21  
     4.1 --- a/README	Wed Jul 25 12:00:00 2007 +0200
     4.2 +++ b/README	Fri May 01 12:00:00 2009 +0200
     4.3 @@ -11,7 +11,7 @@
     4.4  "make all" can be used to build everything, but both ruby and PostgreSQL
     4.5  installations are required in this case.
     4.6  
     4.7 -For ruby there is alternatively provided a gem-file "utf8proc-1.1.1.gem".
     4.8 +For ruby there is alternatively provided a gem-file "utf8proc-1.1.3.gem".
     4.9  
    4.10  
    4.11  *** GENERAL INFORMATION ***
    4.12 @@ -110,6 +110,8 @@
    4.13  If you find any bugs or experience difficulties in compiling this software,
    4.14  please contact me:
    4.15  
    4.16 -Jan Behrens <jan.behrens.n4272.expires-2008-06@flexiguided.de>
    4.17 -http://www.flexiguided.de/publications.utf8proc.en.html
    4.18 +Project page: http://www.flexiguided.de/publications.utf8proc.en.html
    4.19 +Contact form: http://www.flexiguided.de/contactform.en.html
    4.20  
    4.21 +Jan Behrens
    4.22 +
     5.1 --- a/pgsql/utf8proc_pgsql.c	Wed Jul 25 12:00:00 2007 +0200
     5.2 +++ b/pgsql/utf8proc_pgsql.c	Fri May 01 12:00:00 2009 +0200
     5.3 @@ -1,5 +1,5 @@
     5.4  /*
     5.5 - *  Copyright (c) 2006-2007 Jan Behrens, FlexiGuided GmbH, Berlin
     5.6 + *  Copyright (c) 2006-2009 Jan Behrens, FlexiGuided GmbH, Berlin
     5.7   *
     5.8   *  Permission is hereby granted, free of charge, to any person obtaining a
     5.9   *  copy of this software and associated documentation files (the "Software"),
    5.10 @@ -23,8 +23,8 @@
    5.11  
    5.12  /*
    5.13   *  File name:    pgsql/utf8proc_pgsql.c
    5.14 - *  Version:      1.1.1
    5.15 - *  Last changed: 2007-07-22
    5.16 + *  Version:      1.1.3
    5.17 + *  Last changed: 2009-05-01
    5.18   *
    5.19   *  Description:
    5.20   *  PostgreSQL extension to provide two functions 'unifold' and 'unistrip',
    5.21 @@ -77,7 +77,7 @@
    5.22    result = utf8proc_reencode(
    5.23      (int32_t *)VARDATA(output_string), result, options
    5.24    );
    5.25 -  if (result >= 0) VARATT_SIZEP(output_string) = result + VARHDRSZ;
    5.26 +  if (result >= 0) SET_VARSIZE(output_string, result + VARHDRSZ);
    5.27    return result;
    5.28  }
    5.29  
     6.1 --- a/ruby/gem/LICENSE	Wed Jul 25 12:00:00 2007 +0200
     6.2 +++ b/ruby/gem/LICENSE	Fri May 01 12:00:00 2009 +0200
     6.3 @@ -1,1 +1,64 @@
     6.4 -../../LICENSE
     6.5 \ No newline at end of file
     6.6 +
     6.7 +Copyright (c) 2006-2009 Jan Behrens, FlexiGuided GmbH, Berlin
     6.8 +
     6.9 +Permission is hereby granted, free of charge, to any person obtaining a
    6.10 +copy of this software and associated documentation files (the "Software"),
    6.11 +to deal in the Software without restriction, including without limitation
    6.12 +the rights to use, copy, modify, merge, publish, distribute, sublicense,
    6.13 +and/or sell copies of the Software, and to permit persons to whom the
    6.14 +Software is furnished to do so, subject to the following conditions:
    6.15 +
    6.16 +The above copyright notice and this permission notice shall be included in
    6.17 +all copies or substantial portions of the Software.
    6.18 +
    6.19 +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    6.20 +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    6.21 +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    6.22 +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    6.23 +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    6.24 +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    6.25 +DEALINGS IN THE SOFTWARE.
    6.26 +
    6.27 +
    6.28 +This software distribution contains derived data from a modified version of
    6.29 +the Unicode data files. The following license applies to that data:
    6.30 +
    6.31 +COPYRIGHT AND PERMISSION NOTICE
    6.32 +
    6.33 +Copyright (c) 1991-2007 Unicode, Inc. All rights reserved. Distributed
    6.34 +under the Terms of Use in http://www.unicode.org/copyright.html.
    6.35 +
    6.36 +Permission is hereby granted, free of charge, to any person obtaining a
    6.37 +copy of the Unicode data files and any associated documentation (the "Data
    6.38 +Files") or Unicode software and any associated documentation (the
    6.39 +"Software") to deal in the Data Files or Software without restriction,
    6.40 +including without limitation the rights to use, copy, modify, merge,
    6.41 +publish, distribute, and/or sell copies of the Data Files or Software, and
    6.42 +to permit persons to whom the Data Files or Software are furnished to do
    6.43 +so, provided that (a) the above copyright notice(s) and this permission
    6.44 +notice appear with all copies of the Data Files or Software, (b) both the
    6.45 +above copyright notice(s) and this permission notice appear in associated
    6.46 +documentation, and (c) there is clear notice in each modified Data File or
    6.47 +in the Software as well as in the documentation associated with the Data
    6.48 +File(s) or Software that the data or software has been modified.
    6.49 +
    6.50 +THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
    6.51 +KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    6.52 +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
    6.53 +THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
    6.54 +INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR
    6.55 +CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
    6.56 +USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
    6.57 +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
    6.58 +PERFORMANCE OF THE DATA FILES OR SOFTWARE.
    6.59 +
    6.60 +Except as contained in this notice, the name of a copyright holder shall
    6.61 +not be used in advertising or otherwise to promote the sale, use or other
    6.62 +dealings in these Data Files or Software without prior written
    6.63 +authorization of the copyright holder.
    6.64 +
    6.65 +
    6.66 +Unicode and the Unicode logo are trademarks of Unicode, Inc., and may be
    6.67 +registered in some jurisdictions. All other trademarks and registered
    6.68 +trademarks mentioned herein are the property of their respective owners.
    6.69 +
     7.1 --- a/ruby/gem/lib/utf8proc.rb	Wed Jul 25 12:00:00 2007 +0200
     7.2 +++ b/ruby/gem/lib/utf8proc.rb	Fri May 01 12:00:00 2009 +0200
     7.3 @@ -1,1 +1,100 @@
     7.4 -../../utf8proc.rb
     7.5 \ No newline at end of file
     7.6 +#  Copyright (c) 2006-2007 Jan Behrens, FlexiGuided GmbH, Berlin
     7.7 +#
     7.8 +#  Permission is hereby granted, free of charge, to any person obtaining a
     7.9 +#  copy of this software and associated documentation files (the "Software"),
    7.10 +#  to deal in the Software without restriction, including without limitation
    7.11 +#  the rights to use, copy, modify, merge, publish, distribute, sublicense,
    7.12 +#  and/or sell copies of the Software, and to permit persons to whom the
    7.13 +#  Software is furnished to do so, subject to the following conditions:
    7.14 +#
    7.15 +#  The above copyright notice and this permission notice shall be included in
    7.16 +#  all copies or substantial portions of the Software.
    7.17 +#
    7.18 +#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    7.19 +#  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    7.20 +#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    7.21 +#  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    7.22 +#  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    7.23 +#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    7.24 +#  DEALINGS IN THE SOFTWARE.
    7.25 +
    7.26 +
    7.27 +#
    7.28 +#  File name:    ruby/utf8proc.rb
    7.29 +#  Version:      1.1.1
    7.30 +#  Last changed: 2006-09-17
    7.31 +#
    7.32 +#  Description:
    7.33 +#  Part of the ruby wrapper for libutf8proc, which is written in ruby.
    7.34 +#
    7.35 +
    7.36 +
    7.37 +require 'utf8proc_native'
    7.38 +
    7.39 +
    7.40 +module Utf8Proc
    7.41 +
    7.42 +  SpecialChars = {
    7.43 +    :HT => "\x09",
    7.44 +    :LF => "\x0A",
    7.45 +    :VT => "\x0B",
    7.46 +    :FF => "\x0C",
    7.47 +    :CR => "\x0D",
    7.48 +    :FS => "\x1C",
    7.49 +    :GS => "\x1D",
    7.50 +    :RS => "\x1E",
    7.51 +    :US => "\x1F",
    7.52 +    :LS => "\xE2\x80\xA8",
    7.53 +    :PS => "\xE2\x80\xA9",
    7.54 +  }
    7.55 +
    7.56 +  module StringExtensions
    7.57 +    def utf8map(*option_array)
    7.58 +      options = 0
    7.59 +      option_array.each do |option|
    7.60 +        flag = Utf8Proc::Options[option]
    7.61 +        raise ArgumentError, "Unknown argument given to String#utf8map." unless
    7.62 +          flag
    7.63 +        options |= flag
    7.64 +      end
    7.65 +      return Utf8Proc::utf8map(self, options)
    7.66 +    end
    7.67 +    def utf8map!(*option_array)
    7.68 +      self.replace(self.utf8map(*option_array))
    7.69 +    end
    7.70 +    def utf8nfd;   utf8map( :stable, :decompose); end
    7.71 +    def utf8nfd!;  utf8map!(:stable, :decompose); end
    7.72 +    def utf8nfc;   utf8map( :stable, :compose); end
    7.73 +    def utf8nfc!;  utf8map!(:stable, :compose); end
    7.74 +    def utf8nfkd;  utf8map( :stable, :decompose, :compat); end
    7.75 +    def utf8nfkd!; utf8map!(:stable, :decompose, :compat); end
    7.76 +    def utf8nfkc;  utf8map( :stable, :compose, :compat); end
    7.77 +    def utf8nfkc!; utf8map!(:stable, :compose, :compat); end
    7.78 +    def utf8chars
    7.79 +      result = self.utf8map(:charbound).split("\377")
    7.80 +      result.shift if result.first == ""
    7.81 +      result
    7.82 +    end
    7.83 +    def char_ary
    7.84 +      # deprecated, use String#utf8chars instead
    7.85 +      utf8chars
    7.86 +    end
    7.87 +  end
    7.88 +
    7.89 +  module IntegerExtensions
    7.90 +    def utf8
    7.91 +      return Utf8Proc::utf8char(self)
    7.92 +    end
    7.93 +  end
    7.94 +
    7.95 +end
    7.96 +
    7.97 +
    7.98 +class String
    7.99 +  include(Utf8Proc::StringExtensions)
   7.100 +end
   7.101 +
   7.102 +class Integer
   7.103 +  include(Utf8Proc::IntegerExtensions)
   7.104 +end
   7.105 +
     8.1 --- a/ruby/gem/utf8proc.gemspec	Wed Jul 25 12:00:00 2007 +0200
     8.2 +++ b/ruby/gem/utf8proc.gemspec	Fri May 01 12:00:00 2009 +0200
     8.3 @@ -1,9 +1,8 @@
     8.4  require 'rubygems'
     8.5  SPEC = Gem::Specification.new do |s|
     8.6    s.name = 'utf8proc'
     8.7 -  s.version = '1.1.2'
     8.8 +  s.version = '1.1.3'
     8.9    s.author = 'Jan Behrens'
    8.10 -  s.email = 'jan.behrens.n4272.expires-2008-06@flexiguided.de'
    8.11    s.homepage = 'http://www.flexiguided.de/publications.utf8proc.en.html'
    8.12    s.summary = 'UTF-8 Unicode string processing'
    8.13    s.files = ['LICENSE', 'lib/utf8proc.rb', 'ext/utf8proc_native.c']
     9.1 Binary file utf8proc-1.1.2.gem has changed
    10.1 Binary file utf8proc-1.1.3.gem has changed
    11.1 --- a/utf8proc.c	Wed Jul 25 12:00:00 2007 +0200
    11.2 +++ b/utf8proc.c	Fri May 01 12:00:00 2009 +0200
    11.3 @@ -97,6 +97,10 @@
    11.4  #define UTF8PROC_BOUNDCLASS_LVT     10
    11.5  
    11.6  
    11.7 +const char *utf8proc_version(void) {
    11.8 +  return "1.1.3";
    11.9 +}
   11.10 +
   11.11  const char *utf8proc_errmsg(ssize_t errcode) {
   11.12    switch (errcode) {
   11.13      case UTF8PROC_ERROR_NOMEM:
    12.1 --- a/utf8proc.h	Wed Jul 25 12:00:00 2007 +0200
    12.2 +++ b/utf8proc.h	Fri May 01 12:00:00 2009 +0200
    12.3 @@ -220,6 +220,8 @@
    12.4  
    12.5  extern const int8_t utf8proc_utf8class[256];
    12.6  
    12.7 +const char *utf8proc_version(void);
    12.8 +
    12.9  const char *utf8proc_errmsg(ssize_t errcode);
   12.10  /*
   12.11   *  Returns a static error string for the given error code.

Impressum / About Us