utf8proc

changeset 3:4ee0d5f54af1 v1.0
Version 1.0

- added the LUMP option, which lumps certain characters together (see lump.txt) (also used for the PostgreSQL "unifold" function)
- added the STRIPMARK option, which strips marking characters (or marks of composed characters)
- deprecated ruby method String#char_ary in favour of String#utf8chars
author: jbe
date: Sun Sep 17 12:00:00 2006 +0200 (2006-09-17)
parents: aaad485d5335
children: a49e32490aac
files: Changelog lump.txt pgsql/utf8proc_pgsql.c ruby/utf8proc.rb ruby/utf8proc_native.c utf8proc.c utf8proc.h
     1.1 --- a/Changelog	Fri Aug 04 12:00:00 2006 +0200
     1.2 +++ b/Changelog	Sun Sep 17 12:00:00 2006 +0200
     1.3 @@ -31,3 +31,12 @@
     1.4  
     1.5  Release of version 0.3
     1.6  
     1.7 +2006-09-17:
     1.8 +- added the LUMP option, which lumps certain characters together
     1.9 +  (see lump.txt) (also used for the PostgreSQL "unifold" function)
    1.10 +- added the STRIPMARK option, which strips marking characters
    1.11 +  (or marks of composed characters)
    1.12 +- deprecated ruby method String#char_ary in favour of String#utf8chars
    1.13 +
    1.14 +Release of version 1.0
    1.15 +

     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/lump.txt	Sun Sep 17 12:00:00 2006 +0200
     2.3 @@ -0,0 +1,26 @@
     2.4 +U+0020      <-- all space characters (general category Zs)
     2.5 +U+0027  '   <-- left/right single quotation mark U+2018..2019,
     2.6 +                modifier letter apostrophe U+02BC,
     2.7 +                modifier letter vertical line U+02C8
     2.8 +U+002D  -   <-- all dash characters (general category Pd),
     2.9 +                minus U+2212
    2.10 +U+002F  /   <-- fraction slash U+2044,
    2.11 +                division slash U+2215
    2.12 +U+003A  :   <-- ratio U+2236
    2.13 +U+003C  <   <-- single left-pointing angle quotation mark U+2039,
    2.14 +                left-pointing angle bracket U+2329,
    2.15 +                left angle bracket U+3008
    2.16 +U+003E  >   <-- single right-pointing angle quotation mark U+203A,
    2.17 +                right-pointing angle bracket U+232A,
    2.18 +                right angle bracket U+3009
    2.19 +U+005C  \   <-- set minus U+2216
    2.20 +U+005E  ^   <-- modifier letter up arrowhead U+02C4,
    2.21 +                modifier letter circumflex accent U+02C6,
    2.22 +                caret U+2038,
    2.23 +                up arrowhead U+2303
    2.24 +U+005F  _   <-- all connector characters (general category Pc),
    2.25 +                modifier letter low macron U+02CD
    2.26 +U+0060  `   <-- modifier letter grave accent U+02CB
    2.27 +U+007C  |   <-- divides U+2223
    2.28 +U+007E  ~   <-- tilde operator U+223C
    2.29 +

     3.1 --- a/pgsql/utf8proc_pgsql.c	Fri Aug 04 12:00:00 2006 +0200
     3.2 +++ b/pgsql/utf8proc_pgsql.c	Sun Sep 17 12:00:00 2006 +0200
     3.3 @@ -33,8 +33,8 @@
     3.4  
     3.5  /*
     3.6   *  File name:    pgsql/utf8proc_pgsql.c
     3.7 - *  Version:      0.3
     3.8 - *  Last changed: 2006-08-04
     3.9 + *  Version:      1.0
    3.10 + *  Last changed: 2006-09-17
    3.11   *
    3.12   *  Description:
    3.13   *  PostgreSQL extension to provide a function 'unifold', which can be used
    3.14 @@ -53,7 +53,7 @@
    3.15  
    3.16  #define UTF8PROC_PGSQL_OPTS ( UTF8PROC_REJECTNA | UTF8PROC_COMPAT | \
    3.17    UTF8PROC_COMPOSE | UTF8PROC_STABLE | UTF8PROC_IGNORE | UTF8PROC_STRIPCC | \
    3.18 -  UTF8PROC_NLF2LF | UTF8PROC_CASEFOLD )
    3.19 +  UTF8PROC_NLF2LF | UTF8PROC_CASEFOLD | UTF8PROC_LUMP )
    3.20  
    3.21  PG_FUNCTION_INFO_V1(utf8proc_pgsql_unifold);
    3.22  Datum utf8proc_pgsql_unifold(PG_FUNCTION_ARGS) {

     4.1 --- a/ruby/utf8proc.rb	Fri Aug 04 12:00:00 2006 +0200
     4.2 +++ b/ruby/utf8proc.rb	Sun Sep 17 12:00:00 2006 +0200
     4.3 @@ -33,8 +33,8 @@
     4.4  
     4.5  ##
     4.6   #  File name:    ruby/utf8proc.rb
     4.7 - #  Version:      0.3
     4.8 - #  Last changed: 2006-08-04
     4.9 + #  Version:      1.0
    4.10 + #  Last changed: 2006-09-17
    4.11   #
    4.12   #  Description:
    4.13   #  Part of the ruby wrapper for libutf8proc, which is written in ruby.
    4.14 @@ -82,10 +82,14 @@
    4.15      def utf8nfkd!; utf8map!(:stable, :decompose, :compat); end
    4.16      def utf8nfkc;  utf8map( :stable, :compose, :compat); end
    4.17      def utf8nfkc!; utf8map!(:stable, :compose, :compat); end
    4.18 +    def utf8chars
    4.19 +      result = self.utf8map(:charbound).split("\377")
    4.20 +      result.shift if result.first.empty?
    4.21 +      result
    4.22 +    end
    4.23      def char_ary
    4.24 -      char_ary = self.utf8map(:charbound).split("\377")
    4.25 -      char_ary.shift if char_ary.first == ''
    4.26 -      char_ary
    4.27 +      # deprecated, use String#utf8chars instead
    4.28 +      utf8chars
    4.29      end
    4.30    end
    4.31  

     5.1 --- a/ruby/utf8proc_native.c	Fri Aug 04 12:00:00 2006 +0200
     5.2 +++ b/ruby/utf8proc_native.c	Sun Sep 17 12:00:00 2006 +0200
     5.3 @@ -33,8 +33,8 @@
     5.4  
     5.5  /*
     5.6   *  File name:    ruby/utf8proc_native.c
     5.7 - *  Version:      0.3
     5.8 - *  Last changed: 2006-08-04
     5.9 + *  Version:      1.0
    5.10 + *  Last changed: 2006-09-17
    5.11   *
    5.12   *  Description:
    5.13   *  Native part of the ruby wrapper for libutf8proc.
    5.14 @@ -65,6 +65,7 @@
    5.15      case UTF8PROC_ERROR_NOMEM:
    5.16      excpt_class = rb_eNoMemError; break;
    5.17      case UTF8PROC_ERROR_OVERFLOW:
    5.18 +    case UTF8PROC_ERROR_INVALIDOPTS:
    5.19      excpt_class = rb_eArgError; break;
    5.20      case UTF8PROC_ERROR_INVALIDUTF8:
    5.21      excpt_class = utf8proc_ruby_eInvalidUtf8Error; break;
    5.22 @@ -157,6 +158,8 @@
    5.23    register_utf8proc_option("stripcc",   UTF8PROC_STRIPCC);
    5.24    register_utf8proc_option("casefold",  UTF8PROC_CASEFOLD);
    5.25    register_utf8proc_option("charbound", UTF8PROC_CHARBOUND);
    5.26 +  register_utf8proc_option("lump",      UTF8PROC_LUMP);
    5.27 +  register_utf8proc_option("stripmark", UTF8PROC_STRIPMARK);
    5.28    OBJ_FREEZE(utf8proc_ruby_options);
    5.29    rb_define_const(utf8proc_ruby_module, "Options", utf8proc_ruby_options);
    5.30  }

     6.1 --- a/utf8proc.c	Fri Aug 04 12:00:00 2006 +0200
     6.2 +++ b/utf8proc.c	Sun Sep 17 12:00:00 2006 +0200
     6.3 @@ -42,8 +42,8 @@
     6.4  
     6.5  /*
     6.6   *  File name:    utf8proc.c
     6.7 - *  Version:      0.3
     6.8 - *  Last changed: 2006-08-04
     6.9 + *  Version:      1.0
    6.10 + *  Last changed: 2006-09-17
    6.11   *
    6.12   *  Description:
    6.13   *  Implementation of libutf8proc.
    6.14 @@ -116,6 +116,8 @@
    6.15      return "Invalid UTF-8 string";
    6.16      case UTF8PROC_ERROR_NOTASSIGNED:
    6.17      return "Unassigned Unicode code point found in UTF-8 string.";
    6.18 +    case UTF8PROC_ERROR_INVALIDOPTS:
    6.19 +    return "Invalid options for UTF-8 processing chosen.";
    6.20      default:
    6.21      return "An unknown error occured while processing UTF-8 data.";
    6.22    }
    6.23 @@ -197,59 +199,103 @@
    6.24    );
    6.25  }
    6.26  
    6.27 +#define utf8proc_decompose_lump(replacement_uc) \
    6.28 +  return utf8proc_decompose_char((replacement_uc), dst, bufsize, \
    6.29 +  options & ~UTF8PROC_LUMP, last_boundclass)
    6.30 +
    6.31  ssize_t utf8proc_decompose_char(int32_t uc, int32_t *dst, ssize_t bufsize,
    6.32      int options, int *last_boundclass) {
    6.33    // ASSERT: uc >= 0 && uc < 0x110000
    6.34    const utf8proc_property_t *property;
    6.35 +  utf8proc_propval_t category;
    6.36    int32_t hangul_sindex;
    6.37    property = utf8proc_get_property(uc);
    6.38 +  category = property->category;
    6.39    hangul_sindex = uc - UTF8PROC_HANGUL_SBASE;
    6.40 -  if ((options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) &&
    6.41 -      hangul_sindex >= 0 && hangul_sindex < UTF8PROC_HANGUL_SCOUNT) {
    6.42 -    int32_t hangul_tindex;
    6.43 -    if (bufsize >= 1) {
    6.44 -      dst[0] = UTF8PROC_HANGUL_LBASE +
    6.45 -        hangul_sindex / UTF8PROC_HANGUL_NCOUNT;
    6.46 -      if (bufsize >= 2) dst[1] = UTF8PROC_HANGUL_VBASE +
    6.47 -        (hangul_sindex % UTF8PROC_HANGUL_NCOUNT) / UTF8PROC_HANGUL_TCOUNT;
    6.48 +  if (options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) {
    6.49 +    if (hangul_sindex >= 0 && hangul_sindex < UTF8PROC_HANGUL_SCOUNT) {
    6.50 +      int32_t hangul_tindex;
    6.51 +      if (bufsize >= 1) {
    6.52 +        dst[0] = UTF8PROC_HANGUL_LBASE +
    6.53 +          hangul_sindex / UTF8PROC_HANGUL_NCOUNT;
    6.54 +        if (bufsize >= 2) dst[1] = UTF8PROC_HANGUL_VBASE +
    6.55 +          (hangul_sindex % UTF8PROC_HANGUL_NCOUNT) / UTF8PROC_HANGUL_TCOUNT;
    6.56 +      }
    6.57 +      hangul_tindex = hangul_sindex % UTF8PROC_HANGUL_TCOUNT;
    6.58 +      if (!hangul_tindex) return 2;
    6.59 +      if (bufsize >= 3) dst[2] = UTF8PROC_HANGUL_TBASE + hangul_tindex;
    6.60 +      return 3;
    6.61      }
    6.62 -    hangul_tindex = hangul_sindex % UTF8PROC_HANGUL_TCOUNT;
    6.63 -    if (!hangul_tindex) return 2;
    6.64 -    if (bufsize >= 3) dst[2] = UTF8PROC_HANGUL_TBASE + hangul_tindex;
    6.65 -    return 3;
    6.66 -  } else if ((options & UTF8PROC_REJECTNA) && !property->category) {
    6.67 -    return UTF8PROC_ERROR_NOTASSIGNED;
    6.68 -  } else if ((options & UTF8PROC_IGNORE) && property->ignorable) {
    6.69 -    return 0;
    6.70 -  } else if ((options & UTF8PROC_CASEFOLD) && property->casefold_mapping) {
    6.71 -    const int32_t *casefold_entry;
    6.72 -    ssize_t written = 0;
    6.73 -    for (casefold_entry = property->casefold_mapping;
    6.74 -        *casefold_entry >= 0; casefold_entry++) {
    6.75 -      written += utf8proc_decompose_char(*casefold_entry, dst+written,
    6.76 -        (bufsize > written) ? (bufsize - written) : 0, options,
    6.77 +  }
    6.78 +  if (options & UTF8PROC_REJECTNA) {
    6.79 +    if (!category) return UTF8PROC_ERROR_NOTASSIGNED;
    6.80 +  }
    6.81 +  if (options & UTF8PROC_IGNORE) {
    6.82 +    if (property->ignorable) return 0;
    6.83 +  }
    6.84 +  if (options & UTF8PROC_LUMP) {
    6.85 +    if (category == UTF8PROC_CATEGORY_ZS) utf8proc_decompose_lump(0x0020);
    6.86 +    if (uc == 0x2018 || uc == 0x2019 || uc == 0x02BC || uc == 0x02C8)
    6.87 +      utf8proc_decompose_lump(0x0027);
    6.88 +    if (category == UTF8PROC_CATEGORY_PD || uc == 0x2212)
    6.89 +      utf8proc_decompose_lump(0x002D);
    6.90 +    if (uc == 0x2044 || uc == 0x2215) utf8proc_decompose_lump(0x002F);
    6.91 +    if (uc == 0x2236) utf8proc_decompose_lump(0x003A);
    6.92 +    if (uc == 0x2039 || uc == 0x2329 || uc == 0x3008)
    6.93 +      utf8proc_decompose_lump(0x003C);
    6.94 +    if (uc == 0x203A || uc == 0x232A || uc == 0x3009)
    6.95 +      utf8proc_decompose_lump(0x003E);
    6.96 +    if (uc == 0x2216) utf8proc_decompose_lump(0x005C);
    6.97 +    if (uc == 0x02C4 || uc == 0x02C6 || uc == 0x2038 || uc == 0x2303)
    6.98 +      utf8proc_decompose_lump(0x005E);
    6.99 +    if (category == UTF8PROC_CATEGORY_PC || uc == 0x02CD)
   6.100 +      utf8proc_decompose_lump(0x005F);
   6.101 +    if (uc == 0x02CB) utf8proc_decompose_lump(0x0060);
   6.102 +    if (uc == 0x2223) utf8proc_decompose_lump(0x007C);
   6.103 +    if (uc == 0x223C) utf8proc_decompose_lump(0x007E);
   6.104 +    if ((options & UTF8PROC_NLF2LS) && (options & UTF8PROC_NLF2PS)) {
   6.105 +      if (category == UTF8PROC_CATEGORY_ZL ||
   6.106 +          category == UTF8PROC_CATEGORY_ZP)
   6.107 +        utf8proc_decompose_lump(0x000A);
   6.108 +    }
   6.109 +  }
   6.110 +  if (options & UTF8PROC_STRIPMARK) {
   6.111 +    if (category == UTF8PROC_CATEGORY_MN ||
   6.112 +      category == UTF8PROC_CATEGORY_MC ||
   6.113 +      category == UTF8PROC_CATEGORY_ME) return 0;
   6.114 +  }
   6.115 +  if (options & UTF8PROC_CASEFOLD) {
   6.116 +    if (property->casefold_mapping) {
   6.117 +      const int32_t *casefold_entry;
   6.118 +      ssize_t written = 0;
   6.119 +      for (casefold_entry = property->casefold_mapping;
   6.120 +          *casefold_entry >= 0; casefold_entry++) {
   6.121 +        written += utf8proc_decompose_char(*casefold_entry, dst+written,
   6.122 +          (bufsize > written) ? (bufsize - written) : 0, options,
   6.123 +          last_boundclass);
   6.124 +        if (written < 0) return UTF8PROC_ERROR_OVERFLOW;
   6.125 +      }
   6.126 +      return written;
   6.127 +    }
   6.128 +  }
   6.129 +  if (options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) {
   6.130 +    if (property->decomp_mapping &&
   6.131 +        (!property->decomp_type || (options & UTF8PROC_COMPAT))) {
   6.132 +      const int32_t *decomp_entry;
   6.133 +      ssize_t written = 0;
   6.134 +      for (decomp_entry = property->decomp_mapping;
   6.135 +          *decomp_entry >= 0; decomp_entry++) {
   6.136 +        written += utf8proc_decompose_char(*decomp_entry, dst+written,
   6.137 +          (bufsize > written) ? (bufsize - written) : 0, options,
   6.138          last_boundclass);
   6.139 -      if (written < 0) return UTF8PROC_ERROR_OVERFLOW;
   6.140 +        if (written < 0) return UTF8PROC_ERROR_OVERFLOW;
   6.141 +      }
   6.142 +      return written;
   6.143      }
   6.144 -    return written;
   6.145 -  } else if ((options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) &&
   6.146 -      property->decomp_mapping &&
   6.147 -      (!property->decomp_type || (options & UTF8PROC_COMPAT))) {
   6.148 -    const int32_t *decomp_entry;
   6.149 -    ssize_t written = 0;
   6.150 -    for (decomp_entry = property->decomp_mapping;
   6.151 -        *decomp_entry >= 0; decomp_entry++) {
   6.152 -      written += utf8proc_decompose_char(*decomp_entry, dst+written,
   6.153 -        (bufsize > written) ? (bufsize - written) : 0, options,
   6.154 -        last_boundclass);
   6.155 -      if (written < 0) return UTF8PROC_ERROR_OVERFLOW;
   6.156 -    }
   6.157 -    return written;
   6.158 -  } else if (options & UTF8PROC_CHARBOUND) {
   6.159 +  }
   6.160 +  if (options & UTF8PROC_CHARBOUND) {
   6.161      bool boundary;
   6.162      int tbc, lbc;
   6.163 -    int category;
   6.164 -    category = property->category;
   6.165      tbc =
   6.166        (uc == 0x000D) ? UTF8PROC_BOUNDCLASS_CR :
   6.167        (uc == 0x000A) ? UTF8PROC_BOUNDCLASS_LF :
   6.168 @@ -306,6 +352,11 @@
   6.169      int32_t *buffer, ssize_t bufsize, int options) {
   6.170    // strlen will be ignored, if UTF8PROC_NULLTERM is set in options
   6.171    ssize_t wpos = 0;
   6.172 +  if ((options & UTF8PROC_COMPOSE) && (options & UTF8PROC_DECOMPOSE))
   6.173 +    return UTF8PROC_ERROR_INVALIDOPTS;
   6.174 +  if ((options & UTF8PROC_STRIPMARK) &&
   6.175 +      !(options & UTF8PROC_COMPOSE) && !(options & UTF8PROC_DECOMPOSE))
   6.176 +    return UTF8PROC_ERROR_INVALIDOPTS;
   6.177    {
   6.178      int32_t uc;
   6.179      ssize_t rpos = 0;
   6.180 @@ -395,7 +446,7 @@
   6.181      int32_t *starter = NULL;
   6.182      int32_t current_char;
   6.183      const utf8proc_property_t *starter_property = NULL, *current_property;
   6.184 -    int16_t max_combining_class = -1;
   6.185 +    utf8proc_propval_t max_combining_class = -1;
   6.186      ssize_t rpos;
   6.187      ssize_t wpos = 0;
   6.188      int32_t composition;

     7.1 --- a/utf8proc.h	Fri Aug 04 12:00:00 2006 +0200
     7.2 +++ b/utf8proc.h	Sun Sep 17 12:00:00 2006 +0200
     7.3 @@ -42,8 +42,8 @@
     7.4  
     7.5  /*
     7.6   *  File name:    utf8proc.h
     7.7 - *  Version:      0.3
     7.8 - *  Last changed: 2006-08-04
     7.9 + *  Version:      1.0
    7.10 + *  Last changed: 2006-09-17
    7.11   *
    7.12   *  Description:
    7.13   *  Header files for libutf8proc, which is a mapping tool for UTF-8 strings
    7.14 @@ -52,8 +52,12 @@
    7.15   *  - replacing compatibility characters with their equivalents
    7.16   *  - stripping of "default ignorable characters"
    7.17   *    like SOFT-HYPHEN or ZERO-WIDTH-SPACE
    7.18 + *  - folding of certain characters for string comparison
    7.19 + *    (e.g. HYPHEN U+2010 and MINUS U+2212 to ASCII "-")
    7.20 + *    (see "LUMP" option)
    7.21   *  - optional rejection of strings containing non-assigned code points
    7.22   *  - stripping of control characters
    7.23 + *  - stripping of character marks (accents, etc.)
    7.24   *  - transformation of LF, CRLF, CR and NEL to line-feed (LF)
    7.25   *    or to the unicode characters for paragraph separation (PS)
    7.26   *    or line separation (LS).
    7.27 @@ -91,6 +95,8 @@
    7.28  #define UTF8PROC_STRIPCC   (1<<9)
    7.29  #define UTF8PROC_CASEFOLD  (1<<10)
    7.30  #define UTF8PROC_CHARBOUND (1<<11)
    7.31 +#define UTF8PROC_LUMP      (1<<12)
    7.32 +#define UTF8PROC_STRIPMARK (1<<13)
    7.33  /*
    7.34   *  Flags being regarded by several functions in the library:
    7.35   *  NULLTERM:  The given UTF-8 input is NULL terminated.
    7.36 @@ -118,12 +124,21 @@
    7.37   *             case-insensitive string comparison.
    7.38   *  CHARBOUND: Inserts 0xFF bytes at the beginning of each sequence which is
    7.39   *             representing a single grapheme cluster (a single character).
    7.40 + *  LUMP:      Lumps certain characters together
    7.41 + *             (e.g. HYPHEN U+2010 and MINUS U+2212 to ASCII "-").
    7.42 + *             (See lump.txt for details.)
    7.43 + *             If NLF2LF is set, this includes a transformation of paragraph
    7.44 + *             and line separators to ASCII line-feed (LF).
    7.45 + *  STRIPMARK: Strips all character markings
    7.46 + *             (non-spacing, spacing and enclosing) (i.e. accents)
    7.47 + *             NOTE: this option works only with COMPOSE or DECOMPOSE
    7.48   */
    7.49  
    7.50  #define UTF8PROC_ERROR_NOMEM -1
    7.51  #define UTF8PROC_ERROR_OVERFLOW -2
    7.52  #define UTF8PROC_ERROR_INVALIDUTF8 -3
    7.53  #define UTF8PROC_ERROR_NOTASSIGNED -4
    7.54 +#define UTF8PROC_ERROR_INVALIDOPTS -5
    7.55  /*
    7.56   *  Error codes being returned by almost all functions:
    7.57   *  ERROR_NOMEM:       Memory could not be allocated.
    7.58 @@ -131,13 +146,15 @@
    7.59   *  ERROR_INVALIDUTF8: The given string is not a legal UTF-8 string.
    7.60   *  ERROR_NOTASSIGNED: The REJECTNA flag was set,
    7.61   *                     and an unassigned code point was found.
    7.62 + *  ERROR_INVALIDOPTS: Invalid options have been used.
    7.63   */
    7.64  
    7.65 +typedef int16_t utf8proc_propval_t;
    7.66  typedef struct utf8proc_property_struct {
    7.67 -  int16_t category;
    7.68 -  int16_t combining_class;
    7.69 -  int16_t bidi_class;
    7.70 -  int16_t decomp_type;
    7.71 +  utf8proc_propval_t category;
    7.72 +  utf8proc_propval_t combining_class;
    7.73 +  utf8proc_propval_t bidi_class;
    7.74 +  utf8proc_propval_t decomp_type;
    7.75    const int32_t *decomp_mapping;
    7.76    const unsigned bidi_mirrored:1;
    7.77    const int32_t uppercase_mapping;
    7.78 @@ -267,6 +284,8 @@
    7.79   *  COMPAT:    replace certain characters with their
    7.80   *             compatibility decomposition
    7.81   *  CHARBOUND: Inserts 0xFF bytes before each grapheme cluster
    7.82 + *  LUMP:      lumps certain different characters together
    7.83 + *  STRIPMARK: removes all character marks
    7.84   *  The pointer 'last_boundclass' has to point to an integer variable which is
    7.85   *  storing the last character boundary class, if the CHARBOUND option is
    7.86   *  used.
author	jbe
date	Sun Sep 17 12:00:00 2006 +0200 (2006-09-17)
parents	aaad485d5335
children	a49e32490aac
files	Changelog lump.txt pgsql/utf8proc_pgsql.c ruby/utf8proc.rb ruby/utf8proc_native.c utf8proc.c utf8proc.h