utf8proc

diff ruby/utf8proc_native.c @ 2:aaad485d5335

Version 0.3

- changed normalization from NFC to NFKC for postgresql unifold function
- added support to mark the beginning of a grapheme cluster with 0xFF (option: CHARBOUND)
- added the ruby method String#chars, which returns an array of UTF-8 encoded grapheme clusters
- added NLF2LF transformation in postgresql unifold function
- added the DECOMPOSE option; if you use neither COMPOSE nor DECOMPOSE, no normalization will be performed (unlike previous versions)
- using integer constants rather than C-strings for character properties
- fixed (hopefully) a problem with the ruby library on Mac OS X that occurred when compiler optimization was switched on
author jbe
date Fri Aug 04 12:00:00 2006 +0200 (2006-08-04)
parents 61a89ecc2fb9
children 4ee0d5f54af1
line diff
     1.1 --- a/ruby/utf8proc_native.c	Tue Jun 20 12:00:00 2006 +0200
     1.2 +++ b/ruby/utf8proc_native.c	Fri Aug 04 12:00:00 2006 +0200
     1.3 @@ -33,8 +33,8 @@
     1.4  
     1.5  /*
     1.6   *  File name:    ruby/utf8proc_native.c
     1.7 - *  Version:      0.2
     1.8 - *  Last changed: 2006-06-20
     1.9 + *  Version:      0.3
    1.10 + *  Last changed: 2006-08-04
    1.11   *
    1.12   *  Description:
    1.13   *  Native part of the ruby wrapper for libutf8proc.
    1.14 @@ -90,7 +90,10 @@
    1.15      utf8proc_ruby_mapenv_free, env);
    1.16    result = utf8proc_decompose(RSTRING(str)->ptr, RSTRING(str)->len,
    1.17      NULL, 0, options);
    1.18 -  if (result < 0) utf8proc_ruby_map_error(result);
    1.19 +  if (result < 0) {
    1.20 +    utf8proc_ruby_map_error(result);
    1.21 +    return Qnil;  // needed to prevent problems with optimization
    1.22 +  }
    1.23    env->buffer = ALLOC_N(int32_t, result+1);
    1.24    result = utf8proc_decompose(RSTRING(str)->ptr, RSTRING(str)->len,
    1.25      env->buffer, result, options);
    1.26 @@ -98,12 +101,14 @@
    1.27      free(env->buffer);
    1.28      env->buffer = 0;
    1.29      utf8proc_ruby_map_error(result);
    1.30 +    return Qnil;  // needed to prevent problems with optimization
    1.31    }
    1.32    result = utf8proc_reencode(env->buffer, result, options);
    1.33    if (result < 0) {
    1.34      free(env->buffer);
    1.35      env->buffer = 0;
    1.36      utf8proc_ruby_map_error(result);
    1.37 +    return Qnil;  // needed to prevent problems with optimization
    1.38    }
    1.39    retval = rb_str_new((char *)env->buffer, result);
    1.40    free(env->buffer);
    1.41 @@ -140,16 +145,18 @@
    1.42      utf8proc_ruby_module, "CodeNotAssignedError",
    1.43      utf8proc_ruby_eUnicodeError);
    1.44    utf8proc_ruby_options = rb_hash_new();
    1.45 -  register_utf8proc_option("stable",   UTF8PROC_STABLE);
    1.46 -  register_utf8proc_option("compat",   UTF8PROC_COMPAT);
    1.47 -  register_utf8proc_option("compose",  UTF8PROC_COMPOSE);
    1.48 -  register_utf8proc_option("ignore",   UTF8PROC_IGNORE);
    1.49 -  register_utf8proc_option("rejectna", UTF8PROC_REJECTNA);
    1.50 -  register_utf8proc_option("nlf2ls",   UTF8PROC_NLF2LS);
    1.51 -  register_utf8proc_option("nlf2ps",   UTF8PROC_NLF2PS);
    1.52 -  register_utf8proc_option("nlf2lf",   UTF8PROC_NLF2LF);
    1.53 -  register_utf8proc_option("stripcc",  UTF8PROC_STRIPCC);
    1.54 -  register_utf8proc_option("casefold", UTF8PROC_CASEFOLD);
    1.55 +  register_utf8proc_option("stable",    UTF8PROC_STABLE);
    1.56 +  register_utf8proc_option("compat",    UTF8PROC_COMPAT);
    1.57 +  register_utf8proc_option("compose",   UTF8PROC_COMPOSE);
    1.58 +  register_utf8proc_option("decompose", UTF8PROC_DECOMPOSE);
    1.59 +  register_utf8proc_option("ignore",    UTF8PROC_IGNORE);
    1.60 +  register_utf8proc_option("rejectna",  UTF8PROC_REJECTNA);
    1.61 +  register_utf8proc_option("nlf2ls",    UTF8PROC_NLF2LS);
    1.62 +  register_utf8proc_option("nlf2ps",    UTF8PROC_NLF2PS);
    1.63 +  register_utf8proc_option("nlf2lf",    UTF8PROC_NLF2LF);
    1.64 +  register_utf8proc_option("stripcc",   UTF8PROC_STRIPCC);
    1.65 +  register_utf8proc_option("casefold",  UTF8PROC_CASEFOLD);
    1.66 +  register_utf8proc_option("charbound", UTF8PROC_CHARBOUND);
    1.67    OBJ_FREEZE(utf8proc_ruby_options);
    1.68    rb_define_const(utf8proc_ruby_module, "Options", utf8proc_ruby_options);
    1.69  }

Impressum / About Us