utf8proc
view ruby/utf8proc_native.c @ 17:47b467f4c128
Contribution from libmojibake fork (missing file "normtest.c")
| author | Jiahao Chen, Steven G. Johnson, Anthony David Kelman | 
|---|---|
| date | Mon Dec 01 14:32:19 2014 -0500 (2014-12-01) | 
| parents | d79da2302625 | 
| children | 
 line source
     1 /*
     2  *  Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
     3  *
     4  *  Permission is hereby granted, free of charge, to any person obtaining a
     5  *  copy of this software and associated documentation files (the "Software"),
     6  *  to deal in the Software without restriction, including without limitation
     7  *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
     8  *  and/or sell copies of the Software, and to permit persons to whom the
     9  *  Software is furnished to do so, subject to the following conditions:
    10  *
    11  *  The above copyright notice and this permission notice shall be included in
    12  *  all copies or substantial portions of the Software.
    13  *
    14  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    15  *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    16  *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    17  *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    18  *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    19  *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    20  *  DEALINGS IN THE SOFTWARE.
    21  */
    24 /*
    25  *  File name:    ruby/utf8proc_native.c
    26  *
    27  *  Description:
    28  *  Native part of the ruby wrapper for libutf8proc.
    29  */
    32 #include "../utf8proc.c"
    33 #include "ruby.h"
    35 #ifndef RSTRING_PTR
    36 #define RSTRING_PTR(s) (RSTRING(s)->ptr)
    37 #endif
    38 #ifndef RSTRING_LEN
    39 #define RSTRING_LEN(s) (RSTRING(s)->len)
    40 #endif
    42 typedef struct utf8proc_ruby_mapenv_struct {
    43   int32_t *buffer;
    44 } utf8proc_ruby_mapenv_t;
    46 void utf8proc_ruby_mapenv_free(utf8proc_ruby_mapenv_t *env) {
    47   free(env->buffer);
    48   free(env);
    49 }
    51 VALUE utf8proc_ruby_module;
    52 VALUE utf8proc_ruby_options;
    53 VALUE utf8proc_ruby_eUnicodeError;
    54 VALUE utf8proc_ruby_eInvalidUtf8Error;
    55 VALUE utf8proc_ruby_eCodeNotAssignedError;
    57 VALUE utf8proc_ruby_map_error(ssize_t result) {
    58   VALUE excpt_class;
    59   switch (result) {
    60     case UTF8PROC_ERROR_NOMEM:
    61     excpt_class = rb_eNoMemError; break;
    62     case UTF8PROC_ERROR_OVERFLOW:
    63     case UTF8PROC_ERROR_INVALIDOPTS:
    64     excpt_class = rb_eArgError; break;
    65     case UTF8PROC_ERROR_INVALIDUTF8:
    66     excpt_class = utf8proc_ruby_eInvalidUtf8Error; break;
    67     case UTF8PROC_ERROR_NOTASSIGNED:
    68     excpt_class = utf8proc_ruby_eCodeNotAssignedError; break;
    69     default:
    70     excpt_class = rb_eRuntimeError;
    71   }
    72   rb_raise(excpt_class, "%s", utf8proc_errmsg(result));
    73   return Qnil;
    74 }
    76 VALUE utf8proc_ruby_map(VALUE self, VALUE str_param, VALUE options_param) {
    77   VALUE str;
    78   int options;
    79   VALUE env_obj;
    80   utf8proc_ruby_mapenv_t *env;
    81   ssize_t result;
    82   VALUE retval;
    83   str = StringValue(str_param);
    84   options = NUM2INT(options_param) & ~UTF8PROC_NULLTERM;
    85   env_obj = Data_Make_Struct(rb_cObject, utf8proc_ruby_mapenv_t, NULL,
    86     utf8proc_ruby_mapenv_free, env);
    87   result = utf8proc_decompose(RSTRING_PTR(str), RSTRING_LEN(str),
    88     NULL, 0, options);
    89   if (result < 0) {
    90     utf8proc_ruby_map_error(result);
    91     return Qnil;  /* needed to prevent problems with optimization */
    92   }
    93   env->buffer = ALLOC_N(int32_t, result+1);
    94   result = utf8proc_decompose(RSTRING_PTR(str), RSTRING_LEN(str),
    95     env->buffer, result, options);
    96   if (result < 0) {
    97     free(env->buffer);
    98     env->buffer = 0;
    99     utf8proc_ruby_map_error(result);
   100     return Qnil;  /* needed to prevent problems with optimization */
   101   }
   102   result = utf8proc_reencode(env->buffer, result, options);
   103   if (result < 0) {
   104     free(env->buffer);
   105     env->buffer = 0;
   106     utf8proc_ruby_map_error(result);
   107     return Qnil;  /* needed to prevent problems with optimization */
   108   }
   109   retval = rb_str_new((char *)env->buffer, result);
   110   free(env->buffer);
   111   env->buffer = 0;
   112   return retval;
   113 }
   115 static VALUE utf8proc_ruby_char(VALUE self, VALUE code_param) {
   116   char buffer[4];
   117   ssize_t result;
   118   int uc;
   119   uc = NUM2INT(code_param);
   120   if (!utf8proc_codepoint_valid(uc))
   121     rb_raise(rb_eArgError, "Invalid Unicode code point");
   122   result = utf8proc_encode_char(uc, buffer);
   123   return rb_str_new(buffer, result);
   124 }
   126 #define register_utf8proc_option(sym, field) \
   127   rb_hash_aset(utf8proc_ruby_options, ID2SYM(rb_intern(sym)), INT2FIX(field))
   129 void Init_utf8proc_native() {
   130   utf8proc_ruby_module = rb_define_module("Utf8Proc");
   131   rb_define_module_function(utf8proc_ruby_module, "utf8map",
   132     utf8proc_ruby_map, 2);
   133   rb_define_module_function(utf8proc_ruby_module, "utf8char",
   134     utf8proc_ruby_char, 1);
   135   utf8proc_ruby_eUnicodeError = rb_define_class_under(utf8proc_ruby_module,
   136     "UnicodeError", rb_eStandardError);
   137   utf8proc_ruby_eInvalidUtf8Error = rb_define_class_under(
   138     utf8proc_ruby_module, "InvalidUtf8Error", utf8proc_ruby_eUnicodeError);
   139   utf8proc_ruby_eCodeNotAssignedError = rb_define_class_under(
   140     utf8proc_ruby_module, "CodeNotAssignedError",
   141     utf8proc_ruby_eUnicodeError);
   142   utf8proc_ruby_options = rb_hash_new();
   143   register_utf8proc_option("stable",    UTF8PROC_STABLE);
   144   register_utf8proc_option("compat",    UTF8PROC_COMPAT);
   145   register_utf8proc_option("compose",   UTF8PROC_COMPOSE);
   146   register_utf8proc_option("decompose", UTF8PROC_DECOMPOSE);
   147   register_utf8proc_option("ignore",    UTF8PROC_IGNORE);
   148   register_utf8proc_option("rejectna",  UTF8PROC_REJECTNA);
   149   register_utf8proc_option("nlf2ls",    UTF8PROC_NLF2LS);
   150   register_utf8proc_option("nlf2ps",    UTF8PROC_NLF2PS);
   151   register_utf8proc_option("nlf2lf",    UTF8PROC_NLF2LF);
   152   register_utf8proc_option("stripcc",   UTF8PROC_STRIPCC);
   153   register_utf8proc_option("casefold",  UTF8PROC_CASEFOLD);
   154   register_utf8proc_option("charbound", UTF8PROC_CHARBOUND);
   155   register_utf8proc_option("lump",      UTF8PROC_LUMP);
   156   register_utf8proc_option("stripmark", UTF8PROC_STRIPMARK);
   157   OBJ_FREEZE(utf8proc_ruby_options);
   158   rb_define_const(utf8proc_ruby_module, "Options", utf8proc_ruby_options);
   159 }
