utf8proc
view ruby/utf8proc_native.c @ 0:a0368662434c
Version 0.1
| author | jbe | 
|---|---|
| date | Fri Jun 02 12:00:00 2006 +0200 (2006-06-02) | 
| parents | |
| children | 61a89ecc2fb9 | 
 line source
     1 /*
     2  *  Copyright (c) 2006, FlexiGuided GmbH, Berlin, Germany
     3  *  Author: Jan Behrens <jan.behrens@flexiguided.de>
     4  *  All rights reserved.
     5  *
     6  *  Redistribution and use in source and binary forms, with or without
     7  *  modification, are permitted provided that the following conditions are
     8  *  met:
     9  *
    10  *  1. Redistributions of source code must retain the above copyright
    11  *     notice, this list of conditions and the following disclaimer.
    12  *  2. Redistributions in binary form must reproduce the above copyright
    13  *     notice, this list of conditions and the following disclaimer in the
    14  *     documentation and/or other materials provided with the distribution.
    15  *  3. Neither the name of the FlexiGuided GmbH nor the names of its
    16  *     contributors may be used to endorse or promote products derived from
    17  *     this software without specific prior written permission.
    18  *
    19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    20  *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    21  *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    22  *  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
    23  *  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    24  *  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    25  *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    26  *  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
    27  *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
    28  *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    29  *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    30  *
    31  */
    34 /*
    35  *  File name:    ruby/utf8proc_native.c
    36  *  Version:      0.1
    37  *  Last changed: 2006-05-31
    38  *
    39  *  Description:
    40  *  Native part of the ruby wrapper for libutf8proc.
    41  */
    44 #include "../utf8proc.c"
    45 #include "ruby.h"
    /*
     *  Environment of one mapping call. It only carries the code point buffer,
     *  so that the buffer can be released through utf8proc_ruby_mapenv_free.
     */
    struct utf8proc_ruby_mapenv_struct {
      int32_t *buffer;  /* heap allocated code point buffer, or NULL */
    };
    typedef struct utf8proc_ruby_mapenv_struct utf8proc_ruby_mapenv_t;
    51 void utf8proc_ruby_mapenv_free(utf8proc_ruby_mapenv_t *env) {
    52   free(env->buffer);
    53   free(env);
    54 }
    56 VALUE utf8proc_ruby_module;
    57 VALUE utf8proc_ruby_options;
    58 VALUE utf8proc_ruby_eUnicodeError;
    59 VALUE utf8proc_ruby_eInvalidUtf8Error;
    60 VALUE utf8proc_ruby_eCodeNotAssignedError;
    62 VALUE utf8proc_ruby_map_error(ssize_t result) {
    63   VALUE excpt_class;
    64   switch (result) {
    65     case UTF8PROC_ERROR_NOMEM:
    66     excpt_class = rb_eNoMemError; break;
    67     case UTF8PROC_ERROR_OVERFLOW:
    68     excpt_class = rb_eArgError; break;
    69     case UTF8PROC_ERROR_INVALIDUTF8:
    70     excpt_class = utf8proc_ruby_eInvalidUtf8Error; break;
    71     case UTF8PROC_ERROR_NOTASSIGNED:
    72     excpt_class = utf8proc_ruby_eCodeNotAssignedError; break;
    73     default:
    74     excpt_class = rb_eRuntimeError;
    75   }
    76   rb_raise(excpt_class, "%s", utf8proc_errmsg(result));
    77   return Qnil;
    78 }
    80 VALUE utf8proc_ruby_map(VALUE self, VALUE str_param, VALUE options_param) {
    81   VALUE str;
    82   int options;
    83   VALUE env_obj;
    84   utf8proc_ruby_mapenv_t *env;
    85   ssize_t result;
    86   VALUE retval;
    87   str = StringValue(str_param);
    88   options = NUM2INT(options_param) & ~UTF8PROC_NULLTERM;
    89   env_obj = Data_Make_Struct(rb_cObject, utf8proc_ruby_mapenv_t, NULL,
    90     utf8proc_ruby_mapenv_free, env);
    91   result = utf8proc_decompose(RSTRING(str)->ptr, RSTRING(str)->len,
    92     NULL, 0, options);
    93   if (result < 0) utf8proc_ruby_map_error(result);
    94   env->buffer = ALLOC_N(int32_t, result+1);
    95   result = utf8proc_decompose(RSTRING(str)->ptr, RSTRING(str)->len,
    96     env->buffer, result, options);
    97   if (result < 0) {
    98     free(env->buffer);
    99     env->buffer = 0;
   100     func_map_error(result);
   101   }
   102   result = utf8proc_reencode(env->buffer, result, options);
   103   if (result < 0) {
   104     free(env->buffer);
   105     env->buffer = 0;
   106     func_map_error(result);
   107   }
   108   retval = rb_str_new((char *)env->buffer, result);
   109   free(env->buffer);
   110   env->buffer = 0;
   111   return retval;
   112 }
   114 static VALUE utf8proc_ruby_char(VALUE self, VALUE code_param) {
   115   char buffer[4];
   116   ssize_t result;
   117   int uc;
   118   uc = NUM2INT(code_param);
   119   if (uc < 0 || ((uc & 0xFFFF) >= 0xFFFE) || (uc >= 0xD800 && uc < 0xE000) ||
   120       (uc >= 0xFDD0 && uc < 0xFDF0))
   121     rb_raise(rb_eArgError, "Invalid Unicode code point");
   122   result = utf8proc_encode_char(uc, buffer);
   123   return rb_str_new(buffer, result);
   124 }
   /*
    *  Stores one entry in the utf8proc_ruby_options hash: the symbol interned
    *  from the string `name` is the key, `flag` converted with INT2FIX is the
    *  value.
    */
   #define register_utf8proc_option(name, flag) \
     rb_hash_aset( \
       utf8proc_ruby_options, \
       ID2SYM(rb_intern(name)), \
       INT2FIX(flag) \
     )
   129 void Init_utf8proc_native() {
   130   utf8proc_ruby_module = rb_define_module("Utf8Proc");
   131   rb_define_module_function(utf8proc_ruby_module, "utf8map",
   132     utf8proc_ruby_map, 2);
   133   rb_define_module_function(utf8proc_ruby_module, "utf8char",
   134     utf8proc_ruby_char, 1);
   135   utf8proc_ruby_eUnicodeError = rb_define_class_under(utf8proc_ruby_module,
   136     "UnicodeError", rb_eStandardError);
   137   utf8proc_ruby_eInvalidUtf8Error = rb_define_class_under(
   138     utf8proc_ruby_module, "InvalidUtf8Error", utf8proc_ruby_eUnicodeError);
   139   utf8proc_ruby_eCodeNotAssignedError = rb_define_class_under(
   140     utf8proc_ruby_module, "CodeNotAssignedError",
   141     utf8proc_ruby_eUnicodeError);
   142   utf8proc_ruby_options = rb_hash_new();
   143   register_utf8proc_option("stable",   UTF8PROC_STABLE);
   144   register_utf8proc_option("compat",   UTF8PROC_COMPAT);
   145   register_utf8proc_option("compose",  UTF8PROC_COMPOSE);
   146   register_utf8proc_option("ignore",   UTF8PROC_IGNORE);
   147   register_utf8proc_option("rejectna", UTF8PROC_REJECTNA);
   148   register_utf8proc_option("nlf2ls",   UTF8PROC_NLF2LS);
   149   register_utf8proc_option("nlf2ps",   UTF8PROC_NLF2PS);
   150   register_utf8proc_option("nlf2lf",   UTF8PROC_NLF2LF);
   151   register_utf8proc_option("stripcc",  UTF8PROC_STRIPCC);
   152   register_utf8proc_option("casefold", UTF8PROC_CASEFOLD);
   153   OBJ_FREEZE(utf8proc_ruby_options);
   154   rb_define_const(utf8proc_ruby_module, "Options", utf8proc_ruby_options);
   155 }
