utf8proc
view ruby/utf8proc_native.c @ 4:a49e32490aac
Version 1.0.1
- included a gem file for the ruby version of the library
- included a gem file for the ruby version of the library
| author | jbe | 
|---|---|
| date | Wed Sep 20 12:00:00 2006 +0200 (2006-09-20) | 
| parents | 4ee0d5f54af1 | 
| children | c18366878af9 | 
 line source
     1 /*
     2  *  Copyright (c) 2006, FlexiGuided GmbH, Berlin, Germany
     3  *  Author: Jan Behrens <jan.behrens@flexiguided.de>
     4  *  All rights reserved.
     5  *
     6  *  Redistribution and use in source and binary forms, with or without
     7  *  modification, are permitted provided that the following conditions are
     8  *  met:
     9  *
    10  *  1. Redistributions of source code must retain the above copyright
    11  *     notice, this list of conditions and the following disclaimer.
    12  *  2. Redistributions in binary form must reproduce the above copyright
    13  *     notice, this list of conditions and the following disclaimer in the
    14  *     documentation and/or other materials provided with the distribution.
    15  *  3. Neither the name of the FlexiGuided GmbH nor the names of its
    16  *     contributors may be used to endorse or promote products derived from
    17  *     this software without specific prior written permission.
    18  *
    19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    20  *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    21  *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    22  *  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
    23  *  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    24  *  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    25  *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    26  *  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
    27  *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
    28  *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    29  *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    30  *
    31  */
    34 /*
    35  *  File name:    ruby/utf8proc_native.c
    36  *  Version:      1.0
    37  *  Last changed: 2006-09-17
    38  *
    39  *  Description:
    40  *  Native part of the ruby wrapper for libutf8proc.
    41  */
    44 #include "../utf8proc.c"
    45 #include "ruby.h"
    47 typedef struct utf8proc_ruby_mapenv_struct {
    48   int32_t *buffer;
    49 } utf8proc_ruby_mapenv_t;
    51 void utf8proc_ruby_mapenv_free(utf8proc_ruby_mapenv_t *env) {
    52   free(env->buffer);
    53   free(env);
    54 }
    56 VALUE utf8proc_ruby_module;
    57 VALUE utf8proc_ruby_options;
    58 VALUE utf8proc_ruby_eUnicodeError;
    59 VALUE utf8proc_ruby_eInvalidUtf8Error;
    60 VALUE utf8proc_ruby_eCodeNotAssignedError;
    62 VALUE utf8proc_ruby_map_error(ssize_t result) {
    63   VALUE excpt_class;
    64   switch (result) {
    65     case UTF8PROC_ERROR_NOMEM:
    66     excpt_class = rb_eNoMemError; break;
    67     case UTF8PROC_ERROR_OVERFLOW:
    68     case UTF8PROC_ERROR_INVALIDOPTS:
    69     excpt_class = rb_eArgError; break;
    70     case UTF8PROC_ERROR_INVALIDUTF8:
    71     excpt_class = utf8proc_ruby_eInvalidUtf8Error; break;
    72     case UTF8PROC_ERROR_NOTASSIGNED:
    73     excpt_class = utf8proc_ruby_eCodeNotAssignedError; break;
    74     default:
    75     excpt_class = rb_eRuntimeError;
    76   }
    77   rb_raise(excpt_class, "%s", utf8proc_errmsg(result));
    78   return Qnil;
    79 }
    81 VALUE utf8proc_ruby_map(VALUE self, VALUE str_param, VALUE options_param) {
    82   VALUE str;
    83   int options;
    84   VALUE env_obj;
    85   utf8proc_ruby_mapenv_t *env;
    86   ssize_t result;
    87   VALUE retval;
    88   str = StringValue(str_param);
    89   options = NUM2INT(options_param) & ~UTF8PROC_NULLTERM;
    90   env_obj = Data_Make_Struct(rb_cObject, utf8proc_ruby_mapenv_t, NULL,
    91     utf8proc_ruby_mapenv_free, env);
    92   result = utf8proc_decompose(RSTRING(str)->ptr, RSTRING(str)->len,
    93     NULL, 0, options);
    94   if (result < 0) {
    95     utf8proc_ruby_map_error(result);
    96     return Qnil;  // needed to prevent problems with optimization
    97   }
    98   env->buffer = ALLOC_N(int32_t, result+1);
    99   result = utf8proc_decompose(RSTRING(str)->ptr, RSTRING(str)->len,
   100     env->buffer, result, options);
   101   if (result < 0) {
   102     free(env->buffer);
   103     env->buffer = 0;
   104     utf8proc_ruby_map_error(result);
   105     return Qnil;  // needed to prevent problems with optimization
   106   }
   107   result = utf8proc_reencode(env->buffer, result, options);
   108   if (result < 0) {
   109     free(env->buffer);
   110     env->buffer = 0;
   111     utf8proc_ruby_map_error(result);
   112     return Qnil;  // needed to prevent problems with optimization
   113   }
   114   retval = rb_str_new((char *)env->buffer, result);
   115   free(env->buffer);
   116   env->buffer = 0;
   117   return retval;
   118 }
   120 static VALUE utf8proc_ruby_char(VALUE self, VALUE code_param) {
   121   char buffer[4];
   122   ssize_t result;
   123   int uc;
   124   uc = NUM2INT(code_param);
   125   if (uc < 0 || ((uc & 0xFFFF) >= 0xFFFE) || (uc >= 0xD800 && uc < 0xE000) ||
   126       (uc >= 0xFDD0 && uc < 0xFDF0))
   127     rb_raise(rb_eArgError, "Invalid Unicode code point");
   128   result = utf8proc_encode_char(uc, buffer);
   129   return rb_str_new(buffer, result);
   130 }
   132 #define register_utf8proc_option(sym, field) \
   133   rb_hash_aset(utf8proc_ruby_options, ID2SYM(rb_intern(sym)), INT2FIX(field))
   135 void Init_utf8proc_native() {
   136   utf8proc_ruby_module = rb_define_module("Utf8Proc");
   137   rb_define_module_function(utf8proc_ruby_module, "utf8map",
   138     utf8proc_ruby_map, 2);
   139   rb_define_module_function(utf8proc_ruby_module, "utf8char",
   140     utf8proc_ruby_char, 1);
   141   utf8proc_ruby_eUnicodeError = rb_define_class_under(utf8proc_ruby_module,
   142     "UnicodeError", rb_eStandardError);
   143   utf8proc_ruby_eInvalidUtf8Error = rb_define_class_under(
   144     utf8proc_ruby_module, "InvalidUtf8Error", utf8proc_ruby_eUnicodeError);
   145   utf8proc_ruby_eCodeNotAssignedError = rb_define_class_under(
   146     utf8proc_ruby_module, "CodeNotAssignedError",
   147     utf8proc_ruby_eUnicodeError);
   148   utf8proc_ruby_options = rb_hash_new();
   149   register_utf8proc_option("stable",    UTF8PROC_STABLE);
   150   register_utf8proc_option("compat",    UTF8PROC_COMPAT);
   151   register_utf8proc_option("compose",   UTF8PROC_COMPOSE);
   152   register_utf8proc_option("decompose", UTF8PROC_DECOMPOSE);
   153   register_utf8proc_option("ignore",    UTF8PROC_IGNORE);
   154   register_utf8proc_option("rejectna",  UTF8PROC_REJECTNA);
   155   register_utf8proc_option("nlf2ls",    UTF8PROC_NLF2LS);
   156   register_utf8proc_option("nlf2ps",    UTF8PROC_NLF2PS);
   157   register_utf8proc_option("nlf2lf",    UTF8PROC_NLF2LF);
   158   register_utf8proc_option("stripcc",   UTF8PROC_STRIPCC);
   159   register_utf8proc_option("casefold",  UTF8PROC_CASEFOLD);
   160   register_utf8proc_option("charbound", UTF8PROC_CHARBOUND);
   161   register_utf8proc_option("lump",      UTF8PROC_LUMP);
   162   register_utf8proc_option("stripmark", UTF8PROC_STRIPMARK);
   163   OBJ_FREEZE(utf8proc_ruby_options);
   164   rb_define_const(utf8proc_ruby_module, "Options", utf8proc_ruby_options);
   165 }
