| rev | line source | 
| jbe@0 | 1 /* | 
| jbe@10 | 2  *  Copyright (c) 2009 Public Software Group e. V., Berlin, Germany | 
| jbe@0 | 3  * | 
| jbe@7 | 4  *  Permission is hereby granted, free of charge, to any person obtaining a | 
| jbe@7 | 5  *  copy of this software and associated documentation files (the "Software"), | 
| jbe@7 | 6  *  to deal in the Software without restriction, including without limitation | 
| jbe@7 | 7  *  the rights to use, copy, modify, merge, publish, distribute, sublicense, | 
| jbe@7 | 8  *  and/or sell copies of the Software, and to permit persons to whom the | 
| jbe@7 | 9  *  Software is furnished to do so, subject to the following conditions: | 
| jbe@0 | 10  * | 
| jbe@7 | 11  *  The above copyright notice and this permission notice shall be included in | 
| jbe@7 | 12  *  all copies or substantial portions of the Software. | 
| jbe@0 | 13  * | 
| jbe@7 | 14  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
| jbe@7 | 15  *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
| jbe@7 | 16  *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | 
| jbe@7 | 17  *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 
| jbe@7 | 18  *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | 
| jbe@7 | 19  *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | 
| jbe@7 | 20  *  DEALINGS IN THE SOFTWARE. | 
| jbe@0 | 21  */ | 
| jbe@7 | 22 | 
| jbe@0 | 23 | 
| jbe@0 | 24 /* | 
| jbe@0 | 25  *  File name:    ruby/utf8proc_native.c | 
| jbe@0 | 26  * | 
| jbe@0 | 27  *  Description: | 
| jbe@0 | 28  *  Native part of the ruby wrapper for libutf8proc. | 
| jbe@0 | 29  */ | 
| jbe@0 | 30 | 
| jbe@0 | 31 | 
| jbe@0 | 32 #include "../utf8proc.c" | 
| jbe@0 | 33 #include "ruby.h" | 
| jbe@0 | 34 | 
/*
 *  Compatibility shims: define RSTRING_PTR and RSTRING_LEN in terms of the
 *  RSTRING() accessor when no previously included header has defined them.
 */
#if !defined(RSTRING_PTR)
#  define RSTRING_PTR(s) (RSTRING(s)->ptr)
#endif
#if !defined(RSTRING_LEN)
#  define RSTRING_LEN(s) (RSTRING(s)->len)
#endif
| jbe@11 | 41 | 
/*
 *  Working state of one utf8proc_ruby_map call.  The struct is wrapped in a
 *  Ruby data object there, with utf8proc_ruby_mapenv_free as its free
 *  function.
 */
typedef struct utf8proc_ruby_mapenv_struct utf8proc_ruby_mapenv_t;

struct utf8proc_ruby_mapenv_struct {
  int32_t *buffer;  /* heap buffer used for the mapping; 0 when not held */
};
| jbe@0 | 45 | 
| jbe@0 | 46 void utf8proc_ruby_mapenv_free(utf8proc_ruby_mapenv_t *env) { | 
| jbe@0 | 47   free(env->buffer); | 
| jbe@0 | 48   free(env); | 
| jbe@0 | 49 } | 
| jbe@0 | 50 | 
| jbe@0 | 51 VALUE utf8proc_ruby_module; | 
| jbe@0 | 52 VALUE utf8proc_ruby_options; | 
| jbe@0 | 53 VALUE utf8proc_ruby_eUnicodeError; | 
| jbe@0 | 54 VALUE utf8proc_ruby_eInvalidUtf8Error; | 
| jbe@0 | 55 VALUE utf8proc_ruby_eCodeNotAssignedError; | 
| jbe@0 | 56 | 
| jbe@0 | 57 VALUE utf8proc_ruby_map_error(ssize_t result) { | 
| jbe@0 | 58   VALUE excpt_class; | 
| jbe@0 | 59   switch (result) { | 
| jbe@0 | 60     case UTF8PROC_ERROR_NOMEM: | 
| jbe@0 | 61     excpt_class = rb_eNoMemError; break; | 
| jbe@0 | 62     case UTF8PROC_ERROR_OVERFLOW: | 
| jbe@3 | 63     case UTF8PROC_ERROR_INVALIDOPTS: | 
| jbe@0 | 64     excpt_class = rb_eArgError; break; | 
| jbe@0 | 65     case UTF8PROC_ERROR_INVALIDUTF8: | 
| jbe@0 | 66     excpt_class = utf8proc_ruby_eInvalidUtf8Error; break; | 
| jbe@0 | 67     case UTF8PROC_ERROR_NOTASSIGNED: | 
| jbe@0 | 68     excpt_class = utf8proc_ruby_eCodeNotAssignedError; break; | 
| jbe@0 | 69     default: | 
| jbe@0 | 70     excpt_class = rb_eRuntimeError; | 
| jbe@0 | 71   } | 
| jbe@0 | 72   rb_raise(excpt_class, "%s", utf8proc_errmsg(result)); | 
| jbe@0 | 73   return Qnil; | 
| jbe@0 | 74 } | 
| jbe@0 | 75 | 
| jbe@0 | 76 VALUE utf8proc_ruby_map(VALUE self, VALUE str_param, VALUE options_param) { | 
| jbe@0 | 77   VALUE str; | 
| jbe@0 | 78   int options; | 
| jbe@0 | 79   VALUE env_obj; | 
| jbe@0 | 80   utf8proc_ruby_mapenv_t *env; | 
| jbe@0 | 81   ssize_t result; | 
| jbe@0 | 82   VALUE retval; | 
| jbe@0 | 83   str = StringValue(str_param); | 
| jbe@0 | 84   options = NUM2INT(options_param) & ~UTF8PROC_NULLTERM; | 
| jbe@0 | 85   env_obj = Data_Make_Struct(rb_cObject, utf8proc_ruby_mapenv_t, NULL, | 
| jbe@0 | 86     utf8proc_ruby_mapenv_free, env); | 
| jbe@11 | 87   result = utf8proc_decompose(RSTRING_PTR(str), RSTRING_LEN(str), | 
| jbe@0 | 88     NULL, 0, options); | 
| jbe@2 | 89   if (result < 0) { | 
| jbe@2 | 90     utf8proc_ruby_map_error(result); | 
| jbe@10 | 91     return Qnil;  /* needed to prevent problems with optimization */ | 
| jbe@2 | 92   } | 
| jbe@0 | 93   env->buffer = ALLOC_N(int32_t, result+1); | 
| jbe@11 | 94   result = utf8proc_decompose(RSTRING_PTR(str), RSTRING_LEN(str), | 
| jbe@0 | 95     env->buffer, result, options); | 
| jbe@0 | 96   if (result < 0) { | 
| jbe@0 | 97     free(env->buffer); | 
| jbe@0 | 98     env->buffer = 0; | 
| jbe@1 | 99     utf8proc_ruby_map_error(result); | 
| jbe@10 | 100     return Qnil;  /* needed to prevent problems with optimization */ | 
| jbe@0 | 101   } | 
| jbe@0 | 102   result = utf8proc_reencode(env->buffer, result, options); | 
| jbe@0 | 103   if (result < 0) { | 
| jbe@0 | 104     free(env->buffer); | 
| jbe@0 | 105     env->buffer = 0; | 
| jbe@1 | 106     utf8proc_ruby_map_error(result); | 
| jbe@10 | 107     return Qnil;  /* needed to prevent problems with optimization */ | 
| jbe@0 | 108   } | 
| jbe@0 | 109   retval = rb_str_new((char *)env->buffer, result); | 
| jbe@0 | 110   free(env->buffer); | 
| jbe@0 | 111   env->buffer = 0; | 
| jbe@0 | 112   return retval; | 
| jbe@0 | 113 } | 
| jbe@0 | 114 | 
| jbe@0 | 115 static VALUE utf8proc_ruby_char(VALUE self, VALUE code_param) { | 
| jbe@0 | 116   char buffer[4]; | 
| jbe@0 | 117   ssize_t result; | 
| jbe@0 | 118   int uc; | 
| jbe@0 | 119   uc = NUM2INT(code_param); | 
| jbe@7 | 120   if (!utf8proc_codepoint_valid(uc)) | 
| jbe@0 | 121     rb_raise(rb_eArgError, "Invalid Unicode code point"); | 
| jbe@0 | 122   result = utf8proc_encode_char(uc, buffer); | 
| jbe@0 | 123   return rb_str_new(buffer, result); | 
| jbe@0 | 124 } | 
| jbe@0 | 125 | 
/*
 *  Stores the integer `field` in the utf8proc_ruby_options hash under the
 *  symbol whose name is the C string `sym`.
 */
#define register_utf8proc_option(sym, field) \
  rb_hash_aset( \
    utf8proc_ruby_options, \
    ID2SYM(rb_intern(sym)), \
    INT2FIX(field))
| jbe@0 | 128 | 
| jbe@0 | 129 void Init_utf8proc_native() { | 
| jbe@0 | 130   utf8proc_ruby_module = rb_define_module("Utf8Proc"); | 
| jbe@0 | 131   rb_define_module_function(utf8proc_ruby_module, "utf8map", | 
| jbe@0 | 132     utf8proc_ruby_map, 2); | 
| jbe@0 | 133   rb_define_module_function(utf8proc_ruby_module, "utf8char", | 
| jbe@0 | 134     utf8proc_ruby_char, 1); | 
| jbe@0 | 135   utf8proc_ruby_eUnicodeError = rb_define_class_under(utf8proc_ruby_module, | 
| jbe@0 | 136     "UnicodeError", rb_eStandardError); | 
| jbe@0 | 137   utf8proc_ruby_eInvalidUtf8Error = rb_define_class_under( | 
| jbe@0 | 138     utf8proc_ruby_module, "InvalidUtf8Error", utf8proc_ruby_eUnicodeError); | 
| jbe@0 | 139   utf8proc_ruby_eCodeNotAssignedError = rb_define_class_under( | 
| jbe@0 | 140     utf8proc_ruby_module, "CodeNotAssignedError", | 
| jbe@0 | 141     utf8proc_ruby_eUnicodeError); | 
| jbe@0 | 142   utf8proc_ruby_options = rb_hash_new(); | 
| jbe@2 | 143   register_utf8proc_option("stable",    UTF8PROC_STABLE); | 
| jbe@2 | 144   register_utf8proc_option("compat",    UTF8PROC_COMPAT); | 
| jbe@2 | 145   register_utf8proc_option("compose",   UTF8PROC_COMPOSE); | 
| jbe@2 | 146   register_utf8proc_option("decompose", UTF8PROC_DECOMPOSE); | 
| jbe@2 | 147   register_utf8proc_option("ignore",    UTF8PROC_IGNORE); | 
| jbe@2 | 148   register_utf8proc_option("rejectna",  UTF8PROC_REJECTNA); | 
| jbe@2 | 149   register_utf8proc_option("nlf2ls",    UTF8PROC_NLF2LS); | 
| jbe@2 | 150   register_utf8proc_option("nlf2ps",    UTF8PROC_NLF2PS); | 
| jbe@2 | 151   register_utf8proc_option("nlf2lf",    UTF8PROC_NLF2LF); | 
| jbe@2 | 152   register_utf8proc_option("stripcc",   UTF8PROC_STRIPCC); | 
| jbe@2 | 153   register_utf8proc_option("casefold",  UTF8PROC_CASEFOLD); | 
| jbe@2 | 154   register_utf8proc_option("charbound", UTF8PROC_CHARBOUND); | 
| jbe@3 | 155   register_utf8proc_option("lump",      UTF8PROC_LUMP); | 
| jbe@3 | 156   register_utf8proc_option("stripmark", UTF8PROC_STRIPMARK); | 
| jbe@0 | 157   OBJ_FREEZE(utf8proc_ruby_options); | 
| jbe@0 | 158   rb_define_const(utf8proc_ruby_module, "Options", utf8proc_ruby_options); | 
| jbe@0 | 159 } | 
| jbe@0 | 160 |