| rev | line source | 
| jbe@0 | 1 /* | 
| jbe@0 | 2  *  Copyright (c) 2006, FlexiGuided GmbH, Berlin, Germany | 
| jbe@0 | 3  *  Author: Jan Behrens <jan.behrens@flexiguided.de> | 
| jbe@0 | 4  *  All rights reserved. | 
| jbe@0 | 5  * | 
| jbe@0 | 6  *  Redistribution and use in source and binary forms, with or without | 
| jbe@0 | 7  *  modification, are permitted provided that the following conditions are | 
| jbe@0 | 8  *  met: | 
| jbe@0 | 9  * | 
| jbe@0 | 10  *  1. Redistributions of source code must retain the above copyright | 
| jbe@0 | 11  *     notice, this list of conditions and the following disclaimer. | 
| jbe@0 | 12  *  2. Redistributions in binary form must reproduce the above copyright | 
| jbe@0 | 13  *     notice, this list of conditions and the following disclaimer in the | 
| jbe@0 | 14  *     documentation and/or other materials provided with the distribution. | 
| jbe@0 | 15  *  3. Neither the name of the FlexiGuided GmbH nor the names of its | 
| jbe@0 | 16  *     contributors may be used to endorse or promote products derived from | 
| jbe@0 | 17  *     this software without specific prior written permission. | 
| jbe@0 | 18  * | 
| jbe@0 | 19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 
| jbe@0 | 20  *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 
| jbe@0 | 21  *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A | 
| jbe@0 | 22  *  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER | 
| jbe@0 | 23  *  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | 
| jbe@0 | 24  *  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | 
| jbe@0 | 25  *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | 
| jbe@0 | 26  *  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | 
| jbe@0 | 27  *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | 
| jbe@0 | 28  *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | 
| jbe@0 | 29  *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
| jbe@0 | 30  * | 
| jbe@0 | 31  */ | 
| jbe@0 | 32 | 
| jbe@0 | 33 | 
| jbe@0 | 34 /* | 
| jbe@0 | 35  *  File name:    ruby/utf8proc_native.c | 
| jbe@0 | 36  *  Version:      0.1 | 
| jbe@0 | 37  *  Last changed: 2006-05-31 | 
| jbe@0 | 38  * | 
| jbe@0 | 39  *  Description: | 
| jbe@0 | 40  *  Native part of the ruby wrapper for libutf8proc. | 
| jbe@0 | 41  */ | 
| jbe@0 | 42 | 
| jbe@0 | 43 | 
| jbe@0 | 44 #include "../utf8proc.c" | 
| jbe@0 | 45 #include "ruby.h" | 
| jbe@0 | 46 | 
/*
 *  Environment that utf8proc_ruby_map wraps in a Ruby Data object.
 *  It owns a single heap buffer of codepoints, which is released by
 *  utf8proc_ruby_mapenv_free together with the structure itself.
 */
struct utf8proc_ruby_mapenv_struct {
  int32_t *buffer;  /* codepoint work buffer owned by this environment */
};
typedef struct utf8proc_ruby_mapenv_struct utf8proc_ruby_mapenv_t;
| jbe@0 | 50 | 
| jbe@0 | 51 void utf8proc_ruby_mapenv_free(utf8proc_ruby_mapenv_t *env) { | 
| jbe@0 | 52   free(env->buffer); | 
| jbe@0 | 53   free(env); | 
| jbe@0 | 54 } | 
| jbe@0 | 55 | 
| jbe@0 | 56 VALUE utf8proc_ruby_module; | 
| jbe@0 | 57 VALUE utf8proc_ruby_options; | 
| jbe@0 | 58 VALUE utf8proc_ruby_eUnicodeError; | 
| jbe@0 | 59 VALUE utf8proc_ruby_eInvalidUtf8Error; | 
| jbe@0 | 60 VALUE utf8proc_ruby_eCodeNotAssignedError; | 
| jbe@0 | 61 | 
| jbe@0 | 62 VALUE utf8proc_ruby_map_error(ssize_t result) { | 
| jbe@0 | 63   VALUE excpt_class; | 
| jbe@0 | 64   switch (result) { | 
| jbe@0 | 65     case UTF8PROC_ERROR_NOMEM: | 
| jbe@0 | 66     excpt_class = rb_eNoMemError; break; | 
| jbe@0 | 67     case UTF8PROC_ERROR_OVERFLOW: | 
| jbe@0 | 68     excpt_class = rb_eArgError; break; | 
| jbe@0 | 69     case UTF8PROC_ERROR_INVALIDUTF8: | 
| jbe@0 | 70     excpt_class = utf8proc_ruby_eInvalidUtf8Error; break; | 
| jbe@0 | 71     case UTF8PROC_ERROR_NOTASSIGNED: | 
| jbe@0 | 72     excpt_class = utf8proc_ruby_eCodeNotAssignedError; break; | 
| jbe@0 | 73     default: | 
| jbe@0 | 74     excpt_class = rb_eRuntimeError; | 
| jbe@0 | 75   } | 
| jbe@0 | 76   rb_raise(excpt_class, "%s", utf8proc_errmsg(result)); | 
| jbe@0 | 77   return Qnil; | 
| jbe@0 | 78 } | 
| jbe@0 | 79 | 
| jbe@0 | 80 VALUE utf8proc_ruby_map(VALUE self, VALUE str_param, VALUE options_param) { | 
| jbe@0 | 81   VALUE str; | 
| jbe@0 | 82   int options; | 
| jbe@0 | 83   VALUE env_obj; | 
| jbe@0 | 84   utf8proc_ruby_mapenv_t *env; | 
| jbe@0 | 85   ssize_t result; | 
| jbe@0 | 86   VALUE retval; | 
| jbe@0 | 87   str = StringValue(str_param); | 
| jbe@0 | 88   options = NUM2INT(options_param) & ~UTF8PROC_NULLTERM; | 
| jbe@0 | 89   env_obj = Data_Make_Struct(rb_cObject, utf8proc_ruby_mapenv_t, NULL, | 
| jbe@0 | 90     utf8proc_ruby_mapenv_free, env); | 
| jbe@0 | 91   result = utf8proc_decompose(RSTRING(str)->ptr, RSTRING(str)->len, | 
| jbe@0 | 92     NULL, 0, options); | 
| jbe@0 | 93   if (result < 0) utf8proc_ruby_map_error(result); | 
| jbe@0 | 94   env->buffer = ALLOC_N(int32_t, result+1); | 
| jbe@0 | 95   result = utf8proc_decompose(RSTRING(str)->ptr, RSTRING(str)->len, | 
| jbe@0 | 96     env->buffer, result, options); | 
| jbe@0 | 97   if (result < 0) { | 
| jbe@0 | 98     free(env->buffer); | 
| jbe@0 | 99     env->buffer = 0; | 
| jbe@0 | 100     func_map_error(result); | 
| jbe@0 | 101   } | 
| jbe@0 | 102   result = utf8proc_reencode(env->buffer, result, options); | 
| jbe@0 | 103   if (result < 0) { | 
| jbe@0 | 104     free(env->buffer); | 
| jbe@0 | 105     env->buffer = 0; | 
| jbe@0 | 106     func_map_error(result); | 
| jbe@0 | 107   } | 
| jbe@0 | 108   retval = rb_str_new((char *)env->buffer, result); | 
| jbe@0 | 109   free(env->buffer); | 
| jbe@0 | 110   env->buffer = 0; | 
| jbe@0 | 111   return retval; | 
| jbe@0 | 112 } | 
| jbe@0 | 113 | 
| jbe@0 | 114 static VALUE utf8proc_ruby_char(VALUE self, VALUE code_param) { | 
| jbe@0 | 115   char buffer[4]; | 
| jbe@0 | 116   ssize_t result; | 
| jbe@0 | 117   int uc; | 
| jbe@0 | 118   uc = NUM2INT(code_param); | 
| jbe@0 | 119   if (uc < 0 || ((uc & 0xFFFF) >= 0xFFFE) || (uc >= 0xD800 && uc < 0xE000) || | 
| jbe@0 | 120       (uc >= 0xFDD0 && uc < 0xFDF0)) | 
| jbe@0 | 121     rb_raise(rb_eArgError, "Invalid Unicode code point"); | 
| jbe@0 | 122   result = utf8proc_encode_char(uc, buffer); | 
| jbe@0 | 123   return rb_str_new(buffer, result); | 
| jbe@0 | 124 } | 
| jbe@0 | 125 | 
/*
 *  Stores the option value `flag` (as a Fixnum) in utf8proc_ruby_options
 *  under the symbol whose name is the C string `name`.
 */
#define register_utf8proc_option(name, flag) \
  rb_hash_aset(utf8proc_ruby_options, ID2SYM(rb_intern(name)), INT2FIX(flag))
| jbe@0 | 128 | 
| jbe@0 | 129 void Init_utf8proc_native() { | 
| jbe@0 | 130   utf8proc_ruby_module = rb_define_module("Utf8Proc"); | 
| jbe@0 | 131   rb_define_module_function(utf8proc_ruby_module, "utf8map", | 
| jbe@0 | 132     utf8proc_ruby_map, 2); | 
| jbe@0 | 133   rb_define_module_function(utf8proc_ruby_module, "utf8char", | 
| jbe@0 | 134     utf8proc_ruby_char, 1); | 
| jbe@0 | 135   utf8proc_ruby_eUnicodeError = rb_define_class_under(utf8proc_ruby_module, | 
| jbe@0 | 136     "UnicodeError", rb_eStandardError); | 
| jbe@0 | 137   utf8proc_ruby_eInvalidUtf8Error = rb_define_class_under( | 
| jbe@0 | 138     utf8proc_ruby_module, "InvalidUtf8Error", utf8proc_ruby_eUnicodeError); | 
| jbe@0 | 139   utf8proc_ruby_eCodeNotAssignedError = rb_define_class_under( | 
| jbe@0 | 140     utf8proc_ruby_module, "CodeNotAssignedError", | 
| jbe@0 | 141     utf8proc_ruby_eUnicodeError); | 
| jbe@0 | 142   utf8proc_ruby_options = rb_hash_new(); | 
| jbe@0 | 143   register_utf8proc_option("stable",   UTF8PROC_STABLE); | 
| jbe@0 | 144   register_utf8proc_option("compat",   UTF8PROC_COMPAT); | 
| jbe@0 | 145   register_utf8proc_option("compose",  UTF8PROC_COMPOSE); | 
| jbe@0 | 146   register_utf8proc_option("ignore",   UTF8PROC_IGNORE); | 
| jbe@0 | 147   register_utf8proc_option("rejectna", UTF8PROC_REJECTNA); | 
| jbe@0 | 148   register_utf8proc_option("nlf2ls",   UTF8PROC_NLF2LS); | 
| jbe@0 | 149   register_utf8proc_option("nlf2ps",   UTF8PROC_NLF2PS); | 
| jbe@0 | 150   register_utf8proc_option("nlf2lf",   UTF8PROC_NLF2LF); | 
| jbe@0 | 151   register_utf8proc_option("stripcc",  UTF8PROC_STRIPCC); | 
| jbe@0 | 152   register_utf8proc_option("casefold", UTF8PROC_CASEFOLD); | 
| jbe@0 | 153   OBJ_FREEZE(utf8proc_ruby_options); | 
| jbe@0 | 154   rb_define_const(utf8proc_ruby_module, "Options", utf8proc_ruby_options); | 
| jbe@0 | 155 } | 
| jbe@0 | 156 | 
| jbe@0 | 157 |