jbe@0: /* jbe@7: * Copyright (c) 2006-2007 Jan Behrens, FlexiGuided GmbH, Berlin jbe@0: * jbe@7: * Permission is hereby granted, free of charge, to any person obtaining a jbe@7: * copy of this software and associated documentation files (the "Software"), jbe@7: * to deal in the Software without restriction, including without limitation jbe@7: * the rights to use, copy, modify, merge, publish, distribute, sublicense, jbe@7: * and/or sell copies of the Software, and to permit persons to whom the jbe@7: * Software is furnished to do so, subject to the following conditions: jbe@0: * jbe@7: * The above copyright notice and this permission notice shall be included in jbe@7: * all copies or substantial portions of the Software. jbe@0: * jbe@7: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR jbe@7: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, jbe@7: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE jbe@7: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER jbe@7: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING jbe@7: * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER jbe@7: * DEALINGS IN THE SOFTWARE. jbe@0: */ jbe@7: jbe@0: jbe@0: /* jbe@0: * File name: ruby/utf8proc_native.c jbe@7: * Version: 1.1.1 jbe@7: * Last changed: 2007-07-22 jbe@0: * jbe@0: * Description: jbe@0: * Native part of the ruby wrapper for libutf8proc. jbe@0: */ jbe@0: jbe@0: jbe@0: #include "../utf8proc.c" jbe@0: #include "ruby.h" jbe@0: jbe@0: typedef struct utf8proc_ruby_mapenv_struct { jbe@0: int32_t *buffer; jbe@0: } utf8proc_ruby_mapenv_t; jbe@0: jbe@0: void utf8proc_ruby_mapenv_free(utf8proc_ruby_mapenv_t *env) { jbe@0: free(env->buffer); jbe@0: free(env); jbe@0: } jbe@0: jbe@0: VALUE utf8proc_ruby_module; jbe@0: VALUE utf8proc_ruby_options; jbe@0: VALUE utf8proc_ruby_eUnicodeError; jbe@0: VALUE utf8proc_ruby_eInvalidUtf8Error; jbe@0: VALUE utf8proc_ruby_eCodeNotAssignedError; jbe@0: jbe@0: VALUE utf8proc_ruby_map_error(ssize_t result) { jbe@0: VALUE excpt_class; jbe@0: switch (result) { jbe@0: case UTF8PROC_ERROR_NOMEM: jbe@0: excpt_class = rb_eNoMemError; break; jbe@0: case UTF8PROC_ERROR_OVERFLOW: jbe@3: case UTF8PROC_ERROR_INVALIDOPTS: jbe@0: excpt_class = rb_eArgError; break; jbe@0: case UTF8PROC_ERROR_INVALIDUTF8: jbe@0: excpt_class = utf8proc_ruby_eInvalidUtf8Error; break; jbe@0: case UTF8PROC_ERROR_NOTASSIGNED: jbe@0: excpt_class = utf8proc_ruby_eCodeNotAssignedError; break; jbe@0: default: jbe@0: excpt_class = rb_eRuntimeError; jbe@0: } jbe@0: rb_raise(excpt_class, "%s", utf8proc_errmsg(result)); jbe@0: return Qnil; jbe@0: } jbe@0: jbe@0: VALUE utf8proc_ruby_map(VALUE self, VALUE str_param, VALUE options_param) { jbe@0: VALUE str; jbe@0: int options; jbe@0: VALUE env_obj; jbe@0: utf8proc_ruby_mapenv_t *env; jbe@0: ssize_t result; jbe@0: VALUE retval; jbe@0: str = StringValue(str_param); jbe@0: options = NUM2INT(options_param) & ~UTF8PROC_NULLTERM; jbe@0: env_obj = Data_Make_Struct(rb_cObject, utf8proc_ruby_mapenv_t, NULL, jbe@0: utf8proc_ruby_mapenv_free, env); jbe@0: result = utf8proc_decompose(RSTRING(str)->ptr, RSTRING(str)->len, jbe@0: NULL, 0, options); jbe@2: if (result < 0) { jbe@2: utf8proc_ruby_map_error(result); jbe@2: return Qnil; // needed to prevent problems with optimization jbe@2: } jbe@0: env->buffer = ALLOC_N(int32_t, result+1); jbe@0: result = utf8proc_decompose(RSTRING(str)->ptr, RSTRING(str)->len, jbe@0: env->buffer, result, options); jbe@0: if (result < 0) { jbe@0: free(env->buffer); jbe@0: env->buffer = 0; jbe@1: utf8proc_ruby_map_error(result); jbe@2: return Qnil; // needed to prevent problems with optimization jbe@0: } jbe@0: result = utf8proc_reencode(env->buffer, result, options); jbe@0: if (result < 0) { jbe@0: free(env->buffer); jbe@0: env->buffer = 0; jbe@1: utf8proc_ruby_map_error(result); jbe@2: return Qnil; // needed to prevent problems with optimization jbe@0: } jbe@0: retval = rb_str_new((char *)env->buffer, result); jbe@0: free(env->buffer); jbe@0: env->buffer = 0; jbe@0: return retval; jbe@0: } jbe@0: jbe@0: static VALUE utf8proc_ruby_char(VALUE self, VALUE code_param) { jbe@0: char buffer[4]; jbe@0: ssize_t result; jbe@0: int uc; jbe@0: uc = NUM2INT(code_param); jbe@7: if (!utf8proc_codepoint_valid(uc)) jbe@0: rb_raise(rb_eArgError, "Invalid Unicode code point"); jbe@0: result = utf8proc_encode_char(uc, buffer); jbe@0: return rb_str_new(buffer, result); jbe@0: } jbe@0: jbe@0: #define register_utf8proc_option(sym, field) \ jbe@0: rb_hash_aset(utf8proc_ruby_options, ID2SYM(rb_intern(sym)), INT2FIX(field)) jbe@0: jbe@0: void Init_utf8proc_native() { jbe@0: utf8proc_ruby_module = rb_define_module("Utf8Proc"); jbe@0: rb_define_module_function(utf8proc_ruby_module, "utf8map", jbe@0: utf8proc_ruby_map, 2); jbe@0: rb_define_module_function(utf8proc_ruby_module, "utf8char", jbe@0: utf8proc_ruby_char, 1); jbe@0: utf8proc_ruby_eUnicodeError = rb_define_class_under(utf8proc_ruby_module, jbe@0: "UnicodeError", rb_eStandardError); jbe@0: utf8proc_ruby_eInvalidUtf8Error = rb_define_class_under( jbe@0: utf8proc_ruby_module, "InvalidUtf8Error", utf8proc_ruby_eUnicodeError); jbe@0: utf8proc_ruby_eCodeNotAssignedError = rb_define_class_under( jbe@0: utf8proc_ruby_module, "CodeNotAssignedError", jbe@0: utf8proc_ruby_eUnicodeError); jbe@0: utf8proc_ruby_options = rb_hash_new(); jbe@2: register_utf8proc_option("stable", UTF8PROC_STABLE); jbe@2: register_utf8proc_option("compat", UTF8PROC_COMPAT); jbe@2: register_utf8proc_option("compose", UTF8PROC_COMPOSE); jbe@2: register_utf8proc_option("decompose", UTF8PROC_DECOMPOSE); jbe@2: register_utf8proc_option("ignore", UTF8PROC_IGNORE); jbe@2: register_utf8proc_option("rejectna", UTF8PROC_REJECTNA); jbe@2: register_utf8proc_option("nlf2ls", UTF8PROC_NLF2LS); jbe@2: register_utf8proc_option("nlf2ps", UTF8PROC_NLF2PS); jbe@2: register_utf8proc_option("nlf2lf", UTF8PROC_NLF2LF); jbe@2: register_utf8proc_option("stripcc", UTF8PROC_STRIPCC); jbe@2: register_utf8proc_option("casefold", UTF8PROC_CASEFOLD); jbe@2: register_utf8proc_option("charbound", UTF8PROC_CHARBOUND); jbe@3: register_utf8proc_option("lump", UTF8PROC_LUMP); jbe@3: register_utf8proc_option("stripmark", UTF8PROC_STRIPMARK); jbe@0: OBJ_FREEZE(utf8proc_ruby_options); jbe@0: rb_define_const(utf8proc_ruby_module, "Options", utf8proc_ruby_options); jbe@0: } jbe@0: