jbe@0: /* jbe@0: * Copyright (c) 2006, FlexiGuided GmbH, Berlin, Germany jbe@0: * Author: Jan Behrens jbe@0: * All rights reserved. jbe@0: * jbe@0: * Redistribution and use in source and binary forms, with or without jbe@0: * modification, are permitted provided that the following conditions are jbe@0: * met: jbe@0: * jbe@0: * 1. Redistributions of source code must retain the above copyright jbe@0: * notice, this list of conditions and the following disclaimer. jbe@0: * 2. Redistributions in binary form must reproduce the above copyright jbe@0: * notice, this list of conditions and the following disclaimer in the jbe@0: * documentation and/or other materials provided with the distribution. jbe@0: * 3. Neither the name of the FlexiGuided GmbH nor the names of its jbe@0: * contributors may be used to endorse or promote products derived from jbe@0: * this software without specific prior written permission. jbe@0: * jbe@0: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS jbe@0: * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT jbe@0: * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A jbe@0: * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER jbe@0: * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, jbe@0: * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, jbe@0: * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR jbe@0: * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF jbe@0: * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING jbe@0: * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS jbe@0: * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. jbe@0: * jbe@0: */ jbe@0: jbe@0: jbe@0: /* jbe@0: * File name: ruby/utf8proc_native.c jbe@1: * Version: 0.2 jbe@1: * Last changed: 2006-06-20 jbe@0: * jbe@0: * Description: jbe@0: * Native part of the ruby wrapper for libutf8proc. jbe@0: */ jbe@0: jbe@0: jbe@0: #include "../utf8proc.c" jbe@0: #include "ruby.h" jbe@0: jbe@0: typedef struct utf8proc_ruby_mapenv_struct { jbe@0: int32_t *buffer; jbe@0: } utf8proc_ruby_mapenv_t; jbe@0: jbe@0: void utf8proc_ruby_mapenv_free(utf8proc_ruby_mapenv_t *env) { jbe@0: free(env->buffer); jbe@0: free(env); jbe@0: } jbe@0: jbe@0: VALUE utf8proc_ruby_module; jbe@0: VALUE utf8proc_ruby_options; jbe@0: VALUE utf8proc_ruby_eUnicodeError; jbe@0: VALUE utf8proc_ruby_eInvalidUtf8Error; jbe@0: VALUE utf8proc_ruby_eCodeNotAssignedError; jbe@0: jbe@0: VALUE utf8proc_ruby_map_error(ssize_t result) { jbe@0: VALUE excpt_class; jbe@0: switch (result) { jbe@0: case UTF8PROC_ERROR_NOMEM: jbe@0: excpt_class = rb_eNoMemError; break; jbe@0: case UTF8PROC_ERROR_OVERFLOW: jbe@0: excpt_class = rb_eArgError; break; jbe@0: case UTF8PROC_ERROR_INVALIDUTF8: jbe@0: excpt_class = utf8proc_ruby_eInvalidUtf8Error; break; jbe@0: case UTF8PROC_ERROR_NOTASSIGNED: jbe@0: excpt_class = utf8proc_ruby_eCodeNotAssignedError; break; jbe@0: default: jbe@0: excpt_class = rb_eRuntimeError; jbe@0: } jbe@0: rb_raise(excpt_class, "%s", utf8proc_errmsg(result)); jbe@0: return Qnil; jbe@0: } jbe@0: jbe@0: VALUE utf8proc_ruby_map(VALUE self, VALUE str_param, VALUE options_param) { jbe@0: VALUE str; jbe@0: int options; jbe@0: VALUE env_obj; jbe@0: utf8proc_ruby_mapenv_t *env; jbe@0: ssize_t result; jbe@0: VALUE retval; jbe@0: str = StringValue(str_param); jbe@0: options = NUM2INT(options_param) & ~UTF8PROC_NULLTERM; jbe@0: env_obj = Data_Make_Struct(rb_cObject, utf8proc_ruby_mapenv_t, NULL, jbe@0: utf8proc_ruby_mapenv_free, env); jbe@0: result = utf8proc_decompose(RSTRING(str)->ptr, RSTRING(str)->len, jbe@0: NULL, 0, options); jbe@0: if (result < 0) utf8proc_ruby_map_error(result); jbe@0: env->buffer = ALLOC_N(int32_t, result+1); jbe@0: result = utf8proc_decompose(RSTRING(str)->ptr, RSTRING(str)->len, jbe@0: env->buffer, result, options); jbe@0: if (result < 0) { jbe@0: free(env->buffer); jbe@0: env->buffer = 0; jbe@1: utf8proc_ruby_map_error(result); jbe@0: } jbe@0: result = utf8proc_reencode(env->buffer, result, options); jbe@0: if (result < 0) { jbe@0: free(env->buffer); jbe@0: env->buffer = 0; jbe@1: utf8proc_ruby_map_error(result); jbe@0: } jbe@0: retval = rb_str_new((char *)env->buffer, result); jbe@0: free(env->buffer); jbe@0: env->buffer = 0; jbe@0: return retval; jbe@0: } jbe@0: jbe@0: static VALUE utf8proc_ruby_char(VALUE self, VALUE code_param) { jbe@0: char buffer[4]; jbe@0: ssize_t result; jbe@0: int uc; jbe@0: uc = NUM2INT(code_param); jbe@0: if (uc < 0 || ((uc & 0xFFFF) >= 0xFFFE) || (uc >= 0xD800 && uc < 0xE000) || jbe@0: (uc >= 0xFDD0 && uc < 0xFDF0)) jbe@0: rb_raise(rb_eArgError, "Invalid Unicode code point"); jbe@0: result = utf8proc_encode_char(uc, buffer); jbe@0: return rb_str_new(buffer, result); jbe@0: } jbe@0: jbe@0: #define register_utf8proc_option(sym, field) \ jbe@0: rb_hash_aset(utf8proc_ruby_options, ID2SYM(rb_intern(sym)), INT2FIX(field)) jbe@0: jbe@0: void Init_utf8proc_native() { jbe@0: utf8proc_ruby_module = rb_define_module("Utf8Proc"); jbe@0: rb_define_module_function(utf8proc_ruby_module, "utf8map", jbe@0: utf8proc_ruby_map, 2); jbe@0: rb_define_module_function(utf8proc_ruby_module, "utf8char", jbe@0: utf8proc_ruby_char, 1); jbe@0: utf8proc_ruby_eUnicodeError = rb_define_class_under(utf8proc_ruby_module, jbe@0: "UnicodeError", rb_eStandardError); jbe@0: utf8proc_ruby_eInvalidUtf8Error = rb_define_class_under( jbe@0: utf8proc_ruby_module, "InvalidUtf8Error", utf8proc_ruby_eUnicodeError); jbe@0: utf8proc_ruby_eCodeNotAssignedError = rb_define_class_under( jbe@0: utf8proc_ruby_module, "CodeNotAssignedError", jbe@0: utf8proc_ruby_eUnicodeError); jbe@0: utf8proc_ruby_options = rb_hash_new(); jbe@0: register_utf8proc_option("stable", UTF8PROC_STABLE); jbe@0: register_utf8proc_option("compat", UTF8PROC_COMPAT); jbe@0: register_utf8proc_option("compose", UTF8PROC_COMPOSE); jbe@0: register_utf8proc_option("ignore", UTF8PROC_IGNORE); jbe@0: register_utf8proc_option("rejectna", UTF8PROC_REJECTNA); jbe@0: register_utf8proc_option("nlf2ls", UTF8PROC_NLF2LS); jbe@0: register_utf8proc_option("nlf2ps", UTF8PROC_NLF2PS); jbe@0: register_utf8proc_option("nlf2lf", UTF8PROC_NLF2LF); jbe@0: register_utf8proc_option("stripcc", UTF8PROC_STRIPCC); jbe@0: register_utf8proc_option("casefold", UTF8PROC_CASEFOLD); jbe@0: OBJ_FREEZE(utf8proc_ruby_options); jbe@0: rb_define_const(utf8proc_ruby_module, "Options", utf8proc_ruby_options); jbe@0: } jbe@0: jbe@0: