utf8proc
diff ruby/utf8proc_native.c @ 0:a0368662434c
Version 0.1
author | jbe |
---|---|
date | Fri Jun 02 12:00:00 2006 +0200 (2006-06-02) |
parents | |
children | 61a89ecc2fb9 |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/ruby/utf8proc_native.c Fri Jun 02 12:00:00 2006 +0200 1.3 @@ -0,0 +1,157 @@ 1.4 +/* 1.5 + * Copyright (c) 2006, FlexiGuided GmbH, Berlin, Germany 1.6 + * Author: Jan Behrens <jan.behrens@flexiguided.de> 1.7 + * All rights reserved. 1.8 + * 1.9 + * Redistribution and use in source and binary forms, with or without 1.10 + * modification, are permitted provided that the following conditions are 1.11 + * met: 1.12 + * 1.13 + * 1. Redistributions of source code must retain the above copyright 1.14 + * notice, this list of conditions and the following disclaimer. 1.15 + * 2. Redistributions in binary form must reproduce the above copyright 1.16 + * notice, this list of conditions and the following disclaimer in the 1.17 + * documentation and/or other materials provided with the distribution. 1.18 + * 3. Neither the name of the FlexiGuided GmbH nor the names of its 1.19 + * contributors may be used to endorse or promote products derived from 1.20 + * this software without specific prior written permission. 1.21 + * 1.22 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 1.23 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 1.24 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 1.25 + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 1.26 + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 1.27 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 1.28 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 1.29 + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 1.30 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 1.31 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 1.32 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 1.33 + * 1.34 + */ 1.35 + 1.36 + 1.37 +/* 1.38 + * File name: ruby/utf8proc_native.c 1.39 + * Version: 0.1 1.40 + * Last changed: 2006-05-31 1.41 + * 1.42 + * Description: 1.43 + * Native part of the ruby wrapper for libutf8proc. 1.44 + */ 1.45 + 1.46 + 1.47 +#include "../utf8proc.c" 1.48 +#include "ruby.h" 1.49 + 1.50 +typedef struct utf8proc_ruby_mapenv_struct { 1.51 + int32_t *buffer; 1.52 +} utf8proc_ruby_mapenv_t; 1.53 + 1.54 +void utf8proc_ruby_mapenv_free(utf8proc_ruby_mapenv_t *env) { 1.55 + free(env->buffer); 1.56 + free(env); 1.57 +} 1.58 + 1.59 +VALUE utf8proc_ruby_module; 1.60 +VALUE utf8proc_ruby_options; 1.61 +VALUE utf8proc_ruby_eUnicodeError; 1.62 +VALUE utf8proc_ruby_eInvalidUtf8Error; 1.63 +VALUE utf8proc_ruby_eCodeNotAssignedError; 1.64 + 1.65 +VALUE utf8proc_ruby_map_error(ssize_t result) { 1.66 + VALUE excpt_class; 1.67 + switch (result) { 1.68 + case UTF8PROC_ERROR_NOMEM: 1.69 + excpt_class = rb_eNoMemError; break; 1.70 + case UTF8PROC_ERROR_OVERFLOW: 1.71 + excpt_class = rb_eArgError; break; 1.72 + case UTF8PROC_ERROR_INVALIDUTF8: 1.73 + excpt_class = utf8proc_ruby_eInvalidUtf8Error; break; 1.74 + case UTF8PROC_ERROR_NOTASSIGNED: 1.75 + excpt_class = utf8proc_ruby_eCodeNotAssignedError; break; 1.76 + default: 1.77 + excpt_class = rb_eRuntimeError; 1.78 + } 1.79 + rb_raise(excpt_class, "%s", utf8proc_errmsg(result)); 1.80 + return Qnil; 1.81 +} 1.82 + 1.83 +VALUE utf8proc_ruby_map(VALUE self, VALUE str_param, VALUE options_param) { 1.84 + VALUE str; 1.85 + int options; 1.86 + VALUE env_obj; 1.87 + utf8proc_ruby_mapenv_t *env; 1.88 + ssize_t result; 1.89 + VALUE retval; 1.90 + str = StringValue(str_param); 1.91 + options = NUM2INT(options_param) & ~UTF8PROC_NULLTERM; 1.92 + env_obj = Data_Make_Struct(rb_cObject, utf8proc_ruby_mapenv_t, NULL, 1.93 + utf8proc_ruby_mapenv_free, env); 1.94 + result = utf8proc_decompose(RSTRING(str)->ptr, RSTRING(str)->len, 1.95 + NULL, 0, options); 1.96 + if (result < 0) utf8proc_ruby_map_error(result); 1.97 + env->buffer = ALLOC_N(int32_t, result+1); 1.98 + result = utf8proc_decompose(RSTRING(str)->ptr, RSTRING(str)->len, 1.99 + env->buffer, result, options); 1.100 + if (result < 0) { 1.101 + free(env->buffer); 1.102 + env->buffer = 0; 1.103 + func_map_error(result); 1.104 + } 1.105 + result = utf8proc_reencode(env->buffer, result, options); 1.106 + if (result < 0) { 1.107 + free(env->buffer); 1.108 + env->buffer = 0; 1.109 + func_map_error(result); 1.110 + } 1.111 + retval = rb_str_new((char *)env->buffer, result); 1.112 + free(env->buffer); 1.113 + env->buffer = 0; 1.114 + return retval; 1.115 +} 1.116 + 1.117 +static VALUE utf8proc_ruby_char(VALUE self, VALUE code_param) { 1.118 + char buffer[4]; 1.119 + ssize_t result; 1.120 + int uc; 1.121 + uc = NUM2INT(code_param); 1.122 + if (uc < 0 || ((uc & 0xFFFF) >= 0xFFFE) || (uc >= 0xD800 && uc < 0xE000) || 1.123 + (uc >= 0xFDD0 && uc < 0xFDF0)) 1.124 + rb_raise(rb_eArgError, "Invalid Unicode code point"); 1.125 + result = utf8proc_encode_char(uc, buffer); 1.126 + return rb_str_new(buffer, result); 1.127 +} 1.128 + 1.129 +#define register_utf8proc_option(sym, field) \ 1.130 + rb_hash_aset(utf8proc_ruby_options, ID2SYM(rb_intern(sym)), INT2FIX(field)) 1.131 + 1.132 +void Init_utf8proc_native() { 1.133 + utf8proc_ruby_module = rb_define_module("Utf8Proc"); 1.134 + rb_define_module_function(utf8proc_ruby_module, "utf8map", 1.135 + utf8proc_ruby_map, 2); 1.136 + rb_define_module_function(utf8proc_ruby_module, "utf8char", 1.137 + utf8proc_ruby_char, 1); 1.138 + utf8proc_ruby_eUnicodeError = rb_define_class_under(utf8proc_ruby_module, 1.139 + "UnicodeError", rb_eStandardError); 1.140 + utf8proc_ruby_eInvalidUtf8Error = rb_define_class_under( 1.141 + utf8proc_ruby_module, "InvalidUtf8Error", utf8proc_ruby_eUnicodeError); 1.142 + utf8proc_ruby_eCodeNotAssignedError = rb_define_class_under( 1.143 + utf8proc_ruby_module, "CodeNotAssignedError", 1.144 + utf8proc_ruby_eUnicodeError); 1.145 + utf8proc_ruby_options = rb_hash_new(); 1.146 + register_utf8proc_option("stable", UTF8PROC_STABLE); 1.147 + register_utf8proc_option("compat", UTF8PROC_COMPAT); 1.148 + register_utf8proc_option("compose", UTF8PROC_COMPOSE); 1.149 + register_utf8proc_option("ignore", UTF8PROC_IGNORE); 1.150 + register_utf8proc_option("rejectna", UTF8PROC_REJECTNA); 1.151 + register_utf8proc_option("nlf2ls", UTF8PROC_NLF2LS); 1.152 + register_utf8proc_option("nlf2ps", UTF8PROC_NLF2PS); 1.153 + register_utf8proc_option("nlf2lf", UTF8PROC_NLF2LF); 1.154 + register_utf8proc_option("stripcc", UTF8PROC_STRIPCC); 1.155 + register_utf8proc_option("casefold", UTF8PROC_CASEFOLD); 1.156 + OBJ_FREEZE(utf8proc_ruby_options); 1.157 + rb_define_const(utf8proc_ruby_module, "Options", utf8proc_ruby_options); 1.158 +} 1.159 + 1.160 +