utf8proc

diff ruby/utf8proc_native.c @ 0:a0368662434c

Version 0.1
author jbe
date Fri Jun 02 12:00:00 2006 +0200 (2006-06-02)
parents
children 61a89ecc2fb9
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/ruby/utf8proc_native.c	Fri Jun 02 12:00:00 2006 +0200
     1.3 @@ -0,0 +1,157 @@
     1.4 +/*
     1.5 + *  Copyright (c) 2006, FlexiGuided GmbH, Berlin, Germany
     1.6 + *  Author: Jan Behrens <jan.behrens@flexiguided.de>
     1.7 + *  All rights reserved.
     1.8 + *
     1.9 + *  Redistribution and use in source and binary forms, with or without
    1.10 + *  modification, are permitted provided that the following conditions are
    1.11 + *  met:
    1.12 + *
    1.13 + *  1. Redistributions of source code must retain the above copyright
    1.14 + *     notice, this list of conditions and the following disclaimer.
    1.15 + *  2. Redistributions in binary form must reproduce the above copyright
    1.16 + *     notice, this list of conditions and the following disclaimer in the
    1.17 + *     documentation and/or other materials provided with the distribution.
    1.18 + *  3. Neither the name of the FlexiGuided GmbH nor the names of its
    1.19 + *     contributors may be used to endorse or promote products derived from
    1.20 + *     this software without specific prior written permission.
    1.21 + *
    1.22 + *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    1.23 + *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    1.24 + *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    1.25 + *  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
    1.26 + *  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    1.27 + *  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    1.28 + *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    1.29 + *  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
    1.30 + *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
    1.31 + *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    1.32 + *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    1.33 + *
    1.34 + */
    1.35 + 
    1.36 +
    1.37 +/*
    1.38 + *  File name:    ruby/utf8proc_native.c
    1.39 + *  Version:      0.1
    1.40 + *  Last changed: 2006-05-31
    1.41 + *
    1.42 + *  Description:
    1.43 + *  Native part of the ruby wrapper for libutf8proc.
    1.44 + */
    1.45 +
    1.46 +
    1.47 +#include "../utf8proc.c"
    1.48 +#include "ruby.h"
    1.49 +
    1.50 +typedef struct utf8proc_ruby_mapenv_struct {
    1.51 +  int32_t *buffer;
    1.52 +} utf8proc_ruby_mapenv_t;
    1.53 +
    1.54 +void utf8proc_ruby_mapenv_free(utf8proc_ruby_mapenv_t *env) {
    1.55 +  free(env->buffer);
    1.56 +  free(env);
    1.57 +}
    1.58 +
    1.59 +VALUE utf8proc_ruby_module;
    1.60 +VALUE utf8proc_ruby_options;
    1.61 +VALUE utf8proc_ruby_eUnicodeError;
    1.62 +VALUE utf8proc_ruby_eInvalidUtf8Error;
    1.63 +VALUE utf8proc_ruby_eCodeNotAssignedError;
    1.64 +
    1.65 +VALUE utf8proc_ruby_map_error(ssize_t result) {
    1.66 +  VALUE excpt_class;
    1.67 +  switch (result) {
    1.68 +    case UTF8PROC_ERROR_NOMEM:
    1.69 +    excpt_class = rb_eNoMemError; break;
    1.70 +    case UTF8PROC_ERROR_OVERFLOW:
    1.71 +    excpt_class = rb_eArgError; break;
    1.72 +    case UTF8PROC_ERROR_INVALIDUTF8:
    1.73 +    excpt_class = utf8proc_ruby_eInvalidUtf8Error; break;
    1.74 +    case UTF8PROC_ERROR_NOTASSIGNED:
    1.75 +    excpt_class = utf8proc_ruby_eCodeNotAssignedError; break;
    1.76 +    default:
    1.77 +    excpt_class = rb_eRuntimeError;
    1.78 +  }
    1.79 +  rb_raise(excpt_class, "%s", utf8proc_errmsg(result));
    1.80 +  return Qnil;
    1.81 +}
    1.82 +
    1.83 +VALUE utf8proc_ruby_map(VALUE self, VALUE str_param, VALUE options_param) {
    1.84 +  VALUE str;
    1.85 +  int options;
    1.86 +  VALUE env_obj;
    1.87 +  utf8proc_ruby_mapenv_t *env;
    1.88 +  ssize_t result;
    1.89 +  VALUE retval;
    1.90 +  str = StringValue(str_param);
    1.91 +  options = NUM2INT(options_param) & ~UTF8PROC_NULLTERM;
    1.92 +  env_obj = Data_Make_Struct(rb_cObject, utf8proc_ruby_mapenv_t, NULL,
    1.93 +    utf8proc_ruby_mapenv_free, env);
    1.94 +  result = utf8proc_decompose(RSTRING(str)->ptr, RSTRING(str)->len,
    1.95 +    NULL, 0, options);
    1.96 +  if (result < 0) utf8proc_ruby_map_error(result);
    1.97 +  env->buffer = ALLOC_N(int32_t, result+1);
    1.98 +  result = utf8proc_decompose(RSTRING(str)->ptr, RSTRING(str)->len,
    1.99 +    env->buffer, result, options);
   1.100 +  if (result < 0) {
   1.101 +    free(env->buffer);
   1.102 +    env->buffer = 0;
   1.103 +    func_map_error(result);
   1.104 +  }
   1.105 +  result = utf8proc_reencode(env->buffer, result, options);
   1.106 +  if (result < 0) {
   1.107 +    free(env->buffer);
   1.108 +    env->buffer = 0;
   1.109 +    func_map_error(result);
   1.110 +  }
   1.111 +  retval = rb_str_new((char *)env->buffer, result);
   1.112 +  free(env->buffer);
   1.113 +  env->buffer = 0;
   1.114 +  return retval;
   1.115 +}
   1.116 +
   1.117 +static VALUE utf8proc_ruby_char(VALUE self, VALUE code_param) {
   1.118 +  char buffer[4];
   1.119 +  ssize_t result;
   1.120 +  int uc;
   1.121 +  uc = NUM2INT(code_param);
   1.122 +  if (uc < 0 || ((uc & 0xFFFF) >= 0xFFFE) || (uc >= 0xD800 && uc < 0xE000) ||
   1.123 +      (uc >= 0xFDD0 && uc < 0xFDF0))
   1.124 +    rb_raise(rb_eArgError, "Invalid Unicode code point");
   1.125 +  result = utf8proc_encode_char(uc, buffer);
   1.126 +  return rb_str_new(buffer, result);
   1.127 +}
   1.128 +
   1.129 +#define register_utf8proc_option(sym, field) \
   1.130 +  rb_hash_aset(utf8proc_ruby_options, ID2SYM(rb_intern(sym)), INT2FIX(field))
   1.131 +
   1.132 +void Init_utf8proc_native() {
   1.133 +  utf8proc_ruby_module = rb_define_module("Utf8Proc");
   1.134 +  rb_define_module_function(utf8proc_ruby_module, "utf8map",
   1.135 +    utf8proc_ruby_map, 2);
   1.136 +  rb_define_module_function(utf8proc_ruby_module, "utf8char",
   1.137 +    utf8proc_ruby_char, 1);
   1.138 +  utf8proc_ruby_eUnicodeError = rb_define_class_under(utf8proc_ruby_module,
   1.139 +    "UnicodeError", rb_eStandardError);
   1.140 +  utf8proc_ruby_eInvalidUtf8Error = rb_define_class_under(
   1.141 +    utf8proc_ruby_module, "InvalidUtf8Error", utf8proc_ruby_eUnicodeError);
   1.142 +  utf8proc_ruby_eCodeNotAssignedError = rb_define_class_under(
   1.143 +    utf8proc_ruby_module, "CodeNotAssignedError",
   1.144 +    utf8proc_ruby_eUnicodeError);
   1.145 +  utf8proc_ruby_options = rb_hash_new();
   1.146 +  register_utf8proc_option("stable",   UTF8PROC_STABLE);
   1.147 +  register_utf8proc_option("compat",   UTF8PROC_COMPAT);
   1.148 +  register_utf8proc_option("compose",  UTF8PROC_COMPOSE);
   1.149 +  register_utf8proc_option("ignore",   UTF8PROC_IGNORE);
   1.150 +  register_utf8proc_option("rejectna", UTF8PROC_REJECTNA);
   1.151 +  register_utf8proc_option("nlf2ls",   UTF8PROC_NLF2LS);
   1.152 +  register_utf8proc_option("nlf2ps",   UTF8PROC_NLF2PS);
   1.153 +  register_utf8proc_option("nlf2lf",   UTF8PROC_NLF2LF);
   1.154 +  register_utf8proc_option("stripcc",  UTF8PROC_STRIPCC);
   1.155 +  register_utf8proc_option("casefold", UTF8PROC_CASEFOLD);
   1.156 +  OBJ_FREEZE(utf8proc_ruby_options);
   1.157 +  rb_define_const(utf8proc_ruby_module, "Options", utf8proc_ruby_options);
   1.158 +}
   1.159 +
   1.160 +

Impressum / About Us