rev |
line source |
jbe@0
|
1 /*
|
jbe@0
|
2 * Copyright (c) 2006, FlexiGuided GmbH, Berlin, Germany
|
jbe@0
|
3 * Author: Jan Behrens <jan.behrens@flexiguided.de>
|
jbe@0
|
4 * All rights reserved.
|
jbe@0
|
5 *
|
jbe@0
|
6 * Redistribution and use in source and binary forms, with or without
|
jbe@0
|
7 * modification, are permitted provided that the following conditions are
|
jbe@0
|
8 * met:
|
jbe@0
|
9 *
|
jbe@0
|
10 * 1. Redistributions of source code must retain the above copyright
|
jbe@0
|
11 * notice, this list of conditions and the following disclaimer.
|
jbe@0
|
12 * 2. Redistributions in binary form must reproduce the above copyright
|
jbe@0
|
13 * notice, this list of conditions and the following disclaimer in the
|
jbe@0
|
14 * documentation and/or other materials provided with the distribution.
|
jbe@0
|
15 * 3. Neither the name of the FlexiGuided GmbH nor the names of its
|
jbe@0
|
16 * contributors may be used to endorse or promote products derived from
|
jbe@0
|
17 * this software without specific prior written permission.
|
jbe@0
|
18 *
|
jbe@0
|
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
jbe@0
|
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
jbe@0
|
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
jbe@0
|
22 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
jbe@0
|
23 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
jbe@0
|
24 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
jbe@0
|
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
jbe@0
|
26 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
jbe@0
|
27 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
jbe@0
|
28 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
jbe@0
|
29 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
jbe@0
|
30 *
|
jbe@0
|
31 */
|
jbe@0
|
32
|
jbe@0
|
33
|
jbe@0
|
34 /*
|
jbe@0
|
35 * File name: ruby/utf8proc_native.c
|
jbe@0
|
36 * Version: 0.1
|
jbe@0
|
37 * Last changed: 2006-05-31
|
jbe@0
|
38 *
|
jbe@0
|
39 * Description:
|
jbe@0
|
40 * Native part of the ruby wrapper for libutf8proc.
|
jbe@0
|
41 */
|
jbe@0
|
42
|
jbe@0
|
43
|
jbe@0
|
44 #include "../utf8proc.c"
|
jbe@0
|
45 #include "ruby.h"
|
jbe@0
|
46
|
jbe@0
|
/*
 * Per-call environment of utf8proc_ruby_map.
 *
 * An instance is wrapped in a Ruby data object whose free callback is
 * utf8proc_ruby_mapenv_free, so that the buffer it points to is released
 * together with the wrapper.
 */
typedef struct utf8proc_ruby_mapenv_struct {
  /* Working array of 32-bit values; NULL/0 while nothing is allocated. */
  int32_t *buffer;
} utf8proc_ruby_mapenv_t;
|
jbe@0
|
50
|
jbe@0
|
51 void utf8proc_ruby_mapenv_free(utf8proc_ruby_mapenv_t *env) {
|
jbe@0
|
52 free(env->buffer);
|
jbe@0
|
53 free(env);
|
jbe@0
|
54 }
|
jbe@0
|
55
|
jbe@0
|
56 VALUE utf8proc_ruby_module;
|
jbe@0
|
57 VALUE utf8proc_ruby_options;
|
jbe@0
|
58 VALUE utf8proc_ruby_eUnicodeError;
|
jbe@0
|
59 VALUE utf8proc_ruby_eInvalidUtf8Error;
|
jbe@0
|
60 VALUE utf8proc_ruby_eCodeNotAssignedError;
|
jbe@0
|
61
|
jbe@0
|
62 VALUE utf8proc_ruby_map_error(ssize_t result) {
|
jbe@0
|
63 VALUE excpt_class;
|
jbe@0
|
64 switch (result) {
|
jbe@0
|
65 case UTF8PROC_ERROR_NOMEM:
|
jbe@0
|
66 excpt_class = rb_eNoMemError; break;
|
jbe@0
|
67 case UTF8PROC_ERROR_OVERFLOW:
|
jbe@0
|
68 excpt_class = rb_eArgError; break;
|
jbe@0
|
69 case UTF8PROC_ERROR_INVALIDUTF8:
|
jbe@0
|
70 excpt_class = utf8proc_ruby_eInvalidUtf8Error; break;
|
jbe@0
|
71 case UTF8PROC_ERROR_NOTASSIGNED:
|
jbe@0
|
72 excpt_class = utf8proc_ruby_eCodeNotAssignedError; break;
|
jbe@0
|
73 default:
|
jbe@0
|
74 excpt_class = rb_eRuntimeError;
|
jbe@0
|
75 }
|
jbe@0
|
76 rb_raise(excpt_class, "%s", utf8proc_errmsg(result));
|
jbe@0
|
77 return Qnil;
|
jbe@0
|
78 }
|
jbe@0
|
79
|
jbe@0
|
80 VALUE utf8proc_ruby_map(VALUE self, VALUE str_param, VALUE options_param) {
|
jbe@0
|
81 VALUE str;
|
jbe@0
|
82 int options;
|
jbe@0
|
83 VALUE env_obj;
|
jbe@0
|
84 utf8proc_ruby_mapenv_t *env;
|
jbe@0
|
85 ssize_t result;
|
jbe@0
|
86 VALUE retval;
|
jbe@0
|
87 str = StringValue(str_param);
|
jbe@0
|
88 options = NUM2INT(options_param) & ~UTF8PROC_NULLTERM;
|
jbe@0
|
89 env_obj = Data_Make_Struct(rb_cObject, utf8proc_ruby_mapenv_t, NULL,
|
jbe@0
|
90 utf8proc_ruby_mapenv_free, env);
|
jbe@0
|
91 result = utf8proc_decompose(RSTRING(str)->ptr, RSTRING(str)->len,
|
jbe@0
|
92 NULL, 0, options);
|
jbe@0
|
93 if (result < 0) utf8proc_ruby_map_error(result);
|
jbe@0
|
94 env->buffer = ALLOC_N(int32_t, result+1);
|
jbe@0
|
95 result = utf8proc_decompose(RSTRING(str)->ptr, RSTRING(str)->len,
|
jbe@0
|
96 env->buffer, result, options);
|
jbe@0
|
97 if (result < 0) {
|
jbe@0
|
98 free(env->buffer);
|
jbe@0
|
99 env->buffer = 0;
|
jbe@0
|
100 func_map_error(result);
|
jbe@0
|
101 }
|
jbe@0
|
102 result = utf8proc_reencode(env->buffer, result, options);
|
jbe@0
|
103 if (result < 0) {
|
jbe@0
|
104 free(env->buffer);
|
jbe@0
|
105 env->buffer = 0;
|
jbe@0
|
106 func_map_error(result);
|
jbe@0
|
107 }
|
jbe@0
|
108 retval = rb_str_new((char *)env->buffer, result);
|
jbe@0
|
109 free(env->buffer);
|
jbe@0
|
110 env->buffer = 0;
|
jbe@0
|
111 return retval;
|
jbe@0
|
112 }
|
jbe@0
|
113
|
jbe@0
|
114 static VALUE utf8proc_ruby_char(VALUE self, VALUE code_param) {
|
jbe@0
|
115 char buffer[4];
|
jbe@0
|
116 ssize_t result;
|
jbe@0
|
117 int uc;
|
jbe@0
|
118 uc = NUM2INT(code_param);
|
jbe@0
|
119 if (uc < 0 || ((uc & 0xFFFF) >= 0xFFFE) || (uc >= 0xD800 && uc < 0xE000) ||
|
jbe@0
|
120 (uc >= 0xFDD0 && uc < 0xFDF0))
|
jbe@0
|
121 rb_raise(rb_eArgError, "Invalid Unicode code point");
|
jbe@0
|
122 result = utf8proc_encode_char(uc, buffer);
|
jbe@0
|
123 return rb_str_new(buffer, result);
|
jbe@0
|
124 }
|
jbe@0
|
125
|
jbe@0
|
/*
 * Adds one entry to the utf8proc_ruby_options hash: the key is the Ruby
 * symbol named by the C string `name`, the value is `flag` as a Fixnum.
 */
#define register_utf8proc_option(name, flag) \
  rb_hash_aset(utf8proc_ruby_options, ID2SYM(rb_intern(name)), INT2FIX(flag))
|
jbe@0
|
128
|
jbe@0
|
129 void Init_utf8proc_native() {
|
jbe@0
|
130 utf8proc_ruby_module = rb_define_module("Utf8Proc");
|
jbe@0
|
131 rb_define_module_function(utf8proc_ruby_module, "utf8map",
|
jbe@0
|
132 utf8proc_ruby_map, 2);
|
jbe@0
|
133 rb_define_module_function(utf8proc_ruby_module, "utf8char",
|
jbe@0
|
134 utf8proc_ruby_char, 1);
|
jbe@0
|
135 utf8proc_ruby_eUnicodeError = rb_define_class_under(utf8proc_ruby_module,
|
jbe@0
|
136 "UnicodeError", rb_eStandardError);
|
jbe@0
|
137 utf8proc_ruby_eInvalidUtf8Error = rb_define_class_under(
|
jbe@0
|
138 utf8proc_ruby_module, "InvalidUtf8Error", utf8proc_ruby_eUnicodeError);
|
jbe@0
|
139 utf8proc_ruby_eCodeNotAssignedError = rb_define_class_under(
|
jbe@0
|
140 utf8proc_ruby_module, "CodeNotAssignedError",
|
jbe@0
|
141 utf8proc_ruby_eUnicodeError);
|
jbe@0
|
142 utf8proc_ruby_options = rb_hash_new();
|
jbe@0
|
143 register_utf8proc_option("stable", UTF8PROC_STABLE);
|
jbe@0
|
144 register_utf8proc_option("compat", UTF8PROC_COMPAT);
|
jbe@0
|
145 register_utf8proc_option("compose", UTF8PROC_COMPOSE);
|
jbe@0
|
146 register_utf8proc_option("ignore", UTF8PROC_IGNORE);
|
jbe@0
|
147 register_utf8proc_option("rejectna", UTF8PROC_REJECTNA);
|
jbe@0
|
148 register_utf8proc_option("nlf2ls", UTF8PROC_NLF2LS);
|
jbe@0
|
149 register_utf8proc_option("nlf2ps", UTF8PROC_NLF2PS);
|
jbe@0
|
150 register_utf8proc_option("nlf2lf", UTF8PROC_NLF2LF);
|
jbe@0
|
151 register_utf8proc_option("stripcc", UTF8PROC_STRIPCC);
|
jbe@0
|
152 register_utf8proc_option("casefold", UTF8PROC_CASEFOLD);
|
jbe@0
|
153 OBJ_FREEZE(utf8proc_ruby_options);
|
jbe@0
|
154 rb_define_const(utf8proc_ruby_module, "Options", utf8proc_ruby_options);
|
jbe@0
|
155 }
|
jbe@0
|
156
|
jbe@0
|
157
|