rev |
line source |
jbe@0
|
1 /*
|
jbe@10
|
2 * Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
|
jbe@0
|
3 *
|
jbe@7
|
4 * Permission is hereby granted, free of charge, to any person obtaining a
|
jbe@7
|
5 * copy of this software and associated documentation files (the "Software"),
|
jbe@7
|
6 * to deal in the Software without restriction, including without limitation
|
jbe@7
|
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
jbe@7
|
8 * and/or sell copies of the Software, and to permit persons to whom the
|
jbe@7
|
9 * Software is furnished to do so, subject to the following conditions:
|
jbe@0
|
10 *
|
jbe@7
|
11 * The above copyright notice and this permission notice shall be included in
|
jbe@7
|
12 * all copies or substantial portions of the Software.
|
jbe@0
|
13 *
|
jbe@7
|
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
jbe@7
|
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
jbe@7
|
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
jbe@7
|
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
jbe@7
|
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
jbe@7
|
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
jbe@7
|
20 * DEALINGS IN THE SOFTWARE.
|
jbe@0
|
21 */
|
jbe@7
|
22
|
jbe@0
|
23
|
jbe@0
|
24 /*
|
jbe@0
|
25 * File name: ruby/utf8proc_native.c
|
jbe@0
|
26 *
|
jbe@0
|
27 * Description:
|
jbe@0
|
28 * Native part of the ruby wrapper for libutf8proc.
|
jbe@0
|
29 */
|
jbe@0
|
30
|
jbe@0
|
31
|
jbe@0
|
32 #include "../utf8proc.c"
|
jbe@0
|
33 #include "ruby.h"
|
jbe@0
|
34
|
jbe@11
|
35 #ifndef RSTRING_PTR
|
jbe@11
|
36 #define RSTRING_PTR(s) (RSTRING(s)->ptr)
|
jbe@11
|
37 #endif
|
jbe@11
|
38 #ifndef RSTRING_LEN
|
jbe@11
|
39 #define RSTRING_LEN(s) (RSTRING(s)->len)
|
jbe@11
|
40 #endif
|
jbe@11
|
41
|
jbe@0
|
/*
 * Per-call environment for utf8proc_ruby_map.
 *
 * It is wrapped in a Ruby data object so that the code point buffer is
 * released by the garbage collector if an exception unwinds the call
 * before the buffer has been freed explicitly.
 */
typedef struct utf8proc_ruby_mapenv_struct {
  int32_t *buffer;  /* working buffer, or NULL when nothing is allocated */
} utf8proc_ruby_mapenv_t;

/*
 * Finalizer registered with the wrapping Ruby data object.
 *
 * Both the environment and its buffer are obtained through Ruby's
 * allocator (Data_Make_Struct / ALLOC_N), so they are released with
 * xfree() rather than the C library's free().
 */
void utf8proc_ruby_mapenv_free(utf8proc_ruby_mapenv_t *env) {
  if (env == NULL) {
    return;
  }
  xfree(env->buffer);  /* xfree(NULL) is a no-op */
  env->buffer = NULL;
  xfree(env);
}
|
jbe@0
|
50
|
jbe@0
|
51 VALUE utf8proc_ruby_module;
|
jbe@0
|
52 VALUE utf8proc_ruby_options;
|
jbe@0
|
53 VALUE utf8proc_ruby_eUnicodeError;
|
jbe@0
|
54 VALUE utf8proc_ruby_eInvalidUtf8Error;
|
jbe@0
|
55 VALUE utf8proc_ruby_eCodeNotAssignedError;
|
jbe@0
|
56
|
jbe@0
|
57 VALUE utf8proc_ruby_map_error(ssize_t result) {
|
jbe@0
|
58 VALUE excpt_class;
|
jbe@0
|
59 switch (result) {
|
jbe@0
|
60 case UTF8PROC_ERROR_NOMEM:
|
jbe@0
|
61 excpt_class = rb_eNoMemError; break;
|
jbe@0
|
62 case UTF8PROC_ERROR_OVERFLOW:
|
jbe@3
|
63 case UTF8PROC_ERROR_INVALIDOPTS:
|
jbe@0
|
64 excpt_class = rb_eArgError; break;
|
jbe@0
|
65 case UTF8PROC_ERROR_INVALIDUTF8:
|
jbe@0
|
66 excpt_class = utf8proc_ruby_eInvalidUtf8Error; break;
|
jbe@0
|
67 case UTF8PROC_ERROR_NOTASSIGNED:
|
jbe@0
|
68 excpt_class = utf8proc_ruby_eCodeNotAssignedError; break;
|
jbe@0
|
69 default:
|
jbe@0
|
70 excpt_class = rb_eRuntimeError;
|
jbe@0
|
71 }
|
jbe@0
|
72 rb_raise(excpt_class, "%s", utf8proc_errmsg(result));
|
jbe@0
|
73 return Qnil;
|
jbe@0
|
74 }
|
jbe@0
|
75
|
jbe@0
|
76 VALUE utf8proc_ruby_map(VALUE self, VALUE str_param, VALUE options_param) {
|
jbe@0
|
77 VALUE str;
|
jbe@0
|
78 int options;
|
jbe@0
|
79 VALUE env_obj;
|
jbe@0
|
80 utf8proc_ruby_mapenv_t *env;
|
jbe@0
|
81 ssize_t result;
|
jbe@0
|
82 VALUE retval;
|
jbe@0
|
83 str = StringValue(str_param);
|
jbe@0
|
84 options = NUM2INT(options_param) & ~UTF8PROC_NULLTERM;
|
jbe@0
|
85 env_obj = Data_Make_Struct(rb_cObject, utf8proc_ruby_mapenv_t, NULL,
|
jbe@0
|
86 utf8proc_ruby_mapenv_free, env);
|
jbe@11
|
87 result = utf8proc_decompose(RSTRING_PTR(str), RSTRING_LEN(str),
|
jbe@0
|
88 NULL, 0, options);
|
jbe@2
|
89 if (result < 0) {
|
jbe@2
|
90 utf8proc_ruby_map_error(result);
|
jbe@10
|
91 return Qnil; /* needed to prevent problems with optimization */
|
jbe@2
|
92 }
|
jbe@0
|
93 env->buffer = ALLOC_N(int32_t, result+1);
|
jbe@11
|
94 result = utf8proc_decompose(RSTRING_PTR(str), RSTRING_LEN(str),
|
jbe@0
|
95 env->buffer, result, options);
|
jbe@0
|
96 if (result < 0) {
|
jbe@0
|
97 free(env->buffer);
|
jbe@0
|
98 env->buffer = 0;
|
jbe@1
|
99 utf8proc_ruby_map_error(result);
|
jbe@10
|
100 return Qnil; /* needed to prevent problems with optimization */
|
jbe@0
|
101 }
|
jbe@0
|
102 result = utf8proc_reencode(env->buffer, result, options);
|
jbe@0
|
103 if (result < 0) {
|
jbe@0
|
104 free(env->buffer);
|
jbe@0
|
105 env->buffer = 0;
|
jbe@1
|
106 utf8proc_ruby_map_error(result);
|
jbe@10
|
107 return Qnil; /* needed to prevent problems with optimization */
|
jbe@0
|
108 }
|
jbe@0
|
109 retval = rb_str_new((char *)env->buffer, result);
|
jbe@0
|
110 free(env->buffer);
|
jbe@0
|
111 env->buffer = 0;
|
jbe@0
|
112 return retval;
|
jbe@0
|
113 }
|
jbe@0
|
114
|
jbe@0
|
115 static VALUE utf8proc_ruby_char(VALUE self, VALUE code_param) {
|
jbe@0
|
116 char buffer[4];
|
jbe@0
|
117 ssize_t result;
|
jbe@0
|
118 int uc;
|
jbe@0
|
119 uc = NUM2INT(code_param);
|
jbe@7
|
120 if (!utf8proc_codepoint_valid(uc))
|
jbe@0
|
121 rb_raise(rb_eArgError, "Invalid Unicode code point");
|
jbe@0
|
122 result = utf8proc_encode_char(uc, buffer);
|
jbe@0
|
123 return rb_str_new(buffer, result);
|
jbe@0
|
124 }
|
jbe@0
|
125
|
jbe@0
|
126 #define register_utf8proc_option(sym, field) \
|
jbe@0
|
127 rb_hash_aset(utf8proc_ruby_options, ID2SYM(rb_intern(sym)), INT2FIX(field))
|
jbe@0
|
128
|
jbe@0
|
129 void Init_utf8proc_native() {
|
jbe@0
|
130 utf8proc_ruby_module = rb_define_module("Utf8Proc");
|
jbe@0
|
131 rb_define_module_function(utf8proc_ruby_module, "utf8map",
|
jbe@0
|
132 utf8proc_ruby_map, 2);
|
jbe@0
|
133 rb_define_module_function(utf8proc_ruby_module, "utf8char",
|
jbe@0
|
134 utf8proc_ruby_char, 1);
|
jbe@0
|
135 utf8proc_ruby_eUnicodeError = rb_define_class_under(utf8proc_ruby_module,
|
jbe@0
|
136 "UnicodeError", rb_eStandardError);
|
jbe@0
|
137 utf8proc_ruby_eInvalidUtf8Error = rb_define_class_under(
|
jbe@0
|
138 utf8proc_ruby_module, "InvalidUtf8Error", utf8proc_ruby_eUnicodeError);
|
jbe@0
|
139 utf8proc_ruby_eCodeNotAssignedError = rb_define_class_under(
|
jbe@0
|
140 utf8proc_ruby_module, "CodeNotAssignedError",
|
jbe@0
|
141 utf8proc_ruby_eUnicodeError);
|
jbe@0
|
142 utf8proc_ruby_options = rb_hash_new();
|
jbe@2
|
143 register_utf8proc_option("stable", UTF8PROC_STABLE);
|
jbe@2
|
144 register_utf8proc_option("compat", UTF8PROC_COMPAT);
|
jbe@2
|
145 register_utf8proc_option("compose", UTF8PROC_COMPOSE);
|
jbe@2
|
146 register_utf8proc_option("decompose", UTF8PROC_DECOMPOSE);
|
jbe@2
|
147 register_utf8proc_option("ignore", UTF8PROC_IGNORE);
|
jbe@2
|
148 register_utf8proc_option("rejectna", UTF8PROC_REJECTNA);
|
jbe@2
|
149 register_utf8proc_option("nlf2ls", UTF8PROC_NLF2LS);
|
jbe@2
|
150 register_utf8proc_option("nlf2ps", UTF8PROC_NLF2PS);
|
jbe@2
|
151 register_utf8proc_option("nlf2lf", UTF8PROC_NLF2LF);
|
jbe@2
|
152 register_utf8proc_option("stripcc", UTF8PROC_STRIPCC);
|
jbe@2
|
153 register_utf8proc_option("casefold", UTF8PROC_CASEFOLD);
|
jbe@2
|
154 register_utf8proc_option("charbound", UTF8PROC_CHARBOUND);
|
jbe@3
|
155 register_utf8proc_option("lump", UTF8PROC_LUMP);
|
jbe@3
|
156 register_utf8proc_option("stripmark", UTF8PROC_STRIPMARK);
|
jbe@0
|
157 OBJ_FREEZE(utf8proc_ruby_options);
|
jbe@0
|
158 rb_define_const(utf8proc_ruby_module, "Options", utf8proc_ruby_options);
|
jbe@0
|
159 }
|
jbe@0
|
160
|