utf8proc

view ruby/utf8proc_native.c @ 7:fcfd8c836c64

Version 1.1.1

- Added a new PostgreSQL function 'unistrip', which behaves like 'unifold', but also removes all character marks (e.g. accents).
- Changed license from BSD to MIT style.
- Added a new function 'utf8proc_codepoint_valid' to the C library.
- Changed compiler flags in Makefile from -g -O0 to -O2
- The ruby script, which was used to build the utf8proc_data.c file, is now included in the distribution.
author jbe
date Sun Jul 22 12:00:00 2007 +0200 (2007-07-22)
parents c18366878af9
children 00d2bcbdc945
line source
1 /*
2 * Copyright (c) 2006-2007 Jan Behrens, FlexiGuided GmbH, Berlin
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
24 /*
25 * File name: ruby/utf8proc_native.c
26 * Version: 1.1.1
27 * Last changed: 2007-07-22
28 *
29 * Description:
30 * Native part of the ruby wrapper for libutf8proc.
31 */
34 #include "../utf8proc.c"
35 #include "ruby.h"
/*
 * Per-call state of utf8proc_ruby_map.
 *
 * An instance is wrapped in a Ruby data object whose free function is
 * utf8proc_ruby_mapenv_free, which releases `buffer` and the struct itself.
 */
struct utf8proc_ruby_mapenv_struct {
  int32_t *buffer;  /* work area passed to utf8proc_decompose / utf8proc_reencode */
};
typedef struct utf8proc_ruby_mapenv_struct utf8proc_ruby_mapenv_t;
41 void utf8proc_ruby_mapenv_free(utf8proc_ruby_mapenv_t *env) {
42 free(env->buffer);
43 free(env);
44 }
46 VALUE utf8proc_ruby_module;
47 VALUE utf8proc_ruby_options;
48 VALUE utf8proc_ruby_eUnicodeError;
49 VALUE utf8proc_ruby_eInvalidUtf8Error;
50 VALUE utf8proc_ruby_eCodeNotAssignedError;
52 VALUE utf8proc_ruby_map_error(ssize_t result) {
53 VALUE excpt_class;
54 switch (result) {
55 case UTF8PROC_ERROR_NOMEM:
56 excpt_class = rb_eNoMemError; break;
57 case UTF8PROC_ERROR_OVERFLOW:
58 case UTF8PROC_ERROR_INVALIDOPTS:
59 excpt_class = rb_eArgError; break;
60 case UTF8PROC_ERROR_INVALIDUTF8:
61 excpt_class = utf8proc_ruby_eInvalidUtf8Error; break;
62 case UTF8PROC_ERROR_NOTASSIGNED:
63 excpt_class = utf8proc_ruby_eCodeNotAssignedError; break;
64 default:
65 excpt_class = rb_eRuntimeError;
66 }
67 rb_raise(excpt_class, "%s", utf8proc_errmsg(result));
68 return Qnil;
69 }
71 VALUE utf8proc_ruby_map(VALUE self, VALUE str_param, VALUE options_param) {
72 VALUE str;
73 int options;
74 VALUE env_obj;
75 utf8proc_ruby_mapenv_t *env;
76 ssize_t result;
77 VALUE retval;
78 str = StringValue(str_param);
79 options = NUM2INT(options_param) & ~UTF8PROC_NULLTERM;
80 env_obj = Data_Make_Struct(rb_cObject, utf8proc_ruby_mapenv_t, NULL,
81 utf8proc_ruby_mapenv_free, env);
82 result = utf8proc_decompose(RSTRING(str)->ptr, RSTRING(str)->len,
83 NULL, 0, options);
84 if (result < 0) {
85 utf8proc_ruby_map_error(result);
86 return Qnil; // needed to prevent problems with optimization
87 }
88 env->buffer = ALLOC_N(int32_t, result+1);
89 result = utf8proc_decompose(RSTRING(str)->ptr, RSTRING(str)->len,
90 env->buffer, result, options);
91 if (result < 0) {
92 free(env->buffer);
93 env->buffer = 0;
94 utf8proc_ruby_map_error(result);
95 return Qnil; // needed to prevent problems with optimization
96 }
97 result = utf8proc_reencode(env->buffer, result, options);
98 if (result < 0) {
99 free(env->buffer);
100 env->buffer = 0;
101 utf8proc_ruby_map_error(result);
102 return Qnil; // needed to prevent problems with optimization
103 }
104 retval = rb_str_new((char *)env->buffer, result);
105 free(env->buffer);
106 env->buffer = 0;
107 return retval;
108 }
110 static VALUE utf8proc_ruby_char(VALUE self, VALUE code_param) {
111 char buffer[4];
112 ssize_t result;
113 int uc;
114 uc = NUM2INT(code_param);
115 if (!utf8proc_codepoint_valid(uc))
116 rb_raise(rb_eArgError, "Invalid Unicode code point");
117 result = utf8proc_encode_char(uc, buffer);
118 return rb_str_new(buffer, result);
119 }
/*
 * Adds one entry to utf8proc_ruby_options: the key is the symbol interned
 * from the C string `name`, the value is `flag` converted with INT2FIX.
 */
#define register_utf8proc_option(name, flag) \
  rb_hash_aset(utf8proc_ruby_options, ID2SYM(rb_intern(name)), INT2FIX(flag))
124 void Init_utf8proc_native() {
125 utf8proc_ruby_module = rb_define_module("Utf8Proc");
126 rb_define_module_function(utf8proc_ruby_module, "utf8map",
127 utf8proc_ruby_map, 2);
128 rb_define_module_function(utf8proc_ruby_module, "utf8char",
129 utf8proc_ruby_char, 1);
130 utf8proc_ruby_eUnicodeError = rb_define_class_under(utf8proc_ruby_module,
131 "UnicodeError", rb_eStandardError);
132 utf8proc_ruby_eInvalidUtf8Error = rb_define_class_under(
133 utf8proc_ruby_module, "InvalidUtf8Error", utf8proc_ruby_eUnicodeError);
134 utf8proc_ruby_eCodeNotAssignedError = rb_define_class_under(
135 utf8proc_ruby_module, "CodeNotAssignedError",
136 utf8proc_ruby_eUnicodeError);
137 utf8proc_ruby_options = rb_hash_new();
138 register_utf8proc_option("stable", UTF8PROC_STABLE);
139 register_utf8proc_option("compat", UTF8PROC_COMPAT);
140 register_utf8proc_option("compose", UTF8PROC_COMPOSE);
141 register_utf8proc_option("decompose", UTF8PROC_DECOMPOSE);
142 register_utf8proc_option("ignore", UTF8PROC_IGNORE);
143 register_utf8proc_option("rejectna", UTF8PROC_REJECTNA);
144 register_utf8proc_option("nlf2ls", UTF8PROC_NLF2LS);
145 register_utf8proc_option("nlf2ps", UTF8PROC_NLF2PS);
146 register_utf8proc_option("nlf2lf", UTF8PROC_NLF2LF);
147 register_utf8proc_option("stripcc", UTF8PROC_STRIPCC);
148 register_utf8proc_option("casefold", UTF8PROC_CASEFOLD);
149 register_utf8proc_option("charbound", UTF8PROC_CHARBOUND);
150 register_utf8proc_option("lump", UTF8PROC_LUMP);
151 register_utf8proc_option("stripmark", UTF8PROC_STRIPMARK);
152 OBJ_FREEZE(utf8proc_ruby_options);
153 rb_define_const(utf8proc_ruby_module, "Options", utf8proc_ruby_options);
154 }

Impressum / About Us