utf8proc

view pgsql/utf8proc_pgsql.c @ 7:fcfd8c836c64

Version 1.1.1

- Added a new PostgreSQL function 'unistrip', which behaves like 'unifold', but also removes all character marks (e.g. accents).
- Changed license from BSD to MIT style.
- Added a new function 'utf8proc_codepoint_valid' to the C library.
- Changed compiler flags in the Makefile from -g -O0 to -O2.
- The Ruby script used to build the utf8proc_data.c file is now included in the distribution.
author jbe
date Sun Jul 22 12:00:00 2007 +0200 (2007-07-22)
parents c18366878af9
children 951e73a98021
line source
1 /*
2 * Copyright (c) 2006-2007 Jan Behrens, FlexiGuided GmbH, Berlin
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
24 /*
25 * File name: pgsql/utf8proc_pgsql.c
26 * Version: 1.1.1
27 * Last changed: 2007-07-22
28 *
29 * Description:
30 * PostgreSQL extension to provide two functions 'unifold' and 'unistrip',
31 * which can be used to case-fold and normalize index fields and
32 * optionally strip marks (e.g. accents) from strings.
33 */
36 #include "../utf8proc.c"
38 #include <postgres.h>
39 #include <utils/elog.h>
40 #include <fmgr.h>
41 #include <string.h>
42 #include <unistd.h>
43 #include <utils/builtins.h>
45 #ifdef PG_MODULE_MAGIC
46 PG_MODULE_MAGIC;
47 #endif
/*
 * utf8proc option flags passed by 'unifold' (utf8proc_pgsql_unifold).
 */
#define UTF8PROC_PGSQL_FOLD_OPTS ( \
  UTF8PROC_REJECTNA | \
  UTF8PROC_COMPAT   | \
  UTF8PROC_COMPOSE  | \
  UTF8PROC_STABLE   | \
  UTF8PROC_IGNORE   | \
  UTF8PROC_STRIPCC  | \
  UTF8PROC_NLF2LF   | \
  UTF8PROC_CASEFOLD | \
  UTF8PROC_LUMP )

/*
 * utf8proc option flags passed by 'unistrip' (utf8proc_pgsql_unistrip):
 * the same flags as UTF8PROC_PGSQL_FOLD_OPTS plus UTF8PROC_STRIPMARK.
 */
#define UTF8PROC_PGSQL_STRIP_OPTS ( \
  UTF8PROC_PGSQL_FOLD_OPTS | UTF8PROC_STRIPMARK )
56 ssize_t utf8proc_pgsql_utf8map(
57 text *input_string, text **output_string_ptr, int options
58 ) {
59 ssize_t result;
60 text *output_string;
61 result = utf8proc_decompose(
62 VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ,
63 NULL, 0, options
64 );
65 if (result < 0) return result;
66 if (result > (SIZE_MAX-1-VARHDRSZ)/sizeof(int32_t))
67 return UTF8PROC_ERROR_OVERFLOW;
68 // reserve one extra byte for termination
69 *output_string_ptr = palloc(result * sizeof(int32_t) + 1 + VARHDRSZ);
70 output_string = *output_string_ptr;
71 if (!output_string) return UTF8PROC_ERROR_NOMEM;
72 result = utf8proc_decompose(
73 VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ,
74 (int32_t *)VARDATA(output_string), result, options
75 );
76 if (result < 0) return result;
77 result = utf8proc_reencode(
78 (int32_t *)VARDATA(output_string), result, options
79 );
80 if (result >= 0) VARATT_SIZEP(output_string) = result + VARHDRSZ;
81 return result;
82 }
84 void utf8proc_pgsql_utf8map_errchk(ssize_t result, text *output_string) {
85 if (result < 0) {
86 int sqlerrcode;
87 if (output_string) pfree(output_string);
88 switch(result) {
89 case UTF8PROC_ERROR_NOMEM:
90 sqlerrcode = ERRCODE_OUT_OF_MEMORY; break;
91 case UTF8PROC_ERROR_OVERFLOW:
92 sqlerrcode = ERRCODE_PROGRAM_LIMIT_EXCEEDED; break;
93 case UTF8PROC_ERROR_INVALIDUTF8:
94 case UTF8PROC_ERROR_NOTASSIGNED:
95 return;
96 default:
97 sqlerrcode = ERRCODE_INTERNAL_ERROR;
98 }
99 ereport(ERROR, (
100 errcode(sqlerrcode),
101 errmsg("%s", utf8proc_errmsg(result))
102 ));
103 }
104 }
106 PG_FUNCTION_INFO_V1(utf8proc_pgsql_unifold);
107 Datum utf8proc_pgsql_unifold(PG_FUNCTION_ARGS) {
108 text *input_string;
109 text *output_string = NULL;
110 ssize_t result;
111 input_string = PG_GETARG_TEXT_P(0);
112 result = utf8proc_pgsql_utf8map(
113 input_string, &output_string, UTF8PROC_PGSQL_FOLD_OPTS
114 );
115 PG_FREE_IF_COPY(input_string, 0);
116 utf8proc_pgsql_utf8map_errchk(result, output_string);
117 if (result >= 0) {
118 PG_RETURN_TEXT_P(output_string);
119 } else {
120 PG_RETURN_NULL();
121 }
122 }
124 PG_FUNCTION_INFO_V1(utf8proc_pgsql_unistrip);
125 Datum utf8proc_pgsql_unistrip(PG_FUNCTION_ARGS) {
126 text *input_string;
127 text *output_string = NULL;
128 ssize_t result;
129 input_string = PG_GETARG_TEXT_P(0);
130 result = utf8proc_pgsql_utf8map(
131 input_string, &output_string, UTF8PROC_PGSQL_STRIP_OPTS
132 );
133 PG_FREE_IF_COPY(input_string, 0);
134 utf8proc_pgsql_utf8map_errchk(result, output_string);
135 if (result >= 0) {
136 PG_RETURN_TEXT_P(output_string);
137 } else {
138 PG_RETURN_NULL();
139 }
140 }

Impressum / About Us