utf8proc
view pgsql/utf8proc_pgsql.c @ 10:00d2bcbdc945
Version 1.1.4
- replaced C++ style comments for compatibility reasons
- added typecasts to suppress compiler warnings
- removed redundant source files for ruby-gemfile generation
- Changed copyright notice for Public Software Group e. V.
- Minor changes in the README file
- replaced C++ style comments for compatibility reasons
- added typecasts to suppress compiler warnings
- removed redundant source files for ruby-gemfile generation
- Changed copyright notice for Public Software Group e. V.
- Minor changes in the README file
author | jbe |
---|---|
date | Wed Aug 19 12:00:00 2009 +0200 (2009-08-19) |
parents | 951e73a98021 |
children |
line source
1 /*
2 * Copyright (c) Public Software Group e. V., Berlin, Germany
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
24 /*
25 * File name: pgsql/utf8proc_pgsql.c
26 *
27 * Description:
28 * PostgreSQL extension to provide two functions 'unifold' and 'unistrip',
29 * which can be used to case-fold and normalize index fields and
30 * optionally strip marks (e.g. accents) from strings.
31 */
34 #include "../utf8proc.c"
36 #include <postgres.h>
37 #include <utils/elog.h>
38 #include <fmgr.h>
39 #include <string.h>
40 #include <unistd.h>
41 #include <utils/builtins.h>
43 #ifdef PG_MODULE_MAGIC
44 PG_MODULE_MAGIC;
45 #endif
/*
 * utf8proc option set passed to the mapping routine by 'unifold'.
 */
#define UTF8PROC_PGSQL_FOLD_OPTS ( \
  UTF8PROC_REJECTNA | \
  UTF8PROC_COMPAT   | \
  UTF8PROC_COMPOSE  | \
  UTF8PROC_STABLE   | \
  UTF8PROC_IGNORE   | \
  UTF8PROC_STRIPCC  | \
  UTF8PROC_NLF2LF   | \
  UTF8PROC_CASEFOLD | \
  UTF8PROC_LUMP       \
)

/*
 * utf8proc option set passed to the mapping routine by 'unistrip':
 * identical to the fold options with UTF8PROC_STRIPMARK added.
 */
#define UTF8PROC_PGSQL_STRIP_OPTS \
  ( UTF8PROC_PGSQL_FOLD_OPTS | UTF8PROC_STRIPMARK )
54 ssize_t utf8proc_pgsql_utf8map(
55 text *input_string, text **output_string_ptr, int options
56 ) {
57 ssize_t result;
58 text *output_string;
59 result = utf8proc_decompose(
60 VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ,
61 NULL, 0, options
62 );
63 if (result < 0) return result;
64 if (result > (SIZE_MAX-1-VARHDRSZ)/sizeof(int32_t))
65 return UTF8PROC_ERROR_OVERFLOW;
66 /* reserve one extra byte for termination */
67 *output_string_ptr = palloc(result * sizeof(int32_t) + 1 + VARHDRSZ);
68 output_string = *output_string_ptr;
69 if (!output_string) return UTF8PROC_ERROR_NOMEM;
70 result = utf8proc_decompose(
71 VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ,
72 (int32_t *)VARDATA(output_string), result, options
73 );
74 if (result < 0) return result;
75 result = utf8proc_reencode(
76 (int32_t *)VARDATA(output_string), result, options
77 );
78 if (result >= 0) SET_VARSIZE(output_string, result + VARHDRSZ);
79 return result;
80 }
82 void utf8proc_pgsql_utf8map_errchk(ssize_t result, text *output_string) {
83 if (result < 0) {
84 int sqlerrcode;
85 if (output_string) pfree(output_string);
86 switch(result) {
87 case UTF8PROC_ERROR_NOMEM:
88 sqlerrcode = ERRCODE_OUT_OF_MEMORY; break;
89 case UTF8PROC_ERROR_OVERFLOW:
90 sqlerrcode = ERRCODE_PROGRAM_LIMIT_EXCEEDED; break;
91 case UTF8PROC_ERROR_INVALIDUTF8:
92 case UTF8PROC_ERROR_NOTASSIGNED:
93 return;
94 default:
95 sqlerrcode = ERRCODE_INTERNAL_ERROR;
96 }
97 ereport(ERROR, (
98 errcode(sqlerrcode),
99 errmsg("%s", utf8proc_errmsg(result))
100 ));
101 }
102 }
104 PG_FUNCTION_INFO_V1(utf8proc_pgsql_unifold);
105 Datum utf8proc_pgsql_unifold(PG_FUNCTION_ARGS) {
106 text *input_string;
107 text *output_string = NULL;
108 ssize_t result;
109 input_string = PG_GETARG_TEXT_P(0);
110 result = utf8proc_pgsql_utf8map(
111 input_string, &output_string, UTF8PROC_PGSQL_FOLD_OPTS
112 );
113 PG_FREE_IF_COPY(input_string, 0);
114 utf8proc_pgsql_utf8map_errchk(result, output_string);
115 if (result >= 0) {
116 PG_RETURN_TEXT_P(output_string);
117 } else {
118 PG_RETURN_NULL();
119 }
120 }
122 PG_FUNCTION_INFO_V1(utf8proc_pgsql_unistrip);
123 Datum utf8proc_pgsql_unistrip(PG_FUNCTION_ARGS) {
124 text *input_string;
125 text *output_string = NULL;
126 ssize_t result;
127 input_string = PG_GETARG_TEXT_P(0);
128 result = utf8proc_pgsql_utf8map(
129 input_string, &output_string, UTF8PROC_PGSQL_STRIP_OPTS
130 );
131 PG_FREE_IF_COPY(input_string, 0);
132 utf8proc_pgsql_utf8map_errchk(result, output_string);
133 if (result >= 0) {
134 PG_RETURN_TEXT_P(output_string);
135 } else {
136 PG_RETURN_NULL();
137 }
138 }