utf8proc

annotate pgsql/utf8proc_pgsql.c @ 7:fcfd8c836c64

Version 1.1.1

- Added a new PostgreSQL function 'unistrip', which behaves like 'unifold', but also removes all character marks (e.g. accents).
- Changed license from BSD to MIT style.
- Added a new function 'utf8proc_codepoint_valid' to the C library.
- Changed compiler flags in Makefile from -g -O0 to -O2
- The Ruby script used to build the utf8proc_data.c file is now included in the distribution.
author jbe
date Sun Jul 22 12:00:00 2007 +0200 (2007-07-22)
parents c18366878af9
children 951e73a98021
rev   line source
jbe@0 1 /*
jbe@7 2 * Copyright (c) 2006-2007 Jan Behrens, FlexiGuided GmbH, Berlin
jbe@0 3 *
jbe@7 4 * Permission is hereby granted, free of charge, to any person obtaining a
jbe@7 5 * copy of this software and associated documentation files (the "Software"),
jbe@7 6 * to deal in the Software without restriction, including without limitation
jbe@7 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
jbe@7 8 * and/or sell copies of the Software, and to permit persons to whom the
jbe@7 9 * Software is furnished to do so, subject to the following conditions:
jbe@0 10 *
jbe@7 11 * The above copyright notice and this permission notice shall be included in
jbe@7 12 * all copies or substantial portions of the Software.
jbe@0 13 *
jbe@7 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
jbe@7 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
jbe@7 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
jbe@7 17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
jbe@7 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
jbe@7 19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
jbe@7 20 * DEALINGS IN THE SOFTWARE.
jbe@0 21 */
jbe@7 22
jbe@0 23
jbe@0 24 /*
jbe@0 25 * File name: pgsql/utf8proc_pgsql.c
jbe@7 26 * Version: 1.1.1
jbe@7 27 * Last changed: 2007-07-22
jbe@0 28 *
jbe@0 29 * Description:
jbe@7 30 * PostgreSQL extension to provide two functions 'unifold' and 'unistrip',
jbe@7 31 * which can be used to case-fold and normalize index fields and
jbe@7 32 * optionally strip marks (e.g. accents) from strings.
jbe@0 33 */
jbe@0 34
jbe@0 35
jbe@0 36 #include "../utf8proc.c"
jbe@0 37
jbe@0 38 #include <postgres.h>
jbe@0 39 #include <utils/elog.h>
jbe@0 40 #include <fmgr.h>
jbe@1 41 #include <string.h>
jbe@0 42 #include <unistd.h>
jbe@0 43 #include <utils/builtins.h>
jbe@0 44
jbe@5 45 #ifdef PG_MODULE_MAGIC
jbe@5 46 PG_MODULE_MAGIC;
jbe@5 47 #endif
jbe@5 48
/* utf8proc option mask passed to the mapping helper by 'unifold'. */
#define UTF8PROC_PGSQL_FOLD_OPTS ( \
  UTF8PROC_REJECTNA | UTF8PROC_COMPAT   | UTF8PROC_COMPOSE | \
  UTF8PROC_STABLE   | UTF8PROC_IGNORE   | UTF8PROC_STRIPCC | \
  UTF8PROC_NLF2LF   | UTF8PROC_CASEFOLD | UTF8PROC_LUMP )

/*
 * utf8proc option mask passed to the mapping helper by 'unistrip': the same
 * options as for 'unifold', plus UTF8PROC_STRIPMARK so that character marks
 * (e.g. accents) are removed as well.
 */
#define UTF8PROC_PGSQL_STRIP_OPTS ( \
  UTF8PROC_PGSQL_FOLD_OPTS | UTF8PROC_STRIPMARK )
jbe@1 55
jbe@7 56 ssize_t utf8proc_pgsql_utf8map(
jbe@7 57 text *input_string, text **output_string_ptr, int options
jbe@7 58 ) {
jbe@0 59 ssize_t result;
jbe@7 60 text *output_string;
jbe@7 61 result = utf8proc_decompose(
jbe@7 62 VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ,
jbe@7 63 NULL, 0, options
jbe@7 64 );
jbe@7 65 if (result < 0) return result;
jbe@7 66 if (result > (SIZE_MAX-1-VARHDRSZ)/sizeof(int32_t))
jbe@7 67 return UTF8PROC_ERROR_OVERFLOW;
jbe@7 68 // reserve one extra byte for termination
jbe@7 69 *output_string_ptr = palloc(result * sizeof(int32_t) + 1 + VARHDRSZ);
jbe@7 70 output_string = *output_string_ptr;
jbe@7 71 if (!output_string) return UTF8PROC_ERROR_NOMEM;
jbe@7 72 result = utf8proc_decompose(
jbe@7 73 VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ,
jbe@7 74 (int32_t *)VARDATA(output_string), result, options
jbe@7 75 );
jbe@7 76 if (result < 0) return result;
jbe@7 77 result = utf8proc_reencode(
jbe@7 78 (int32_t *)VARDATA(output_string), result, options
jbe@7 79 );
jbe@7 80 if (result >= 0) VARATT_SIZEP(output_string) = result + VARHDRSZ;
jbe@7 81 return result;
jbe@7 82 }
jbe@7 83
jbe@7 84 void utf8proc_pgsql_utf8map_errchk(ssize_t result, text *output_string) {
jbe@0 85 if (result < 0) {
jbe@0 86 int sqlerrcode;
jbe@1 87 if (output_string) pfree(output_string);
jbe@0 88 switch(result) {
jbe@0 89 case UTF8PROC_ERROR_NOMEM:
jbe@0 90 sqlerrcode = ERRCODE_OUT_OF_MEMORY; break;
jbe@0 91 case UTF8PROC_ERROR_OVERFLOW:
jbe@0 92 sqlerrcode = ERRCODE_PROGRAM_LIMIT_EXCEEDED; break;
jbe@0 93 case UTF8PROC_ERROR_INVALIDUTF8:
jbe@0 94 case UTF8PROC_ERROR_NOTASSIGNED:
jbe@7 95 return;
jbe@0 96 default:
jbe@0 97 sqlerrcode = ERRCODE_INTERNAL_ERROR;
jbe@0 98 }
jbe@0 99 ereport(ERROR, (
jbe@0 100 errcode(sqlerrcode),
jbe@0 101 errmsg("%s", utf8proc_errmsg(result))
jbe@0 102 ));
jbe@0 103 }
jbe@0 104 }
jbe@0 105
jbe@7 106 PG_FUNCTION_INFO_V1(utf8proc_pgsql_unifold);
jbe@7 107 Datum utf8proc_pgsql_unifold(PG_FUNCTION_ARGS) {
jbe@7 108 text *input_string;
jbe@7 109 text *output_string = NULL;
jbe@7 110 ssize_t result;
jbe@7 111 input_string = PG_GETARG_TEXT_P(0);
jbe@7 112 result = utf8proc_pgsql_utf8map(
jbe@7 113 input_string, &output_string, UTF8PROC_PGSQL_FOLD_OPTS
jbe@7 114 );
jbe@7 115 PG_FREE_IF_COPY(input_string, 0);
jbe@7 116 utf8proc_pgsql_utf8map_errchk(result, output_string);
jbe@7 117 if (result >= 0) {
jbe@7 118 PG_RETURN_TEXT_P(output_string);
jbe@7 119 } else {
jbe@7 120 PG_RETURN_NULL();
jbe@7 121 }
jbe@7 122 }
jbe@0 123
jbe@7 124 PG_FUNCTION_INFO_V1(utf8proc_pgsql_unistrip);
jbe@7 125 Datum utf8proc_pgsql_unistrip(PG_FUNCTION_ARGS) {
jbe@7 126 text *input_string;
jbe@7 127 text *output_string = NULL;
jbe@7 128 ssize_t result;
jbe@7 129 input_string = PG_GETARG_TEXT_P(0);
jbe@7 130 result = utf8proc_pgsql_utf8map(
jbe@7 131 input_string, &output_string, UTF8PROC_PGSQL_STRIP_OPTS
jbe@7 132 );
jbe@7 133 PG_FREE_IF_COPY(input_string, 0);
jbe@7 134 utf8proc_pgsql_utf8map_errchk(result, output_string);
jbe@7 135 if (result >= 0) {
jbe@7 136 PG_RETURN_TEXT_P(output_string);
jbe@7 137 } else {
jbe@7 138 PG_RETURN_NULL();
jbe@7 139 }
jbe@7 140 }
jbe@7 141

Impressum / About Us