utf8proc

annotate pgsql/utf8proc_pgsql.c @ 17:47b467f4c128

Contribution from libmojibake fork (missing file "normtest.c")
author Jiahao Chen, Steven G. Johnson, Anthony David Kelman
date Mon Dec 01 14:32:19 2014 -0500 (2014-12-01)
parents 00d2bcbdc945
children
rev   line source
jbe@0 1 /*
jbe@10 2 * Copyright (c) Public Software Group e. V., Berlin, Germany
jbe@0 3 *
jbe@7 4 * Permission is hereby granted, free of charge, to any person obtaining a
jbe@7 5 * copy of this software and associated documentation files (the "Software"),
jbe@7 6 * to deal in the Software without restriction, including without limitation
jbe@7 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
jbe@7 8 * and/or sell copies of the Software, and to permit persons to whom the
jbe@7 9 * Software is furnished to do so, subject to the following conditions:
jbe@0 10 *
jbe@7 11 * The above copyright notice and this permission notice shall be included in
jbe@7 12 * all copies or substantial portions of the Software.
jbe@0 13 *
jbe@7 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
jbe@7 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
jbe@7 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
jbe@7 17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
jbe@7 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
jbe@7 19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
jbe@7 20 * DEALINGS IN THE SOFTWARE.
jbe@0 21 */
jbe@7 22
jbe@0 23
jbe@0 24 /*
jbe@0 25 * File name: pgsql/utf8proc_pgsql.c
jbe@0 26 *
jbe@0 27 * Description:
jbe@7 28 * PostgreSQL extension to provide two functions 'unifold' and 'unistrip',
jbe@7 29 * which can be used to case-fold and normalize index fields and
jbe@7 30 * optionally strip marks (e.g. accents) from strings.
jbe@0 31 */
jbe@0 32
jbe@0 33
jbe@0 34 #include "../utf8proc.c"
jbe@0 35
jbe@0 36 #include <postgres.h>
jbe@0 37 #include <utils/elog.h>
jbe@0 38 #include <fmgr.h>
jbe@1 39 #include <string.h>
jbe@0 40 #include <unistd.h>
jbe@0 41 #include <utils/builtins.h>
jbe@0 42
/*
 * Emit the module magic block only when the PostgreSQL headers included
 * above define the PG_MODULE_MAGIC macro.
 */
#if defined(PG_MODULE_MAGIC)
PG_MODULE_MAGIC;
#endif
jbe@5 46
/*
 * utf8proc option flags handed to utf8proc_pgsql_utf8map() by
 * utf8proc_pgsql_unifold().
 */
#define UTF8PROC_PGSQL_FOLD_OPTS ( \
  UTF8PROC_REJECTNA | UTF8PROC_COMPAT   | UTF8PROC_COMPOSE | \
  UTF8PROC_STABLE   | UTF8PROC_IGNORE   | UTF8PROC_STRIPCC | \
  UTF8PROC_NLF2LF   | UTF8PROC_CASEFOLD | UTF8PROC_LUMP )

/*
 * utf8proc option flags handed to utf8proc_pgsql_utf8map() by
 * utf8proc_pgsql_unistrip(): the fold option set plus UTF8PROC_STRIPMARK.
 */
#define UTF8PROC_PGSQL_STRIP_OPTS ( \
  UTF8PROC_PGSQL_FOLD_OPTS | UTF8PROC_STRIPMARK )
jbe@1 53
jbe@7 54 ssize_t utf8proc_pgsql_utf8map(
jbe@7 55 text *input_string, text **output_string_ptr, int options
jbe@7 56 ) {
jbe@0 57 ssize_t result;
jbe@7 58 text *output_string;
jbe@7 59 result = utf8proc_decompose(
jbe@7 60 VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ,
jbe@7 61 NULL, 0, options
jbe@7 62 );
jbe@7 63 if (result < 0) return result;
jbe@7 64 if (result > (SIZE_MAX-1-VARHDRSZ)/sizeof(int32_t))
jbe@7 65 return UTF8PROC_ERROR_OVERFLOW;
jbe@10 66 /* reserve one extra byte for termination */
jbe@7 67 *output_string_ptr = palloc(result * sizeof(int32_t) + 1 + VARHDRSZ);
jbe@7 68 output_string = *output_string_ptr;
jbe@7 69 if (!output_string) return UTF8PROC_ERROR_NOMEM;
jbe@7 70 result = utf8proc_decompose(
jbe@7 71 VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ,
jbe@7 72 (int32_t *)VARDATA(output_string), result, options
jbe@7 73 );
jbe@7 74 if (result < 0) return result;
jbe@7 75 result = utf8proc_reencode(
jbe@7 76 (int32_t *)VARDATA(output_string), result, options
jbe@7 77 );
jbe@9 78 if (result >= 0) SET_VARSIZE(output_string, result + VARHDRSZ);
jbe@7 79 return result;
jbe@7 80 }
jbe@7 81
jbe@7 82 void utf8proc_pgsql_utf8map_errchk(ssize_t result, text *output_string) {
jbe@0 83 if (result < 0) {
jbe@0 84 int sqlerrcode;
jbe@1 85 if (output_string) pfree(output_string);
jbe@0 86 switch(result) {
jbe@0 87 case UTF8PROC_ERROR_NOMEM:
jbe@0 88 sqlerrcode = ERRCODE_OUT_OF_MEMORY; break;
jbe@0 89 case UTF8PROC_ERROR_OVERFLOW:
jbe@0 90 sqlerrcode = ERRCODE_PROGRAM_LIMIT_EXCEEDED; break;
jbe@0 91 case UTF8PROC_ERROR_INVALIDUTF8:
jbe@0 92 case UTF8PROC_ERROR_NOTASSIGNED:
jbe@7 93 return;
jbe@0 94 default:
jbe@0 95 sqlerrcode = ERRCODE_INTERNAL_ERROR;
jbe@0 96 }
jbe@0 97 ereport(ERROR, (
jbe@0 98 errcode(sqlerrcode),
jbe@0 99 errmsg("%s", utf8proc_errmsg(result))
jbe@0 100 ));
jbe@0 101 }
jbe@0 102 }
jbe@0 103
jbe@7 104 PG_FUNCTION_INFO_V1(utf8proc_pgsql_unifold);
jbe@7 105 Datum utf8proc_pgsql_unifold(PG_FUNCTION_ARGS) {
jbe@7 106 text *input_string;
jbe@7 107 text *output_string = NULL;
jbe@7 108 ssize_t result;
jbe@7 109 input_string = PG_GETARG_TEXT_P(0);
jbe@7 110 result = utf8proc_pgsql_utf8map(
jbe@7 111 input_string, &output_string, UTF8PROC_PGSQL_FOLD_OPTS
jbe@7 112 );
jbe@7 113 PG_FREE_IF_COPY(input_string, 0);
jbe@7 114 utf8proc_pgsql_utf8map_errchk(result, output_string);
jbe@7 115 if (result >= 0) {
jbe@7 116 PG_RETURN_TEXT_P(output_string);
jbe@7 117 } else {
jbe@7 118 PG_RETURN_NULL();
jbe@7 119 }
jbe@7 120 }
jbe@0 121
jbe@7 122 PG_FUNCTION_INFO_V1(utf8proc_pgsql_unistrip);
jbe@7 123 Datum utf8proc_pgsql_unistrip(PG_FUNCTION_ARGS) {
jbe@7 124 text *input_string;
jbe@7 125 text *output_string = NULL;
jbe@7 126 ssize_t result;
jbe@7 127 input_string = PG_GETARG_TEXT_P(0);
jbe@7 128 result = utf8proc_pgsql_utf8map(
jbe@7 129 input_string, &output_string, UTF8PROC_PGSQL_STRIP_OPTS
jbe@7 130 );
jbe@7 131 PG_FREE_IF_COPY(input_string, 0);
jbe@7 132 utf8proc_pgsql_utf8map_errchk(result, output_string);
jbe@7 133 if (result >= 0) {
jbe@7 134 PG_RETURN_TEXT_P(output_string);
jbe@7 135 } else {
jbe@7 136 PG_RETURN_NULL();
jbe@7 137 }
jbe@7 138 }
jbe@7 139

Impressum / About Us