jbe@0: /* jbe@7: * Copyright (c) 2006-2007 Jan Behrens, FlexiGuided GmbH, Berlin jbe@0: * jbe@7: * Permission is hereby granted, free of charge, to any person obtaining a jbe@7: * copy of this software and associated documentation files (the "Software"), jbe@7: * to deal in the Software without restriction, including without limitation jbe@7: * the rights to use, copy, modify, merge, publish, distribute, sublicense, jbe@7: * and/or sell copies of the Software, and to permit persons to whom the jbe@7: * Software is furnished to do so, subject to the following conditions: jbe@0: * jbe@7: * The above copyright notice and this permission notice shall be included in jbe@7: * all copies or substantial portions of the Software. jbe@0: * jbe@7: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR jbe@7: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, jbe@7: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE jbe@7: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER jbe@7: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING jbe@7: * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER jbe@7: * DEALINGS IN THE SOFTWARE. jbe@0: */ jbe@7: jbe@0: jbe@0: /* jbe@0: * File name: pgsql/utf8proc_pgsql.c jbe@7: * Version: 1.1.1 jbe@7: * Last changed: 2007-07-22 jbe@0: * jbe@0: * Description: jbe@7: * PostgreSQL extension to provide two functions 'unifold' and 'unistrip', jbe@7: * which can be used to case-fold and normalize index fields and jbe@7: * optionally strip marks (e.g. accents) from strings. jbe@0: */ jbe@0: jbe@0: jbe@0: #include "../utf8proc.c" jbe@0: jbe@0: #include jbe@0: #include jbe@0: #include jbe@1: #include jbe@0: #include jbe@0: #include jbe@0: jbe@5: #ifdef PG_MODULE_MAGIC jbe@5: PG_MODULE_MAGIC; jbe@5: #endif jbe@5: jbe@7: #define UTF8PROC_PGSQL_FOLD_OPTS ( UTF8PROC_REJECTNA | UTF8PROC_COMPAT | \ jbe@2: UTF8PROC_COMPOSE | UTF8PROC_STABLE | UTF8PROC_IGNORE | UTF8PROC_STRIPCC | \ jbe@3: UTF8PROC_NLF2LF | UTF8PROC_CASEFOLD | UTF8PROC_LUMP ) jbe@7: #define UTF8PROC_PGSQL_STRIP_OPTS ( UTF8PROC_REJECTNA | UTF8PROC_COMPAT | \ jbe@7: UTF8PROC_COMPOSE | UTF8PROC_STABLE | UTF8PROC_IGNORE | UTF8PROC_STRIPCC | \ jbe@7: UTF8PROC_NLF2LF | UTF8PROC_CASEFOLD | UTF8PROC_LUMP | UTF8PROC_STRIPMARK ) jbe@1: jbe@7: ssize_t utf8proc_pgsql_utf8map( jbe@7: text *input_string, text **output_string_ptr, int options jbe@7: ) { jbe@0: ssize_t result; jbe@7: text *output_string; jbe@7: result = utf8proc_decompose( jbe@7: VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ, jbe@7: NULL, 0, options jbe@7: ); jbe@7: if (result < 0) return result; jbe@7: if (result > (SIZE_MAX-1-VARHDRSZ)/sizeof(int32_t)) jbe@7: return UTF8PROC_ERROR_OVERFLOW; jbe@7: // reserve one extra byte for termination jbe@7: *output_string_ptr = palloc(result * sizeof(int32_t) + 1 + VARHDRSZ); jbe@7: output_string = *output_string_ptr; jbe@7: if (!output_string) return UTF8PROC_ERROR_NOMEM; jbe@7: result = utf8proc_decompose( jbe@7: VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ, jbe@7: (int32_t *)VARDATA(output_string), result, options jbe@7: ); jbe@7: if (result < 0) return result; jbe@7: result = utf8proc_reencode( jbe@7: (int32_t *)VARDATA(output_string), result, options jbe@7: ); jbe@7: if (result >= 0) VARATT_SIZEP(output_string) = result + VARHDRSZ; jbe@7: return result; jbe@7: } jbe@7: jbe@7: void utf8proc_pgsql_utf8map_errchk(ssize_t result, text *output_string) { jbe@0: if (result < 0) { jbe@0: int sqlerrcode; jbe@1: if (output_string) pfree(output_string); jbe@0: switch(result) { jbe@0: case UTF8PROC_ERROR_NOMEM: jbe@0: sqlerrcode = ERRCODE_OUT_OF_MEMORY; break; jbe@0: case UTF8PROC_ERROR_OVERFLOW: jbe@0: sqlerrcode = ERRCODE_PROGRAM_LIMIT_EXCEEDED; break; jbe@0: case UTF8PROC_ERROR_INVALIDUTF8: jbe@0: case UTF8PROC_ERROR_NOTASSIGNED: jbe@7: return; jbe@0: default: jbe@0: sqlerrcode = ERRCODE_INTERNAL_ERROR; jbe@0: } jbe@0: ereport(ERROR, ( jbe@0: errcode(sqlerrcode), jbe@0: errmsg("%s", utf8proc_errmsg(result)) jbe@0: )); jbe@0: } jbe@0: } jbe@0: jbe@7: PG_FUNCTION_INFO_V1(utf8proc_pgsql_unifold); jbe@7: Datum utf8proc_pgsql_unifold(PG_FUNCTION_ARGS) { jbe@7: text *input_string; jbe@7: text *output_string = NULL; jbe@7: ssize_t result; jbe@7: input_string = PG_GETARG_TEXT_P(0); jbe@7: result = utf8proc_pgsql_utf8map( jbe@7: input_string, &output_string, UTF8PROC_PGSQL_FOLD_OPTS jbe@7: ); jbe@7: PG_FREE_IF_COPY(input_string, 0); jbe@7: utf8proc_pgsql_utf8map_errchk(result, output_string); jbe@7: if (result >= 0) { jbe@7: PG_RETURN_TEXT_P(output_string); jbe@7: } else { jbe@7: PG_RETURN_NULL(); jbe@7: } jbe@7: } jbe@0: jbe@7: PG_FUNCTION_INFO_V1(utf8proc_pgsql_unistrip); jbe@7: Datum utf8proc_pgsql_unistrip(PG_FUNCTION_ARGS) { jbe@7: text *input_string; jbe@7: text *output_string = NULL; jbe@7: ssize_t result; jbe@7: input_string = PG_GETARG_TEXT_P(0); jbe@7: result = utf8proc_pgsql_utf8map( jbe@7: input_string, &output_string, UTF8PROC_PGSQL_STRIP_OPTS jbe@7: ); jbe@7: PG_FREE_IF_COPY(input_string, 0); jbe@7: utf8proc_pgsql_utf8map_errchk(result, output_string); jbe@7: if (result >= 0) { jbe@7: PG_RETURN_TEXT_P(output_string); jbe@7: } else { jbe@7: PG_RETURN_NULL(); jbe@7: } jbe@7: } jbe@7: