utf8proc
annotate pgsql/utf8proc_pgsql.c @ 7:fcfd8c836c64
Version 1.1.1
- Added a new PostgreSQL function 'unistrip', which behaves like 'unifold', but also removes all character marks (e.g. accents).
- Changed license from BSD to MIT style.
- Added a new function 'utf8proc_codepoint_valid' to the C library.
- Changed compiler flags in Makefile from -g -O0 to -O2
- The ruby script, which was used to build the utf8proc_data.c file, is now included in the distribution.
- Added a new PostgreSQL function 'unistrip', which behaves like 'unifold', but also removes all character marks (e.g. accents).
- Changed license from BSD to MIT style.
- Added a new function 'utf8proc_codepoint_valid' to the C library.
- Changed compiler flags in Makefile from -g -O0 to -O2
- The ruby script, which was used to build the utf8proc_data.c file, is now included in the distribution.
| author | jbe | 
|---|---|
| date | Sun Jul 22 12:00:00 2007 +0200 (2007-07-22) | 
| parents | c18366878af9 | 
| children | 951e73a98021 | 
| rev | line source | 
|---|---|
| jbe@0 | 1 /* | 
| jbe@7 | 2 * Copyright (c) 2006-2007 Jan Behrens, FlexiGuided GmbH, Berlin | 
| jbe@0 | 3 * | 
| jbe@7 | 4 * Permission is hereby granted, free of charge, to any person obtaining a | 
| jbe@7 | 5 * copy of this software and associated documentation files (the "Software"), | 
| jbe@7 | 6 * to deal in the Software without restriction, including without limitation | 
| jbe@7 | 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, | 
| jbe@7 | 8 * and/or sell copies of the Software, and to permit persons to whom the | 
| jbe@7 | 9 * Software is furnished to do so, subject to the following conditions: | 
| jbe@0 | 10 * | 
| jbe@7 | 11 * The above copyright notice and this permission notice shall be included in | 
| jbe@7 | 12 * all copies or substantial portions of the Software. | 
| jbe@0 | 13 * | 
| jbe@7 | 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
| jbe@7 | 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
| jbe@7 | 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | 
| jbe@7 | 17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 
| jbe@7 | 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | 
| jbe@7 | 19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | 
| jbe@7 | 20 * DEALINGS IN THE SOFTWARE. | 
| jbe@0 | 21 */ | 
| jbe@7 | 22 | 
| jbe@0 | 23 | 
| jbe@0 | 24 /* | 
| jbe@0 | 25 * File name: pgsql/utf8proc_pgsql.c | 
| jbe@7 | 26 * Version: 1.1.1 | 
| jbe@7 | 27 * Last changed: 2007-07-22 | 
| jbe@0 | 28 * | 
| jbe@0 | 29 * Description: | 
| jbe@7 | 30 * PostgreSQL extension to provide two functions 'unifold' and 'unistrip', | 
| jbe@7 | 31 * which can be used to case-fold and normalize index fields and | 
| jbe@7 | 32 * optionally strip marks (e.g. accents) from strings. | 
| jbe@0 | 33 */ | 
| jbe@0 | 34 | 
| jbe@0 | 35 | 
| jbe@0 | 36 #include "../utf8proc.c" | 
| jbe@0 | 37 | 
| jbe@0 | 38 #include <postgres.h> | 
| jbe@0 | 39 #include <utils/elog.h> | 
| jbe@0 | 40 #include <fmgr.h> | 
| jbe@1 | 41 #include <string.h> | 
| jbe@0 | 42 #include <unistd.h> | 
| jbe@0 | 43 #include <utils/builtins.h> | 
| jbe@0 | 44 | 
| jbe@5 | 45 #ifdef PG_MODULE_MAGIC | 
| jbe@5 | 46 PG_MODULE_MAGIC; | 
| jbe@5 | 47 #endif | 
| jbe@5 | 48 | 
/*
 * utf8proc option sets used by the two SQL-callable functions in this file.
 *
 * UTF8PROC_PGSQL_FOLD_OPTS  - flags passed by utf8proc_pgsql_unifold.
 * UTF8PROC_PGSQL_STRIP_OPTS - flags passed by utf8proc_pgsql_unistrip; the
 *                             same set with UTF8PROC_STRIPMARK added.
 */
#define UTF8PROC_PGSQL_FOLD_OPTS \
  ( UTF8PROC_REJECTNA | UTF8PROC_COMPAT   | UTF8PROC_COMPOSE | \
    UTF8PROC_STABLE   | UTF8PROC_IGNORE   | UTF8PROC_STRIPCC | \
    UTF8PROC_NLF2LF   | UTF8PROC_CASEFOLD | UTF8PROC_LUMP )
#define UTF8PROC_PGSQL_STRIP_OPTS \
  ( UTF8PROC_PGSQL_FOLD_OPTS | UTF8PROC_STRIPMARK )
| jbe@1 | 55 | 
| jbe@7 | 56 ssize_t utf8proc_pgsql_utf8map( | 
| jbe@7 | 57 text *input_string, text **output_string_ptr, int options | 
| jbe@7 | 58 ) { | 
| jbe@0 | 59 ssize_t result; | 
| jbe@7 | 60 text *output_string; | 
| jbe@7 | 61 result = utf8proc_decompose( | 
| jbe@7 | 62 VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ, | 
| jbe@7 | 63 NULL, 0, options | 
| jbe@7 | 64 ); | 
| jbe@7 | 65 if (result < 0) return result; | 
| jbe@7 | 66 if (result > (SIZE_MAX-1-VARHDRSZ)/sizeof(int32_t)) | 
| jbe@7 | 67 return UTF8PROC_ERROR_OVERFLOW; | 
| jbe@7 | 68 // reserve one extra byte for termination | 
| jbe@7 | 69 *output_string_ptr = palloc(result * sizeof(int32_t) + 1 + VARHDRSZ); | 
| jbe@7 | 70 output_string = *output_string_ptr; | 
| jbe@7 | 71 if (!output_string) return UTF8PROC_ERROR_NOMEM; | 
| jbe@7 | 72 result = utf8proc_decompose( | 
| jbe@7 | 73 VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ, | 
| jbe@7 | 74 (int32_t *)VARDATA(output_string), result, options | 
| jbe@7 | 75 ); | 
| jbe@7 | 76 if (result < 0) return result; | 
| jbe@7 | 77 result = utf8proc_reencode( | 
| jbe@7 | 78 (int32_t *)VARDATA(output_string), result, options | 
| jbe@7 | 79 ); | 
| jbe@7 | 80 if (result >= 0) VARATT_SIZEP(output_string) = result + VARHDRSZ; | 
| jbe@7 | 81 return result; | 
| jbe@7 | 82 } | 
| jbe@7 | 83 | 
| jbe@7 | 84 void utf8proc_pgsql_utf8map_errchk(ssize_t result, text *output_string) { | 
| jbe@0 | 85 if (result < 0) { | 
| jbe@0 | 86 int sqlerrcode; | 
| jbe@1 | 87 if (output_string) pfree(output_string); | 
| jbe@0 | 88 switch(result) { | 
| jbe@0 | 89 case UTF8PROC_ERROR_NOMEM: | 
| jbe@0 | 90 sqlerrcode = ERRCODE_OUT_OF_MEMORY; break; | 
| jbe@0 | 91 case UTF8PROC_ERROR_OVERFLOW: | 
| jbe@0 | 92 sqlerrcode = ERRCODE_PROGRAM_LIMIT_EXCEEDED; break; | 
| jbe@0 | 93 case UTF8PROC_ERROR_INVALIDUTF8: | 
| jbe@0 | 94 case UTF8PROC_ERROR_NOTASSIGNED: | 
| jbe@7 | 95 return; | 
| jbe@0 | 96 default: | 
| jbe@0 | 97 sqlerrcode = ERRCODE_INTERNAL_ERROR; | 
| jbe@0 | 98 } | 
| jbe@0 | 99 ereport(ERROR, ( | 
| jbe@0 | 100 errcode(sqlerrcode), | 
| jbe@0 | 101 errmsg("%s", utf8proc_errmsg(result)) | 
| jbe@0 | 102 )); | 
| jbe@0 | 103 } | 
| jbe@0 | 104 } | 
| jbe@0 | 105 | 
| jbe@7 | 106 PG_FUNCTION_INFO_V1(utf8proc_pgsql_unifold); | 
| jbe@7 | 107 Datum utf8proc_pgsql_unifold(PG_FUNCTION_ARGS) { | 
| jbe@7 | 108 text *input_string; | 
| jbe@7 | 109 text *output_string = NULL; | 
| jbe@7 | 110 ssize_t result; | 
| jbe@7 | 111 input_string = PG_GETARG_TEXT_P(0); | 
| jbe@7 | 112 result = utf8proc_pgsql_utf8map( | 
| jbe@7 | 113 input_string, &output_string, UTF8PROC_PGSQL_FOLD_OPTS | 
| jbe@7 | 114 ); | 
| jbe@7 | 115 PG_FREE_IF_COPY(input_string, 0); | 
| jbe@7 | 116 utf8proc_pgsql_utf8map_errchk(result, output_string); | 
| jbe@7 | 117 if (result >= 0) { | 
| jbe@7 | 118 PG_RETURN_TEXT_P(output_string); | 
| jbe@7 | 119 } else { | 
| jbe@7 | 120 PG_RETURN_NULL(); | 
| jbe@7 | 121 } | 
| jbe@7 | 122 } | 
| jbe@0 | 123 | 
| jbe@7 | 124 PG_FUNCTION_INFO_V1(utf8proc_pgsql_unistrip); | 
| jbe@7 | 125 Datum utf8proc_pgsql_unistrip(PG_FUNCTION_ARGS) { | 
| jbe@7 | 126 text *input_string; | 
| jbe@7 | 127 text *output_string = NULL; | 
| jbe@7 | 128 ssize_t result; | 
| jbe@7 | 129 input_string = PG_GETARG_TEXT_P(0); | 
| jbe@7 | 130 result = utf8proc_pgsql_utf8map( | 
| jbe@7 | 131 input_string, &output_string, UTF8PROC_PGSQL_STRIP_OPTS | 
| jbe@7 | 132 ); | 
| jbe@7 | 133 PG_FREE_IF_COPY(input_string, 0); | 
| jbe@7 | 134 utf8proc_pgsql_utf8map_errchk(result, output_string); | 
| jbe@7 | 135 if (result >= 0) { | 
| jbe@7 | 136 PG_RETURN_TEXT_P(output_string); | 
| jbe@7 | 137 } else { | 
| jbe@7 | 138 PG_RETURN_NULL(); | 
| jbe@7 | 139 } | 
| jbe@7 | 140 } | 
| jbe@7 | 141 |