utf8proc
annotate pgsql/utf8proc_pgsql.c @ 11:d79da2302625
Changes for Ruby 1.9 compatibility
- Use RSTRING_PTR() and RSTRING_LEN() instead of RSTRING()->ptr and RSTRING()->len for ruby1.9 compatibility (and #define them, if not existent)
- Use RSTRING_PTR() and RSTRING_LEN() instead of RSTRING()->ptr and RSTRING()->len for ruby1.9 compatibility (and #define them, if not existent)
author | jbe |
---|---|
date | Thu Aug 20 12:00:00 2009 +0200 (2009-08-20) |
parents | 00d2bcbdc945 |
children |
rev | line source |
---|---|
jbe@0 | 1 /* |
jbe@10 | 2 * Copyright (c) Public Software Group e. V., Berlin, Germany |
jbe@0 | 3 * |
jbe@7 | 4 * Permission is hereby granted, free of charge, to any person obtaining a |
jbe@7 | 5 * copy of this software and associated documentation files (the "Software"), |
jbe@7 | 6 * to deal in the Software without restriction, including without limitation |
jbe@7 | 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
jbe@7 | 8 * and/or sell copies of the Software, and to permit persons to whom the |
jbe@7 | 9 * Software is furnished to do so, subject to the following conditions: |
jbe@0 | 10 * |
jbe@7 | 11 * The above copyright notice and this permission notice shall be included in |
jbe@7 | 12 * all copies or substantial portions of the Software. |
jbe@0 | 13 * |
jbe@7 | 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
jbe@7 | 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
jbe@7 | 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
jbe@7 | 17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
jbe@7 | 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
jbe@7 | 19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
jbe@7 | 20 * DEALINGS IN THE SOFTWARE. |
jbe@0 | 21 */ |
jbe@7 | 22 |
jbe@0 | 23 |
jbe@0 | 24 /* |
jbe@0 | 25 * File name: pgsql/utf8proc_pgsql.c |
jbe@0 | 26 * |
jbe@0 | 27 * Description: |
jbe@7 | 28 * PostgreSQL extension to provide two functions 'unifold' and 'unistrip', |
jbe@7 | 29 * which can be used to case-fold and normalize index fields and |
jbe@7 | 30 * optionally strip marks (e.g. accents) from strings. |
jbe@0 | 31 */ |
jbe@0 | 32 |
jbe@0 | 33 |
jbe@0 | 34 #include "../utf8proc.c" |
jbe@0 | 35 |
jbe@0 | 36 #include <postgres.h> |
jbe@0 | 37 #include <utils/elog.h> |
jbe@0 | 38 #include <fmgr.h> |
jbe@1 | 39 #include <string.h> |
jbe@0 | 40 #include <unistd.h> |
jbe@0 | 41 #include <utils/builtins.h> |
jbe@0 | 42 |
jbe@5 | 43 #ifdef PG_MODULE_MAGIC /* emitted only when the included PostgreSQL headers define this macro */ |
jbe@5 | 44 PG_MODULE_MAGIC; |
jbe@5 | 45 #endif |
jbe@5 | 46 |
jbe@7 | 47 #define UTF8PROC_PGSQL_FOLD_OPTS ( UTF8PROC_REJECTNA | UTF8PROC_COMPAT | \ |
jbe@2 | 48 UTF8PROC_COMPOSE | UTF8PROC_STABLE | UTF8PROC_IGNORE | UTF8PROC_STRIPCC | \ |
jbe@3 | 49 UTF8PROC_NLF2LF | UTF8PROC_CASEFOLD | UTF8PROC_LUMP ) /* option flags that utf8proc_pgsql_unifold passes to utf8proc_pgsql_utf8map */ |
jbe@7 | 50 #define UTF8PROC_PGSQL_STRIP_OPTS ( UTF8PROC_REJECTNA | UTF8PROC_COMPAT | \ |
jbe@7 | 51 UTF8PROC_COMPOSE | UTF8PROC_STABLE | UTF8PROC_IGNORE | UTF8PROC_STRIPCC | \ |
jbe@7 | 52 UTF8PROC_NLF2LF | UTF8PROC_CASEFOLD | UTF8PROC_LUMP | UTF8PROC_STRIPMARK ) /* the fold flags plus UTF8PROC_STRIPMARK; passed by utf8proc_pgsql_unistrip */ |
jbe@1 | 53 |
jbe@7 | 54 ssize_t utf8proc_pgsql_utf8map( /* runs a text value through utf8proc_decompose and then utf8proc_reencode */ |
jbe@7 | 55 text *input_string, text **output_string_ptr, int options /* options is forwarded unchanged to every utf8proc call below */ |
jbe@7 | 56 ) { /* returns the utf8proc_reencode result, or a negative utf8proc error code; *output_string_ptr is written only once the allocation below is reached */ |
jbe@0 | 57 ssize_t result; |
jbe@7 | 58 text *output_string; |
jbe@7 | 59 result = utf8proc_decompose( /* first pass with a NULL buffer of size 0: its result is used below as the element count for the allocation */ |
jbe@7 | 60 VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ, |
jbe@7 | 61 NULL, 0, options |
jbe@7 | 62 ); |
jbe@7 | 63 if (result < 0) return result; /* error codes are handed back as-is; nothing has been allocated at this point */ |
jbe@7 | 64 if (result > (SIZE_MAX-1-VARHDRSZ)/sizeof(int32_t)) /* keeps result * sizeof(int32_t) + 1 + VARHDRSZ below from exceeding SIZE_MAX */ |
jbe@7 | 65 return UTF8PROC_ERROR_OVERFLOW; |
jbe@10 | 66 /* reserve one extra byte for termination */ |
jbe@7 | 67 *output_string_ptr = palloc(result * sizeof(int32_t) + 1 + VARHDRSZ); |
jbe@7 | 68 output_string = *output_string_ptr; |
jbe@7 | 69 if (!output_string) return UTF8PROC_ERROR_NOMEM; /* NOTE(review): palloc presumably reports failure itself rather than returning NULL, which would make this branch purely defensive - confirm */ |
jbe@7 | 70 result = utf8proc_decompose( /* second pass: writes up to result int32_t elements into the data area of output_string */ |
jbe@7 | 71 VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ, |
jbe@7 | 72 (int32_t *)VARDATA(output_string), result, options |
jbe@7 | 73 ); |
jbe@7 | 74 if (result < 0) return result; /* the buffer is not freed here; it remains reachable through *output_string_ptr */ |
jbe@7 | 75 result = utf8proc_reencode( /* works on the same buffer the decompose pass filled; presumably rewrites it in place as UTF-8 bytes - confirm against utf8proc */ |
jbe@7 | 76 (int32_t *)VARDATA(output_string), result, options |
jbe@7 | 77 ); |
jbe@9 | 78 if (result >= 0) SET_VARSIZE(output_string, result + VARHDRSZ); /* on success result is used as the payload length, with the header size added */ |
jbe@7 | 79 return result; |
jbe@7 | 80 } |
jbe@7 | 81 |
jbe@7 | 82 void utf8proc_pgsql_utf8map_errchk(ssize_t result, text *output_string) { /* does nothing when result >= 0; otherwise releases output_string (if non-NULL) and either returns silently or reports an error */ |
jbe@0 | 83 if (result < 0) { |
jbe@0 | 84 int sqlerrcode; |
jbe@1 | 85 if (output_string) pfree(output_string); /* output_string may be NULL when the mapping failed before allocating */ |
jbe@0 | 86 switch(result) { /* translate the utf8proc error code into an SQL error code */ |
jbe@0 | 87 case UTF8PROC_ERROR_NOMEM: |
jbe@0 | 88 sqlerrcode = ERRCODE_OUT_OF_MEMORY; break; |
jbe@0 | 89 case UTF8PROC_ERROR_OVERFLOW: |
jbe@0 | 90 sqlerrcode = ERRCODE_PROGRAM_LIMIT_EXCEEDED; break; |
jbe@0 | 91 case UTF8PROC_ERROR_INVALIDUTF8: |
jbe@0 | 92 case UTF8PROC_ERROR_NOTASSIGNED: |
jbe@7 | 93 return; /* these two codes are not reported; the callers in this file return SQL NULL for them */ |
jbe@0 | 94 default: |
jbe@0 | 95 sqlerrcode = ERRCODE_INTERNAL_ERROR; /* any other negative code */ |
jbe@0 | 96 } |
jbe@0 | 97 ereport(ERROR, ( /* NOTE(review): at ERROR level this presumably does not return to the caller - confirm */ |
jbe@0 | 98 errcode(sqlerrcode), |
jbe@0 | 99 errmsg("%s", utf8proc_errmsg(result)) |
jbe@0 | 100 )); |
jbe@0 | 101 } |
jbe@0 | 102 } |
jbe@0 | 103 |
jbe@7 | 104 PG_FUNCTION_INFO_V1(utf8proc_pgsql_unifold); |
jbe@7 | 105 Datum utf8proc_pgsql_unifold(PG_FUNCTION_ARGS) { /* maps text argument 0 with UTF8PROC_PGSQL_FOLD_OPTS and returns the mapped text, or SQL NULL */ |
jbe@7 | 106 text *input_string; |
jbe@7 | 107 text *output_string = NULL; /* stays NULL if the mapping fails before allocating, so the error check skips pfree */ |
jbe@7 | 108 ssize_t result; |
jbe@7 | 109 input_string = PG_GETARG_TEXT_P(0); |
jbe@7 | 110 result = utf8proc_pgsql_utf8map( |
jbe@7 | 111 input_string, &output_string, UTF8PROC_PGSQL_FOLD_OPTS |
jbe@7 | 112 ); |
jbe@7 | 113 PG_FREE_IF_COPY(input_string, 0); /* the input is not read again after the mapping call */ |
jbe@7 | 114 utf8proc_pgsql_utf8map_errchk(result, output_string); /* frees output_string on failure and reports all but the invalid-UTF-8 / not-assigned codes */ |
jbe@7 | 115 if (result >= 0) { |
jbe@7 | 116 PG_RETURN_TEXT_P(output_string); |
jbe@7 | 117 } else { |
jbe@7 | 118 PG_RETURN_NULL(); /* reached for negative results that the error check returned from without reporting */ |
jbe@7 | 119 } |
jbe@7 | 120 } |
jbe@0 | 121 |
jbe@7 | 122 PG_FUNCTION_INFO_V1(utf8proc_pgsql_unistrip); |
jbe@7 | 123 Datum utf8proc_pgsql_unistrip(PG_FUNCTION_ARGS) { /* maps text argument 0 with UTF8PROC_PGSQL_STRIP_OPTS and returns the mapped text, or SQL NULL */ |
jbe@7 | 124 text *input_string; |
jbe@7 | 125 text *output_string = NULL; /* stays NULL if the mapping fails before allocating, so the error check skips pfree */ |
jbe@7 | 126 ssize_t result; |
jbe@7 | 127 input_string = PG_GETARG_TEXT_P(0); |
jbe@7 | 128 result = utf8proc_pgsql_utf8map( |
jbe@7 | 129 input_string, &output_string, UTF8PROC_PGSQL_STRIP_OPTS |
jbe@7 | 130 ); |
jbe@7 | 131 PG_FREE_IF_COPY(input_string, 0); /* the input is not read again after the mapping call */ |
jbe@7 | 132 utf8proc_pgsql_utf8map_errchk(result, output_string); /* frees output_string on failure and reports all but the invalid-UTF-8 / not-assigned codes */ |
jbe@7 | 133 if (result >= 0) { |
jbe@7 | 134 PG_RETURN_TEXT_P(output_string); |
jbe@7 | 135 } else { |
jbe@7 | 136 PG_RETURN_NULL(); /* reached for negative results that the error check returned from without reporting */ |
jbe@7 | 137 } |
jbe@7 | 138 } |
jbe@7 | 139 |