| rev | line source | 
| jbe@0 | 1 /* | 
| jbe@10 | 2  *  Copyright (c) Public Software Group e. V., Berlin, Germany | 
| jbe@0 | 3  * | 
| jbe@7 | 4  *  Permission is hereby granted, free of charge, to any person obtaining a | 
| jbe@7 | 5  *  copy of this software and associated documentation files (the "Software"), | 
| jbe@7 | 6  *  to deal in the Software without restriction, including without limitation | 
| jbe@7 | 7  *  the rights to use, copy, modify, merge, publish, distribute, sublicense, | 
| jbe@7 | 8  *  and/or sell copies of the Software, and to permit persons to whom the | 
| jbe@7 | 9  *  Software is furnished to do so, subject to the following conditions: | 
| jbe@0 | 10  * | 
| jbe@7 | 11  *  The above copyright notice and this permission notice shall be included in | 
| jbe@7 | 12  *  all copies or substantial portions of the Software. | 
| jbe@0 | 13  * | 
| jbe@7 | 14  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
| jbe@7 | 15  *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
| jbe@7 | 16  *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | 
| jbe@7 | 17  *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 
| jbe@7 | 18  *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | 
| jbe@7 | 19  *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | 
| jbe@7 | 20  *  DEALINGS IN THE SOFTWARE. | 
| jbe@0 | 21  */ | 
| jbe@7 | 22 | 
| jbe@0 | 23 | 
| jbe@0 | 24 /* | 
| jbe@0 | 25  *  File name:    pgsql/utf8proc_pgsql.c | 
| jbe@0 | 26  * | 
| jbe@0 | 27  *  Description: | 
| jbe@7 | 28  *  PostgreSQL extension to provide two functions 'unifold' and 'unistrip', | 
| jbe@7 | 29  *  which can be used to case-fold and normalize index fields and | 
| jbe@7 | 30  *  optionally strip marks (e.g. accents) from strings. | 
| jbe@0 | 31  */ | 
| jbe@0 | 32 | 
| jbe@0 | 33 | 
| jbe@0 | 34 #include "../utf8proc.c" | 
| jbe@0 | 35 | 
| jbe@0 | 36 #include <postgres.h> | 
| jbe@0 | 37 #include <utils/elog.h> | 
| jbe@0 | 38 #include <fmgr.h> | 
| jbe@1 | 39 #include <string.h> | 
| jbe@0 | 40 #include <unistd.h> | 
| jbe@0 | 41 #include <utils/builtins.h> | 
| jbe@0 | 42 | 
#if defined(PG_MODULE_MAGIC)
PG_MODULE_MAGIC;
#endif

/*
 *  utf8proc option flags handed to the mapping routine by 'unifold'.
 */
#define UTF8PROC_PGSQL_FOLD_OPTS ( \
  UTF8PROC_REJECTNA | UTF8PROC_COMPAT | UTF8PROC_COMPOSE | \
  UTF8PROC_STABLE | UTF8PROC_IGNORE | UTF8PROC_STRIPCC | \
  UTF8PROC_NLF2LF | UTF8PROC_CASEFOLD | UTF8PROC_LUMP )

/*
 *  utf8proc option flags handed to the mapping routine by 'unistrip':
 *  the same set as for 'unifold' with UTF8PROC_STRIPMARK added.
 */
#define UTF8PROC_PGSQL_STRIP_OPTS ( \
  UTF8PROC_PGSQL_FOLD_OPTS | UTF8PROC_STRIPMARK )
| jbe@1 | 53 | 
| jbe@7 | 54 ssize_t utf8proc_pgsql_utf8map( | 
| jbe@7 | 55   text *input_string, text **output_string_ptr, int options | 
| jbe@7 | 56 ) { | 
| jbe@0 | 57   ssize_t result; | 
| jbe@7 | 58   text *output_string; | 
| jbe@7 | 59   result = utf8proc_decompose( | 
| jbe@7 | 60     VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ, | 
| jbe@7 | 61     NULL, 0, options | 
| jbe@7 | 62   ); | 
| jbe@7 | 63   if (result < 0) return result; | 
| jbe@7 | 64   if (result > (SIZE_MAX-1-VARHDRSZ)/sizeof(int32_t)) | 
| jbe@7 | 65     return UTF8PROC_ERROR_OVERFLOW; | 
| jbe@10 | 66   /* reserve one extra byte for termination */ | 
| jbe@7 | 67   *output_string_ptr = palloc(result * sizeof(int32_t) + 1 + VARHDRSZ); | 
| jbe@7 | 68   output_string = *output_string_ptr; | 
| jbe@7 | 69   if (!output_string) return UTF8PROC_ERROR_NOMEM; | 
| jbe@7 | 70   result = utf8proc_decompose( | 
| jbe@7 | 71     VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ, | 
| jbe@7 | 72     (int32_t *)VARDATA(output_string), result, options | 
| jbe@7 | 73   ); | 
| jbe@7 | 74   if (result < 0) return result; | 
| jbe@7 | 75   result = utf8proc_reencode( | 
| jbe@7 | 76     (int32_t *)VARDATA(output_string), result, options | 
| jbe@7 | 77   ); | 
| jbe@9 | 78   if (result >= 0) SET_VARSIZE(output_string, result + VARHDRSZ); | 
| jbe@7 | 79   return result; | 
| jbe@7 | 80 } | 
| jbe@7 | 81 | 
| jbe@7 | 82 void utf8proc_pgsql_utf8map_errchk(ssize_t result, text *output_string) { | 
| jbe@0 | 83   if (result < 0) { | 
| jbe@0 | 84     int sqlerrcode; | 
| jbe@1 | 85     if (output_string) pfree(output_string); | 
| jbe@0 | 86     switch(result) { | 
| jbe@0 | 87       case UTF8PROC_ERROR_NOMEM: | 
| jbe@0 | 88       sqlerrcode = ERRCODE_OUT_OF_MEMORY; break; | 
| jbe@0 | 89       case UTF8PROC_ERROR_OVERFLOW: | 
| jbe@0 | 90       sqlerrcode = ERRCODE_PROGRAM_LIMIT_EXCEEDED; break; | 
| jbe@0 | 91       case UTF8PROC_ERROR_INVALIDUTF8: | 
| jbe@0 | 92       case UTF8PROC_ERROR_NOTASSIGNED: | 
| jbe@7 | 93       return; | 
| jbe@0 | 94       default: | 
| jbe@0 | 95       sqlerrcode = ERRCODE_INTERNAL_ERROR; | 
| jbe@0 | 96     } | 
| jbe@0 | 97     ereport(ERROR, ( | 
| jbe@0 | 98       errcode(sqlerrcode), | 
| jbe@0 | 99       errmsg("%s", utf8proc_errmsg(result)) | 
| jbe@0 | 100     )); | 
| jbe@0 | 101   } | 
| jbe@0 | 102 } | 
| jbe@0 | 103 | 
| jbe@7 | 104 PG_FUNCTION_INFO_V1(utf8proc_pgsql_unifold); | 
| jbe@7 | 105 Datum utf8proc_pgsql_unifold(PG_FUNCTION_ARGS) { | 
| jbe@7 | 106   text *input_string; | 
| jbe@7 | 107   text *output_string = NULL; | 
| jbe@7 | 108   ssize_t result; | 
| jbe@7 | 109   input_string = PG_GETARG_TEXT_P(0); | 
| jbe@7 | 110   result = utf8proc_pgsql_utf8map( | 
| jbe@7 | 111     input_string, &output_string, UTF8PROC_PGSQL_FOLD_OPTS | 
| jbe@7 | 112   ); | 
| jbe@7 | 113   PG_FREE_IF_COPY(input_string, 0); | 
| jbe@7 | 114   utf8proc_pgsql_utf8map_errchk(result, output_string); | 
| jbe@7 | 115   if (result >= 0) { | 
| jbe@7 | 116     PG_RETURN_TEXT_P(output_string); | 
| jbe@7 | 117   } else { | 
| jbe@7 | 118     PG_RETURN_NULL(); | 
| jbe@7 | 119   } | 
| jbe@7 | 120 } | 
| jbe@0 | 121 | 
| jbe@7 | 122 PG_FUNCTION_INFO_V1(utf8proc_pgsql_unistrip); | 
| jbe@7 | 123 Datum utf8proc_pgsql_unistrip(PG_FUNCTION_ARGS) { | 
| jbe@7 | 124   text *input_string; | 
| jbe@7 | 125   text *output_string = NULL; | 
| jbe@7 | 126   ssize_t result; | 
| jbe@7 | 127   input_string = PG_GETARG_TEXT_P(0); | 
| jbe@7 | 128   result = utf8proc_pgsql_utf8map( | 
| jbe@7 | 129     input_string, &output_string, UTF8PROC_PGSQL_STRIP_OPTS | 
| jbe@7 | 130   ); | 
| jbe@7 | 131   PG_FREE_IF_COPY(input_string, 0); | 
| jbe@7 | 132   utf8proc_pgsql_utf8map_errchk(result, output_string); | 
| jbe@7 | 133   if (result >= 0) { | 
| jbe@7 | 134     PG_RETURN_TEXT_P(output_string); | 
| jbe@7 | 135   } else { | 
| jbe@7 | 136     PG_RETURN_NULL(); | 
| jbe@7 | 137   } | 
| jbe@7 | 138 } | 
| jbe@7 | 139 |