utf8proc
view pgsql/utf8proc_pgsql.c @ 17:47b467f4c128
Contribution from libmojibake fork (missing file "normtest.c")
| author | Jiahao Chen, Steven G. Johnson, Anthony David Kelman | 
|---|---|
| date | Mon Dec 01 14:32:19 2014 -0500 (2014-12-01) | 
| parents | 00d2bcbdc945 | 
| children | 
 line source
     1 /*
     2  *  Copyright (c) Public Software Group e. V., Berlin, Germany
     3  *
     4  *  Permission is hereby granted, free of charge, to any person obtaining a
     5  *  copy of this software and associated documentation files (the "Software"),
     6  *  to deal in the Software without restriction, including without limitation
     7  *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
     8  *  and/or sell copies of the Software, and to permit persons to whom the
     9  *  Software is furnished to do so, subject to the following conditions:
    10  *
    11  *  The above copyright notice and this permission notice shall be included in
    12  *  all copies or substantial portions of the Software.
    13  *
    14  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    15  *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    16  *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    17  *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    18  *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    19  *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    20  *  DEALINGS IN THE SOFTWARE.
    21  */
    24 /*
    25  *  File name:    pgsql/utf8proc_pgsql.c
    26  *
    27  *  Description:
    28  *  PostgreSQL extension to provide two functions 'unifold' and 'unistrip',
    29  *  which can be used to case-fold and normalize index fields and
    30  *  optionally strip marks (e.g. accents) from strings.
    31  */
    34 #include "../utf8proc.c"
    36 #include <postgres.h>
    37 #include <utils/elog.h>
    38 #include <fmgr.h>
    39 #include <string.h>
    40 #include <unistd.h>
    41 #include <utils/builtins.h>
    43 #ifdef PG_MODULE_MAGIC
    44 PG_MODULE_MAGIC;
    45 #endif
    47 #define UTF8PROC_PGSQL_FOLD_OPTS ( UTF8PROC_REJECTNA | UTF8PROC_COMPAT | \
    48   UTF8PROC_COMPOSE | UTF8PROC_STABLE | UTF8PROC_IGNORE | UTF8PROC_STRIPCC | \
    49   UTF8PROC_NLF2LF | UTF8PROC_CASEFOLD | UTF8PROC_LUMP )
    50 #define UTF8PROC_PGSQL_STRIP_OPTS ( UTF8PROC_REJECTNA | UTF8PROC_COMPAT | \
    51   UTF8PROC_COMPOSE | UTF8PROC_STABLE | UTF8PROC_IGNORE | UTF8PROC_STRIPCC | \
    52   UTF8PROC_NLF2LF | UTF8PROC_CASEFOLD | UTF8PROC_LUMP | UTF8PROC_STRIPMARK )
    54 ssize_t utf8proc_pgsql_utf8map(
    55   text *input_string, text **output_string_ptr, int options
    56 ) {
    57   ssize_t result;
    58   text *output_string;
    59   result = utf8proc_decompose(
    60     VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ,
    61     NULL, 0, options
    62   );
    63   if (result < 0) return result;
    64   if (result > (SIZE_MAX-1-VARHDRSZ)/sizeof(int32_t))
    65     return UTF8PROC_ERROR_OVERFLOW;
    66   /* reserve one extra byte for termination */
    67   *output_string_ptr = palloc(result * sizeof(int32_t) + 1 + VARHDRSZ);
    68   output_string = *output_string_ptr;
    69   if (!output_string) return UTF8PROC_ERROR_NOMEM;
    70   result = utf8proc_decompose(
    71     VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ,
    72     (int32_t *)VARDATA(output_string), result, options
    73   );
    74   if (result < 0) return result;
    75   result = utf8proc_reencode(
    76     (int32_t *)VARDATA(output_string), result, options
    77   );
    78   if (result >= 0) SET_VARSIZE(output_string, result + VARHDRSZ);
    79   return result;
    80 }
    82 void utf8proc_pgsql_utf8map_errchk(ssize_t result, text *output_string) {
    83   if (result < 0) {
    84     int sqlerrcode;
    85     if (output_string) pfree(output_string);
    86     switch(result) {
    87       case UTF8PROC_ERROR_NOMEM:
    88       sqlerrcode = ERRCODE_OUT_OF_MEMORY; break;
    89       case UTF8PROC_ERROR_OVERFLOW:
    90       sqlerrcode = ERRCODE_PROGRAM_LIMIT_EXCEEDED; break;
    91       case UTF8PROC_ERROR_INVALIDUTF8:
    92       case UTF8PROC_ERROR_NOTASSIGNED:
    93       return;
    94       default:
    95       sqlerrcode = ERRCODE_INTERNAL_ERROR;
    96     }
    97     ereport(ERROR, (
    98       errcode(sqlerrcode),
    99       errmsg("%s", utf8proc_errmsg(result))
   100     ));
   101   }
   102 }
   104 PG_FUNCTION_INFO_V1(utf8proc_pgsql_unifold);
   105 Datum utf8proc_pgsql_unifold(PG_FUNCTION_ARGS) {
   106   text *input_string;
   107   text *output_string = NULL;
   108   ssize_t result;
   109   input_string = PG_GETARG_TEXT_P(0);
   110   result = utf8proc_pgsql_utf8map(
   111     input_string, &output_string, UTF8PROC_PGSQL_FOLD_OPTS
   112   );
   113   PG_FREE_IF_COPY(input_string, 0);
   114   utf8proc_pgsql_utf8map_errchk(result, output_string);
   115   if (result >= 0) {
   116     PG_RETURN_TEXT_P(output_string);
   117   } else {
   118     PG_RETURN_NULL();
   119   }
   120 }
   122 PG_FUNCTION_INFO_V1(utf8proc_pgsql_unistrip);
   123 Datum utf8proc_pgsql_unistrip(PG_FUNCTION_ARGS) {
   124   text *input_string;
   125   text *output_string = NULL;
   126   ssize_t result;
   127   input_string = PG_GETARG_TEXT_P(0);
   128   result = utf8proc_pgsql_utf8map(
   129     input_string, &output_string, UTF8PROC_PGSQL_STRIP_OPTS
   130   );
   131   PG_FREE_IF_COPY(input_string, 0);
   132   utf8proc_pgsql_utf8map_errchk(result, output_string);
   133   if (result >= 0) {
   134     PG_RETURN_TEXT_P(output_string);
   135   } else {
   136     PG_RETURN_NULL();
   137   }
   138 }
