utf8proc
view pgsql/utf8proc_pgsql.c @ 5:c18366878af9
Version 1.0.2
- added a check to Integer#utf8 that raises an exception if the given code point is invalid because it is too high (this check was previously missing)
- added support for PostgreSQL version 8.2
- added a check to Integer#utf8 that raises an exception if the given code point is invalid because it is too high (this check was previously missing)
- added support for PostgreSQL version 8.2
| author | jbe | 
|---|---|
| date | Tue Dec 26 12:00:00 2006 +0100 (2006-12-26) | 
| parents | 4ee0d5f54af1 | 
| children | fcfd8c836c64 | 
 line source
     1 /*
     2  *  Copyright (c) 2006, FlexiGuided GmbH, Berlin, Germany
     3  *  Author: Jan Behrens <jan.behrens@flexiguided.de>
     4  *  All rights reserved.
     5  *
     6  *  Redistribution and use in source and binary forms, with or without
     7  *  modification, are permitted provided that the following conditions are
     8  *  met:
     9  *
    10  *  1. Redistributions of source code must retain the above copyright
    11  *     notice, this list of conditions and the following disclaimer.
    12  *  2. Redistributions in binary form must reproduce the above copyright
    13  *     notice, this list of conditions and the following disclaimer in the
    14  *     documentation and/or other materials provided with the distribution.
    15  *  3. Neither the name of the FlexiGuided GmbH nor the names of its
    16  *     contributors may be used to endorse or promote products derived from
    17  *     this software without specific prior written permission.
    18  *
    19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    20  *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    21  *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    22  *  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
    23  *  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    24  *  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    25  *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    26  *  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
    27  *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
    28  *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    29  *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    30  *
    31  */
    34 /*
    35  *  File name:    pgsql/utf8proc_pgsql.c
    36  *  Version:      1.0
    37  *  Last changed: 2006-09-17
    38  *
    39  *  Description:
    40  *  PostgreSQL extension to provide a function 'unifold', which can be used
    41  *  to case-fold and normalize index fields.
    42  */
    45 #include "../utf8proc.c"
    47 #include <postgres.h>
    48 #include <utils/elog.h>
    49 #include <fmgr.h>
    50 #include <string.h>
    51 #include <unistd.h>
    52 #include <utils/builtins.h>
    54 #ifdef PG_MODULE_MAGIC
       /*
        *  The module magic block is emitted only when the included PostgreSQL
        *  headers define PG_MODULE_MAGIC; with headers that lack the macro the
        *  line is skipped.
        *  NOTE(review): presumably this is what lets the same file build against
        *  both 8.2 and older servers -- confirm.
        */
    55 PG_MODULE_MAGIC;
    56 #endif
       /*
        *  Option flags that utf8proc_pgsql_unifold passes unchanged to every
        *  utf8proc_decompose and utf8proc_reencode call it makes.
        */
    58 #define UTF8PROC_PGSQL_OPTS ( UTF8PROC_REJECTNA | UTF8PROC_COMPAT | \
    59   UTF8PROC_COMPOSE | UTF8PROC_STABLE | UTF8PROC_IGNORE | UTF8PROC_STRIPCC | \
    60   UTF8PROC_NLF2LF | UTF8PROC_CASEFOLD | UTF8PROC_LUMP )
    62 PG_FUNCTION_INFO_V1(utf8proc_pgsql_unifold);
       /*
        *  utf8proc_pgsql_unifold -- implementation of the 'unifold' function.
        *
        *  Argument 0: a text value.
        *
        *  Returns a newly palloc'ed text value holding the input after it has
        *  been run through utf8proc_decompose and utf8proc_reencode with
        *  UTF8PROC_PGSQL_OPTS.
        *
        *  Returns SQL NULL when utf8proc reports UTF8PROC_ERROR_INVALIDUTF8 or
        *  UTF8PROC_ERROR_NOTASSIGNED.
        *
        *  Every other negative utf8proc result is reported through
        *  ereport(ERROR, ...) with the text from utf8proc_errmsg:
        *    UTF8PROC_ERROR_NOMEM     -> ERRCODE_OUT_OF_MEMORY
        *    UTF8PROC_ERROR_OVERFLOW  -> ERRCODE_PROGRAM_LIMIT_EXCEEDED
        *    anything else            -> ERRCODE_INTERNAL_ERROR
        */
    63 Datum utf8proc_pgsql_unifold(PG_FUNCTION_ARGS) {
    64   text *input_string;
    65   text *output_string = NULL;
    66   ssize_t result;
    67   input_string = PG_GETARG_TEXT_P(0);
         // The do/while(0) body runs exactly once; 'break' is used to leave it
         // early with 'result' holding a negative utf8proc error code.
    68   do {
           // First call: NULL buffer of size 0. The non-negative result is used
           // below as the number of int32_t slots to allocate.
    69     result = utf8proc_decompose(
    70       VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ,
    71       NULL, 0, UTF8PROC_PGSQL_OPTS
    72     );
    73     if (result < 0) break;
           // Refuse counts for which the allocation size computed below
           // (result * sizeof(int32_t) + 1 + VARHDRSZ) would exceed SIZE_MAX.
    74     if (result > (SIZE_MAX-1-VARHDRSZ)/sizeof(int32_t)) {
    75       result = UTF8PROC_ERROR_OVERFLOW;
    76       break;
    77     }
    78     output_string = palloc(result * sizeof(int32_t) + 1 + VARHDRSZ);
    79     // reserve one extra byte for termination
           // NOTE(review): palloc is documented to raise an error itself rather
           // than return NULL, so this branch is presumably never taken --
           // confirm before relying on it.
    80     if (!output_string) {
    81       result = UTF8PROC_ERROR_NOMEM;
    82       break;
    83     }
           // Second call: decompose into the data area of output_string, treated
           // as an array of 'result' int32_t elements.
    84     result = utf8proc_decompose(
    85       VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ,
    86       (int32_t *)VARDATA(output_string), result, UTF8PROC_PGSQL_OPTS);
    87     if (result < 0) break;
           // Re-encode using the same buffer; a non-negative result is used
           // further down as the byte length of the returned text.
    88     result = utf8proc_reencode((int32_t *)VARDATA(output_string), result,
    89       UTF8PROC_PGSQL_OPTS);
    90   } while (0);
         // The input is no longer needed from here on, on success or failure.
    91   PG_FREE_IF_COPY(input_string, 0);
    92   if (result < 0) {
           // Failure: drop any output buffer, then translate the utf8proc error.
    93     int sqlerrcode;
    94     if (output_string) pfree(output_string);
    95     switch(result) {
    96       case UTF8PROC_ERROR_NOMEM:
    97       sqlerrcode = ERRCODE_OUT_OF_MEMORY; break;
    98       case UTF8PROC_ERROR_OVERFLOW:
    99       sqlerrcode = ERRCODE_PROGRAM_LIMIT_EXCEEDED; break;
             // Invalid UTF-8 and unassigned code points are not raised as
             // errors; the function result is SQL NULL instead.
   100       case UTF8PROC_ERROR_INVALIDUTF8:
   101       case UTF8PROC_ERROR_NOTASSIGNED:
   102       PG_RETURN_NULL();
   103       default:
   104       sqlerrcode = ERRCODE_INTERNAL_ERROR;
   105     }
   106     ereport(ERROR, (
   107       errcode(sqlerrcode),
   108       errmsg("%s", utf8proc_errmsg(result))
   109     ));
   110   } else {
           // Success: total size is the re-encoded byte count plus the header.
           // NOTE(review): newer PostgreSQL releases are believed to require
           // SET_VARSIZE() in place of assigning through VARATT_SIZEP --
           // confirm before building against anything later than 8.2.
   111     VARATT_SIZEP(output_string) = result + VARHDRSZ;
   112     PG_RETURN_TEXT_P(output_string);
   113   }
   114   PG_RETURN_NULL();  // prohibit compiler warning
   115 }
