utf8proc

diff pgsql/utf8proc_pgsql.c @ 1:61a89ecc2fb9

Version 0.2

- changed behaviour of the PostgreSQL function to return NULL for invalid input (invalid UTF-8 or unassigned code points), rather than raising an error
- improved efficiency of PostgreSQL function (no transformation to C string is done)
- added -fpic compiler flag in Makefile
- fixed a bug in the C code for the Ruby library (call to a non-existent function)
author jbe
date Tue Jun 20 12:00:00 2006 +0200 (2006-06-20)
parents a0368662434c
children aaad485d5335
line diff
     1.1 --- a/pgsql/utf8proc_pgsql.c	Fri Jun 02 12:00:00 2006 +0200
     1.2 +++ b/pgsql/utf8proc_pgsql.c	Tue Jun 20 12:00:00 2006 +0200
     1.3 @@ -33,8 +33,8 @@
     1.4  
     1.5  /*
     1.6   *  File name:    pgsql/utf8proc_pgsql.c
     1.7 - *  Version:      0.1
     1.8 - *  Last changed: 2006-05-31
     1.9 + *  Version:      0.2
    1.10 + *  Last changed: 2006-06-05
    1.11   *
    1.12   *  Description:
    1.13   *  PostgreSQL extension to provide a function 'unifold', which can be used
    1.14 @@ -47,24 +47,46 @@
    1.15  #include <postgres.h>
    1.16  #include <utils/elog.h>
    1.17  #include <fmgr.h>
    1.18 +#include <string.h>
    1.19  #include <unistd.h>
    1.20 -#include <string.h>
    1.21  #include <utils/builtins.h>
    1.22  
    1.23 +#define UTF8PROC_PGSQL_OPTS ( UTF8PROC_REJECTNA | \
    1.24 +  UTF8PROC_COMPOSE | UTF8PROC_IGNORE | UTF8PROC_STRIPCC | UTF8PROC_CASEFOLD)
    1.25 +
    1.26  PG_FUNCTION_INFO_V1(utf8proc_pgsql_unifold);
    1.27 -
    1.28  Datum utf8proc_pgsql_unifold(PG_FUNCTION_ARGS) {
    1.29 -  char *input_string;
    1.30 -  uint8_t *output_string;
    1.31 +  text *input_string;
    1.32 +  text *output_string = NULL;
    1.33    ssize_t result;
    1.34 -  input_string = DatumGetCString(
    1.35 -    DirectFunctionCall1(textout, PG_GETARG_DATUM(0))
    1.36 -  );
    1.37 -  result = utf8proc_map(input_string, 0, &output_string, UTF8PROC_NULLTERM |
    1.38 -    UTF8PROC_COMPOSE | UTF8PROC_IGNORE | UTF8PROC_STRIPCC | UTF8PROC_CASEFOLD);
    1.39 -  pfree(input_string);
    1.40 +  input_string = PG_GETARG_TEXT_P(0);
    1.41 +  do {
    1.42 +    result = utf8proc_decompose(
    1.43 +      VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ,
    1.44 +      NULL, 0, UTF8PROC_PGSQL_OPTS
    1.45 +    );
    1.46 +    if (result < 0) break;
    1.47 +    if (result > (SIZE_MAX-1-VARHDRSZ)/sizeof(int32_t)) {
    1.48 +      result = UTF8PROC_ERROR_OVERFLOW;
    1.49 +      break;
    1.50 +    }
    1.51 +    output_string = palloc(result * sizeof(int32_t) + 1 + VARHDRSZ);
    1.52 +    // reserve one extra byte for termination
    1.53 +    if (!output_string) {
    1.54 +      result = UTF8PROC_ERROR_NOMEM;
    1.55 +      break;
    1.56 +    }
    1.57 +    result = utf8proc_decompose(
    1.58 +      VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ,
    1.59 +      (int32_t *)VARDATA(output_string), result, UTF8PROC_PGSQL_OPTS);
    1.60 +    if (result < 0) break;
    1.61 +    result = utf8proc_reencode((int32_t *)VARDATA(output_string), result,
    1.62 +      UTF8PROC_PGSQL_OPTS);
    1.63 +  } while (0);
    1.64 +  PG_FREE_IF_COPY(input_string, 0);
    1.65    if (result < 0) {
    1.66      int sqlerrcode;
    1.67 +    if (output_string) pfree(output_string);
    1.68      switch(result) {
    1.69        case UTF8PROC_ERROR_NOMEM:
    1.70        sqlerrcode = ERRCODE_OUT_OF_MEMORY; break;
    1.71 @@ -72,7 +94,7 @@
    1.72        sqlerrcode = ERRCODE_PROGRAM_LIMIT_EXCEEDED; break;
    1.73        case UTF8PROC_ERROR_INVALIDUTF8:
    1.74        case UTF8PROC_ERROR_NOTASSIGNED:
    1.75 -      sqlerrcode = ERRCODE_DATA_EXCEPTION; break;
    1.76 +      PG_RETURN_NULL();
    1.77        default:
    1.78        sqlerrcode = ERRCODE_INTERNAL_ERROR;
    1.79      }
    1.80 @@ -80,13 +102,11 @@
    1.81        errcode(sqlerrcode),
    1.82        errmsg("%s", utf8proc_errmsg(result))
    1.83      ));
    1.84 +  } else {
    1.85 +    VARATT_SIZEP(output_string) = result + VARHDRSZ;
    1.86 +    PG_RETURN_TEXT_P(output_string);
    1.87    }
    1.88 -  {
    1.89 -    Datum retval;
    1.90 -    retval = DirectFunctionCall1(textin, CStringGetDatum(output_string));
    1.91 -    free(output_string);
    1.92 -    PG_RETURN_TEXT_P(DatumGetTextP(retval));
    1.93 -  }
    1.94 +  PG_RETURN_NULL();  // prohibit compiler warning
    1.95  }
    1.96  
    1.97  

Impressum / About Us