utf8proc

changeset 1:61a89ecc2fb9 v0.2

Version 0.2

- changed behaviour of PostgreSQL function to return NULL in case of invalid input, rather than raising an exceptional condition
- improved efficiency of PostgreSQL function (no transformation to C string is done)
- added -fpic compiler flag in Makefile
- fixed bug in the C code for the ruby library (usage of non-existent function)
author: jbe
date: Tue Jun 20 12:00:00 2006 +0200 (2006-06-20)
parents: a0368662434c
children: aaad485d5335
files: Changelog Makefile README pgsql/utf8proc_pgsql.c ruby/utf8proc.rb ruby/utf8proc_native.c utf8proc.c utf8proc.h
line diff
     1.1 --- a/Changelog	Fri Jun 02 12:00:00 2006 +0200
     1.2 +++ b/Changelog	Tue Jun 20 12:00:00 2006 +0200
     1.3 @@ -3,4 +3,12 @@
     1.4  2006-06-02:
     1.5  - initial release of version 0.1
     1.6  
     1.7 +2006-06-05:
     1.8 +- changed behaviour of PostgreSQL function to return NULL in case of invalid
     1.9 +  input, rather than raising an exceptional condition
    1.10 +- improved efficiency of PostgreSQL function (no transformation to C string is done)
    1.11  
    1.12 +2006-06-20:
    1.13 +- added -fpic compiler flag in Makefile
    1.14 +- fixed bug in the C code for the ruby library (usage of non-existent function)
    1.15 +
     2.1 --- a/Makefile	Fri Jun 02 12:00:00 2006 +0200
     2.2 +++ b/Makefile	Tue Jun 20 12:00:00 2006 +0200
     2.3 @@ -3,7 +3,7 @@
     2.4  
     2.5  # settings
     2.6  
     2.7 -cflags = -g -O0 -std=c99 -pedantic -Wall $(CFLAGS)
     2.8 +cflags = -g -O0 -std=c99 -pedantic -Wall -fpic $(CFLAGS)
     2.9  cc = gcc $(cflags)
    2.10  
    2.11  
     3.1 --- a/README	Fri Jun 02 12:00:00 2006 +0200
     3.2 +++ b/README	Tue Jun 20 12:00:00 2006 +0200
     3.3 @@ -73,7 +73,11 @@
     3.4  function can be used to prepare index fields in order to be normalized and
     3.5  case-folded, i.e.:
     3.6  
     3.7 -CREATE TABLE people (id serial8 primary key, name text);
     3.8 +CREATE TABLE people (
     3.9 +  id    serial8 primary key,
    3.10 +  name  text,
    3.11 +  CHECK (unifold(name) NOTNULL)
    3.12 +);
    3.13  CREATE INDEX name_idx ON people (unifold(name));
    3.14  SELECT * FROM people WHERE unifold(name) = unifold('John Doe');
    3.15  
     4.1 --- a/pgsql/utf8proc_pgsql.c	Fri Jun 02 12:00:00 2006 +0200
     4.2 +++ b/pgsql/utf8proc_pgsql.c	Tue Jun 20 12:00:00 2006 +0200
     4.3 @@ -33,8 +33,8 @@
     4.4  
     4.5  /*
     4.6   *  File name:    pgsql/utf8proc_pgsql.c
     4.7 - *  Version:      0.1
     4.8 - *  Last changed: 2006-05-31
     4.9 + *  Version:      0.2
    4.10 + *  Last changed: 2006-06-05
    4.11   *
    4.12   *  Description:
    4.13   *  PostgreSQL extension to provide a function 'unifold', which can be used
    4.14 @@ -47,24 +47,46 @@
    4.15  #include <postgres.h>
    4.16  #include <utils/elog.h>
    4.17  #include <fmgr.h>
    4.18 +#include <string.h>
    4.19  #include <unistd.h>
    4.20 -#include <string.h>
    4.21  #include <utils/builtins.h>
    4.22  
    4.23 +#define UTF8PROC_PGSQL_OPTS ( UTF8PROC_REJECTNA | \
    4.24 +  UTF8PROC_COMPOSE | UTF8PROC_IGNORE | UTF8PROC_STRIPCC | UTF8PROC_CASEFOLD)
    4.25 +
    4.26  PG_FUNCTION_INFO_V1(utf8proc_pgsql_unifold);
    4.27 -
    4.28  Datum utf8proc_pgsql_unifold(PG_FUNCTION_ARGS) {
    4.29 -  char *input_string;
    4.30 -  uint8_t *output_string;
    4.31 +  text *input_string;
    4.32 +  text *output_string = NULL;
    4.33    ssize_t result;
    4.34 -  input_string = DatumGetCString(
    4.35 -    DirectFunctionCall1(textout, PG_GETARG_DATUM(0))
    4.36 -  );
    4.37 -  result = utf8proc_map(input_string, 0, &output_string, UTF8PROC_NULLTERM |
    4.38 -    UTF8PROC_COMPOSE | UTF8PROC_IGNORE | UTF8PROC_STRIPCC | UTF8PROC_CASEFOLD);
    4.39 -  pfree(input_string);
    4.40 +  input_string = PG_GETARG_TEXT_P(0);
    4.41 +  do {
    4.42 +    result = utf8proc_decompose(
    4.43 +      VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ,
    4.44 +      NULL, 0, UTF8PROC_PGSQL_OPTS
    4.45 +    );
    4.46 +    if (result < 0) break;
    4.47 +    if (result > (SIZE_MAX-1-VARHDRSZ)/sizeof(int32_t)) {
    4.48 +      result = UTF8PROC_ERROR_OVERFLOW;
    4.49 +      break;
    4.50 +    }
    4.51 +    output_string = palloc(result * sizeof(int32_t) + 1 + VARHDRSZ);
    4.52 +    // reserve one extra byte for termination
    4.53 +    if (!output_string) {
    4.54 +      result = UTF8PROC_ERROR_NOMEM;
    4.55 +      break;
    4.56 +    }
    4.57 +    result = utf8proc_decompose(
    4.58 +      VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ,
    4.59 +      (int32_t *)VARDATA(output_string), result, UTF8PROC_PGSQL_OPTS);
    4.60 +    if (result < 0) break;
    4.61 +    result = utf8proc_reencode((int32_t *)VARDATA(output_string), result,
    4.62 +      UTF8PROC_PGSQL_OPTS);
    4.63 +  } while (0);
    4.64 +  PG_FREE_IF_COPY(input_string, 0);
    4.65    if (result < 0) {
    4.66      int sqlerrcode;
    4.67 +    if (output_string) pfree(output_string);
    4.68      switch(result) {
    4.69        case UTF8PROC_ERROR_NOMEM:
    4.70        sqlerrcode = ERRCODE_OUT_OF_MEMORY; break;
    4.71 @@ -72,7 +94,7 @@
    4.72        sqlerrcode = ERRCODE_PROGRAM_LIMIT_EXCEEDED; break;
    4.73        case UTF8PROC_ERROR_INVALIDUTF8:
    4.74        case UTF8PROC_ERROR_NOTASSIGNED:
    4.75 -      sqlerrcode = ERRCODE_DATA_EXCEPTION; break;
    4.76 +      PG_RETURN_NULL();
    4.77        default:
    4.78        sqlerrcode = ERRCODE_INTERNAL_ERROR;
    4.79      }
    4.80 @@ -80,13 +102,11 @@
    4.81        errcode(sqlerrcode),
    4.82        errmsg("%s", utf8proc_errmsg(result))
    4.83      ));
    4.84 +  } else {
    4.85 +    VARATT_SIZEP(output_string) = result + VARHDRSZ;
    4.86 +    PG_RETURN_TEXT_P(output_string);
    4.87    }
    4.88 -  {
    4.89 -    Datum retval;
    4.90 -    retval = DirectFunctionCall1(textin, CStringGetDatum(output_string));
    4.91 -    free(output_string);
    4.92 -    PG_RETURN_TEXT_P(DatumGetTextP(retval));
    4.93 -  }
    4.94 +  PG_RETURN_NULL();  // prohibit compiler warning
    4.95  }
    4.96  
    4.97  
     5.1 --- a/ruby/utf8proc.rb	Fri Jun 02 12:00:00 2006 +0200
     5.2 +++ b/ruby/utf8proc.rb	Tue Jun 20 12:00:00 2006 +0200
     5.3 @@ -33,7 +33,7 @@
     5.4  
     5.5  ##
     5.6   #  File name:    ruby/utf8proc.rb
     5.7 - #  Version:      0.1
     5.8 + #  Version:      0.2
     5.9   #  Last changed: 2006-05-31
    5.10   #
    5.11   #  Description:
     6.1 --- a/ruby/utf8proc_native.c	Fri Jun 02 12:00:00 2006 +0200
     6.2 +++ b/ruby/utf8proc_native.c	Tue Jun 20 12:00:00 2006 +0200
     6.3 @@ -33,8 +33,8 @@
     6.4  
     6.5  /*
     6.6   *  File name:    ruby/utf8proc_native.c
     6.7 - *  Version:      0.1
     6.8 - *  Last changed: 2006-05-31
     6.9 + *  Version:      0.2
    6.10 + *  Last changed: 2006-06-20
    6.11   *
    6.12   *  Description:
    6.13   *  Native part of the ruby wrapper for libutf8proc.
    6.14 @@ -97,13 +97,13 @@
    6.15    if (result < 0) {
    6.16      free(env->buffer);
    6.17      env->buffer = 0;
    6.18 -    func_map_error(result);
    6.19 +    utf8proc_ruby_map_error(result);
    6.20    }
    6.21    result = utf8proc_reencode(env->buffer, result, options);
    6.22    if (result < 0) {
    6.23      free(env->buffer);
    6.24      env->buffer = 0;
    6.25 -    func_map_error(result);
    6.26 +    utf8proc_ruby_map_error(result);
    6.27    }
    6.28    retval = rb_str_new((char *)env->buffer, result);
    6.29    free(env->buffer);
     7.1 --- a/utf8proc.c	Fri Jun 02 12:00:00 2006 +0200
     7.2 +++ b/utf8proc.c	Tue Jun 20 12:00:00 2006 +0200
     7.3 @@ -42,7 +42,7 @@
     7.4  
     7.5  /*
     7.6   *  File name:    utf8proc.c
     7.7 - *  Version:      0.1
     7.8 + *  Version:      0.2
     7.9   *  Last changed: 2006-05-31
    7.10   *
    7.11   *  Description:
     8.1 --- a/utf8proc.h	Fri Jun 02 12:00:00 2006 +0200
     8.2 +++ b/utf8proc.h	Tue Jun 20 12:00:00 2006 +0200
     8.3 @@ -42,7 +42,7 @@
     8.4  
     8.5  /*
     8.6   *  File name:    utf8proc.h
     8.7 - *  Version:      0.1
     8.8 + *  Version:      0.2
     8.9   *  Last changed: 2006-05-31
    8.10   *
    8.11   *  Description:

Impressum / About Us