# HG changeset patch # User jbe # Date 1150797600 -7200 # Node ID 61a89ecc2fb90d91627caee94503fb3d3b836591 # Parent a0368662434c1c516db9f8e75a8f3cf1e08ab4f4 Version 0.2 - changed behaviour of PostgreSQL function to return NULL in case of invalid input, rather than raising an exceptional condition - improved efficiency of PostgreSQL function (no transformation to C string is done) - added -fpic compiler flag in Makefile - fixed bug in the C code for the ruby library (usage of non-existent function) diff -r a0368662434c -r 61a89ecc2fb9 Changelog --- a/Changelog Fri Jun 02 12:00:00 2006 +0200 +++ b/Changelog Tue Jun 20 12:00:00 2006 +0200 @@ -3,4 +3,12 @@ 2006-06-02: - initial release of version 0.1 +2006-06-05: +- changed behaviour of PostgreSQL function to return NULL in case of invalid + input, rather than raising an exceptional condition +- improved efficiency of PostgreSQL function (no transformation to C string is done) +2006-06-20: +- added -fpic compiler flag in Makefile +- fixed bug in the C code for the ruby library (usage of non-existent function) + diff -r a0368662434c -r 61a89ecc2fb9 Makefile --- a/Makefile Fri Jun 02 12:00:00 2006 +0200 +++ b/Makefile Tue Jun 20 12:00:00 2006 +0200 @@ -3,7 +3,7 @@ # settings -cflags = -g -O0 -std=c99 -pedantic -Wall $(CFLAGS) +cflags = -g -O0 -std=c99 -pedantic -Wall -fpic $(CFLAGS) cc = gcc $(cflags) diff -r a0368662434c -r 61a89ecc2fb9 README --- a/README Fri Jun 02 12:00:00 2006 +0200 +++ b/README Tue Jun 20 12:00:00 2006 +0200 @@ -73,7 +73,11 @@ function can be used to prepare index fields in order to be normalized and case-folded, i.e.: -CREATE TABLE people (id serial8 primary key, name text); +CREATE TABLE people ( + id serial8 primary key, + name text, + CHECK (unifold(name) NOTNULL) +); CREATE INDEX name_idx ON people (unifold(name)); SELECT * FROM people WHERE unifold(name) = unifold('John Doe'); diff -r a0368662434c -r 61a89ecc2fb9 pgsql/utf8proc_pgsql.c --- a/pgsql/utf8proc_pgsql.c Fri Jun 02 12:00:00 2006 +0200 +++ b/pgsql/utf8proc_pgsql.c Tue Jun 20 12:00:00 2006 +0200 @@ -33,8 +33,8 @@ /* * File name: pgsql/utf8proc_pgsql.c - * Version: 0.1 - * Last changed: 2006-05-31 + * Version: 0.2 + * Last changed: 2006-06-05 * * Description: * PostgreSQL extension to provide a function 'unifold', which can be used @@ -47,24 +47,46 @@ #include #include #include +#include #include -#include #include +#define UTF8PROC_PGSQL_OPTS ( UTF8PROC_REJECTNA | \ + UTF8PROC_COMPOSE | UTF8PROC_IGNORE | UTF8PROC_STRIPCC | UTF8PROC_CASEFOLD) + PG_FUNCTION_INFO_V1(utf8proc_pgsql_unifold); - Datum utf8proc_pgsql_unifold(PG_FUNCTION_ARGS) { - char *input_string; - uint8_t *output_string; + text *input_string; + text *output_string = NULL; ssize_t result; - input_string = DatumGetCString( - DirectFunctionCall1(textout, PG_GETARG_DATUM(0)) - ); - result = utf8proc_map(input_string, 0, &output_string, UTF8PROC_NULLTERM | - UTF8PROC_COMPOSE | UTF8PROC_IGNORE | UTF8PROC_STRIPCC | UTF8PROC_CASEFOLD); - pfree(input_string); + input_string = PG_GETARG_TEXT_P(0); + do { + result = utf8proc_decompose( + VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ, + NULL, 0, UTF8PROC_PGSQL_OPTS + ); + if (result < 0) break; + if (result > (SIZE_MAX-1-VARHDRSZ)/sizeof(int32_t)) { + result = UTF8PROC_ERROR_OVERFLOW; + break; + } + output_string = palloc(result * sizeof(int32_t) + 1 + VARHDRSZ); + // reserve one extra byte for termination + if (!output_string) { + result = UTF8PROC_ERROR_NOMEM; + break; + } + result = utf8proc_decompose( + VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ, + (int32_t *)VARDATA(output_string), result, UTF8PROC_PGSQL_OPTS); + if (result < 0) break; + result = utf8proc_reencode((int32_t *)VARDATA(output_string), result, + UTF8PROC_PGSQL_OPTS); + } while (0); + PG_FREE_IF_COPY(input_string, 0); if (result < 0) { int sqlerrcode; + if (output_string) pfree(output_string); switch(result) { case UTF8PROC_ERROR_NOMEM: sqlerrcode = ERRCODE_OUT_OF_MEMORY; break; @@ -72,7 +94,7 @@ sqlerrcode = ERRCODE_PROGRAM_LIMIT_EXCEEDED; break; case UTF8PROC_ERROR_INVALIDUTF8: case UTF8PROC_ERROR_NOTASSIGNED: - sqlerrcode = ERRCODE_DATA_EXCEPTION; break; + PG_RETURN_NULL(); default: sqlerrcode = ERRCODE_INTERNAL_ERROR; } @@ -80,13 +102,11 @@ errcode(sqlerrcode), errmsg("%s", utf8proc_errmsg(result)) )); + } else { + VARATT_SIZEP(output_string) = result + VARHDRSZ; + PG_RETURN_TEXT_P(output_string); } - { - Datum retval; - retval = DirectFunctionCall1(textin, CStringGetDatum(output_string)); - free(output_string); - PG_RETURN_TEXT_P(DatumGetTextP(retval)); - } + PG_RETURN_NULL(); // prohibit compiler warning } diff -r a0368662434c -r 61a89ecc2fb9 ruby/utf8proc.rb --- a/ruby/utf8proc.rb Fri Jun 02 12:00:00 2006 +0200 +++ b/ruby/utf8proc.rb Tue Jun 20 12:00:00 2006 +0200 @@ -33,7 +33,7 @@ ## # File name: ruby/utf8proc.rb - # Version: 0.1 + # Version: 0.2 # Last changed: 2006-05-31 # # Description: diff -r a0368662434c -r 61a89ecc2fb9 ruby/utf8proc_native.c --- a/ruby/utf8proc_native.c Fri Jun 02 12:00:00 2006 +0200 +++ b/ruby/utf8proc_native.c Tue Jun 20 12:00:00 2006 +0200 @@ -33,8 +33,8 @@ /* * File name: ruby/utf8proc_native.c - * Version: 0.1 - * Last changed: 2006-05-31 + * Version: 0.2 + * Last changed: 2006-06-20 * * Description: * Native part of the ruby wrapper for libutf8proc. @@ -97,13 +97,13 @@ if (result < 0) { free(env->buffer); env->buffer = 0; - func_map_error(result); + utf8proc_ruby_map_error(result); } result = utf8proc_reencode(env->buffer, result, options); if (result < 0) { free(env->buffer); env->buffer = 0; - func_map_error(result); + utf8proc_ruby_map_error(result); } retval = rb_str_new((char *)env->buffer, result); free(env->buffer); diff -r a0368662434c -r 61a89ecc2fb9 utf8proc.c --- a/utf8proc.c Fri Jun 02 12:00:00 2006 +0200 +++ b/utf8proc.c Tue Jun 20 12:00:00 2006 +0200 @@ -42,7 +42,7 @@ /* * File name: utf8proc.c - * Version: 0.1 + * Version: 0.2 * Last changed: 2006-05-31 * * Description: diff -r a0368662434c -r 61a89ecc2fb9 utf8proc.h --- a/utf8proc.h Fri Jun 02 12:00:00 2006 +0200 +++ b/utf8proc.h Tue Jun 20 12:00:00 2006 +0200 @@ -42,7 +42,7 @@ /* * File name: utf8proc.h - * Version: 0.1 + * Version: 0.2 * Last changed: 2006-05-31 * * Description: