utf8proc
changeset 1:61a89ecc2fb9 v0.2
Version 0.2
- changed behaviour of PostgreSQL function to return NULL in case of invalid input, rather than raising an exceptional condition
- improved efficiency of PostgreSQL function (no transformation to C string is done)
- added -fpic compiler flag in Makefile
- fixed bug in the C code for the ruby library (usage of non-existent function)
- changed behaviour of PostgreSQL function to return NULL in case of invalid input, rather than raising an exceptional condition
- improved efficiency of PostgreSQL function (no transformation to C string is done)
- added -fpic compiler flag in Makefile
- fixed bug in the C code for the ruby library (usage of non-existent function)
author | jbe |
---|---|
date | Tue Jun 20 12:00:00 2006 +0200 (2006-06-20) |
parents | a0368662434c |
children | aaad485d5335 |
files | Changelog Makefile README pgsql/utf8proc_pgsql.c ruby/utf8proc.rb ruby/utf8proc_native.c utf8proc.c utf8proc.h |
line diff
1.1 --- a/Changelog Fri Jun 02 12:00:00 2006 +0200
1.2 +++ b/Changelog Tue Jun 20 12:00:00 2006 +0200
1.3 @@ -3,4 +3,12 @@
1.4 2006-06-02:
1.5 - initial release of version 0.1
1.6
1.7 +2006-06-05:
1.8 +- changed behaviour of PostgreSQL function to return NULL in case of invalid
1.9 + input, rather than raising an exceptional condition
1.10 +- improved efficiency of PostgreSQL function (no transformation to C string is done)
1.11
1.12 +2006-06-20:
1.13 +- added -fpic compiler flag in Makefile
1.14 +- fixed bug in the C code for the ruby library (usage of non-existent function)
1.15 +
2.1 --- a/Makefile Fri Jun 02 12:00:00 2006 +0200
2.2 +++ b/Makefile Tue Jun 20 12:00:00 2006 +0200
2.3 @@ -3,7 +3,7 @@
2.4
2.5 # settings
2.6
2.7 -cflags = -g -O0 -std=c99 -pedantic -Wall $(CFLAGS)
2.8 +cflags = -g -O0 -std=c99 -pedantic -Wall -fpic $(CFLAGS)
2.9 cc = gcc $(cflags)
2.10
2.11
3.1 --- a/README Fri Jun 02 12:00:00 2006 +0200
3.2 +++ b/README Tue Jun 20 12:00:00 2006 +0200
3.3 @@ -73,7 +73,11 @@
3.4 function can be used to prepare index fields in order to be normalized and
3.5 case-folded, i.e.:
3.6
3.7 -CREATE TABLE people (id serial8 primary key, name text);
3.8 +CREATE TABLE people (
3.9 + id serial8 primary key,
3.10 + name text,
3.11 + CHECK (unifold(name) NOTNULL)
3.12 +);
3.13 CREATE INDEX name_idx ON people (unifold(name));
3.14 SELECT * FROM people WHERE unifold(name) = unifold('John Doe');
3.15
4.1 --- a/pgsql/utf8proc_pgsql.c Fri Jun 02 12:00:00 2006 +0200
4.2 +++ b/pgsql/utf8proc_pgsql.c Tue Jun 20 12:00:00 2006 +0200
4.3 @@ -33,8 +33,8 @@
4.4
4.5 /*
4.6 * File name: pgsql/utf8proc_pgsql.c
4.7 - * Version: 0.1
4.8 - * Last changed: 2006-05-31
4.9 + * Version: 0.2
4.10 + * Last changed: 2006-06-05
4.11 *
4.12 * Description:
4.13 * PostgreSQL extension to provide a function 'unifold', which can be used
4.14 @@ -47,24 +47,46 @@
4.15 #include <postgres.h>
4.16 #include <utils/elog.h>
4.17 #include <fmgr.h>
4.18 +#include <string.h>
4.19 #include <unistd.h>
4.20 -#include <string.h>
4.21 #include <utils/builtins.h>
4.22
4.23 +#define UTF8PROC_PGSQL_OPTS ( UTF8PROC_REJECTNA | \
4.24 + UTF8PROC_COMPOSE | UTF8PROC_IGNORE | UTF8PROC_STRIPCC | UTF8PROC_CASEFOLD)
4.25 +
4.26 PG_FUNCTION_INFO_V1(utf8proc_pgsql_unifold);
4.27 -
4.28 Datum utf8proc_pgsql_unifold(PG_FUNCTION_ARGS) {
4.29 - char *input_string;
4.30 - uint8_t *output_string;
4.31 + text *input_string;
4.32 + text *output_string = NULL;
4.33 ssize_t result;
4.34 - input_string = DatumGetCString(
4.35 - DirectFunctionCall1(textout, PG_GETARG_DATUM(0))
4.36 - );
4.37 - result = utf8proc_map(input_string, 0, &output_string, UTF8PROC_NULLTERM |
4.38 - UTF8PROC_COMPOSE | UTF8PROC_IGNORE | UTF8PROC_STRIPCC | UTF8PROC_CASEFOLD);
4.39 - pfree(input_string);
4.40 + input_string = PG_GETARG_TEXT_P(0);
4.41 + do {
4.42 + result = utf8proc_decompose(
4.43 + VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ,
4.44 + NULL, 0, UTF8PROC_PGSQL_OPTS
4.45 + );
4.46 + if (result < 0) break;
4.47 + if (result > (SIZE_MAX-1-VARHDRSZ)/sizeof(int32_t)) {
4.48 + result = UTF8PROC_ERROR_OVERFLOW;
4.49 + break;
4.50 + }
4.51 + output_string = palloc(result * sizeof(int32_t) + 1 + VARHDRSZ);
4.52 + // reserve one extra byte for termination
4.53 + if (!output_string) {
4.54 + result = UTF8PROC_ERROR_NOMEM;
4.55 + break;
4.56 + }
4.57 + result = utf8proc_decompose(
4.58 + VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ,
4.59 + (int32_t *)VARDATA(output_string), result, UTF8PROC_PGSQL_OPTS);
4.60 + if (result < 0) break;
4.61 + result = utf8proc_reencode((int32_t *)VARDATA(output_string), result,
4.62 + UTF8PROC_PGSQL_OPTS);
4.63 + } while (0);
4.64 + PG_FREE_IF_COPY(input_string, 0);
4.65 if (result < 0) {
4.66 int sqlerrcode;
4.67 + if (output_string) pfree(output_string);
4.68 switch(result) {
4.69 case UTF8PROC_ERROR_NOMEM:
4.70 sqlerrcode = ERRCODE_OUT_OF_MEMORY; break;
4.71 @@ -72,7 +94,7 @@
4.72 sqlerrcode = ERRCODE_PROGRAM_LIMIT_EXCEEDED; break;
4.73 case UTF8PROC_ERROR_INVALIDUTF8:
4.74 case UTF8PROC_ERROR_NOTASSIGNED:
4.75 - sqlerrcode = ERRCODE_DATA_EXCEPTION; break;
4.76 + PG_RETURN_NULL();
4.77 default:
4.78 sqlerrcode = ERRCODE_INTERNAL_ERROR;
4.79 }
4.80 @@ -80,13 +102,11 @@
4.81 errcode(sqlerrcode),
4.82 errmsg("%s", utf8proc_errmsg(result))
4.83 ));
4.84 + } else {
4.85 + VARATT_SIZEP(output_string) = result + VARHDRSZ;
4.86 + PG_RETURN_TEXT_P(output_string);
4.87 }
4.88 - {
4.89 - Datum retval;
4.90 - retval = DirectFunctionCall1(textin, CStringGetDatum(output_string));
4.91 - free(output_string);
4.92 - PG_RETURN_TEXT_P(DatumGetTextP(retval));
4.93 - }
4.94 + PG_RETURN_NULL(); // prohibit compiler warning
4.95 }
4.96
4.97
5.1 --- a/ruby/utf8proc.rb Fri Jun 02 12:00:00 2006 +0200
5.2 +++ b/ruby/utf8proc.rb Tue Jun 20 12:00:00 2006 +0200
5.3 @@ -33,7 +33,7 @@
5.4
5.5 ##
5.6 # File name: ruby/utf8proc.rb
5.7 - # Version: 0.1
5.8 + # Version: 0.2
5.9 # Last changed: 2006-05-31
5.10 #
5.11 # Description:
6.1 --- a/ruby/utf8proc_native.c Fri Jun 02 12:00:00 2006 +0200
6.2 +++ b/ruby/utf8proc_native.c Tue Jun 20 12:00:00 2006 +0200
6.3 @@ -33,8 +33,8 @@
6.4
6.5 /*
6.6 * File name: ruby/utf8proc_native.c
6.7 - * Version: 0.1
6.8 - * Last changed: 2006-05-31
6.9 + * Version: 0.2
6.10 + * Last changed: 2006-06-20
6.11 *
6.12 * Description:
6.13 * Native part of the ruby wrapper for libutf8proc.
6.14 @@ -97,13 +97,13 @@
6.15 if (result < 0) {
6.16 free(env->buffer);
6.17 env->buffer = 0;
6.18 - func_map_error(result);
6.19 + utf8proc_ruby_map_error(result);
6.20 }
6.21 result = utf8proc_reencode(env->buffer, result, options);
6.22 if (result < 0) {
6.23 free(env->buffer);
6.24 env->buffer = 0;
6.25 - func_map_error(result);
6.26 + utf8proc_ruby_map_error(result);
6.27 }
6.28 retval = rb_str_new((char *)env->buffer, result);
6.29 free(env->buffer);