utf8proc

diff utf8proc.c @ 0:a0368662434c

Version 0.1
author jbe
date Fri Jun 02 12:00:00 2006 +0200 (2006-06-02)
parents
children 61a89ecc2fb9
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/utf8proc.c	Fri Jun 02 12:00:00 2006 +0200
     1.3 @@ -0,0 +1,444 @@
     1.4 +/*
     1.5 + *  Copyright (c) 2006, FlexiGuided GmbH, Berlin, Germany
     1.6 + *  Author: Jan Behrens <jan.behrens@flexiguided.de>
     1.7 + *  All rights reserved.
     1.8 + *
     1.9 + *  Redistribution and use in source and binary forms, with or without
    1.10 + *  modification, are permitted provided that the following conditions are
    1.11 + *  met:
    1.12 + *
    1.13 + *  1. Redistributions of source code must retain the above copyright
    1.14 + *     notice, this list of conditions and the following disclaimer.
    1.15 + *  2. Redistributions in binary form must reproduce the above copyright
    1.16 + *     notice, this list of conditions and the following disclaimer in the
    1.17 + *     documentation and/or other materials provided with the distribution.
    1.18 + *  3. Neither the name of the FlexiGuided GmbH nor the names of its
    1.19 + *     contributors may be used to endorse or promote products derived from
    1.20 + *     this software without specific prior written permission.
    1.21 + *
    1.22 + *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    1.23 + *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    1.24 + *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    1.25 + *  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
    1.26 + *  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    1.27 + *  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    1.28 + *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    1.29 + *  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
    1.30 + *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
    1.31 + *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    1.32 + *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    1.33 + *
    1.34 + *
    1.35 + *  This library contains derived data from a modified version of the
    1.36 + *  Unicode data files.
    1.37 + *
    1.38 + *  The original data files are available at
    1.39 + *  http://www.unicode.org/Public/UNIDATA/
    1.40 + *
    1.41 + *  Please notice the copyright statement in the file "utf8proc_data.c".
    1.42 + *
    1.43 + */
    1.44 +
    1.45 +
    1.46 +/*
    1.47 + *  File name:    utf8proc.c
    1.48 + *  Version:      0.1
    1.49 + *  Last changed: 2006-05-31
    1.50 + *
    1.51 + *  Description:
    1.52 + *  Implementation of libutf8proc.
    1.53 + */
    1.54 +
    1.55 +
    1.56 +#include "utf8proc.h"
    1.57 +#include "utf8proc_data.c"
    1.58 +
    1.59 +
    1.60 +const int8_t utf8proc_utf8class[256] = {
    1.61 +  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1.62 +  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1.63 +  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1.64 +  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1.65 +  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1.66 +  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1.67 +  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1.68 +  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1.69 +  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    1.70 +  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    1.71 +  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    1.72 +  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    1.73 +  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1.74 +  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1.75 +  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    1.76 +  4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0 };
    1.77 +
    1.78 +#define UTF8PROC_HANGUL_SBASE 0xAC00
    1.79 +#define UTF8PROC_HANGUL_LBASE 0x1100
    1.80 +#define UTF8PROC_HANGUL_VBASE 0x1161
    1.81 +#define UTF8PROC_HANGUL_TBASE 0x11A7
    1.82 +#define UTF8PROC_HANGUL_LCOUNT 19
    1.83 +#define UTF8PROC_HANGUL_VCOUNT 21
    1.84 +#define UTF8PROC_HANGUL_TCOUNT 28
    1.85 +#define UTF8PROC_HANGUL_NCOUNT 588
    1.86 +#define UTF8PROC_HANGUL_SCOUNT 11172
    1.87 +
    1.88 +
    1.89 +const char *utf8proc_errmsg(ssize_t errcode) {
    1.90 +  switch (errcode) {
    1.91 +    case UTF8PROC_ERROR_NOMEM:
    1.92 +    return "Memory for processing UTF-8 data could not be allocated.";
    1.93 +    case UTF8PROC_ERROR_OVERFLOW:
    1.94 +    return "UTF-8 string is too long to be processed.";
    1.95 +    case UTF8PROC_ERROR_INVALIDUTF8:
    1.96 +    return "Invalid UTF-8 string";
    1.97 +    case UTF8PROC_ERROR_NOTASSIGNED:
    1.98 +    return "Unassigned Unicode code point found in UTF-8 string.";
    1.99 +    default:
   1.100 +    return "An unknown error occured while processing UTF-8 data.";
   1.101 +  }
   1.102 +}
   1.103 +
   1.104 +ssize_t utf8proc_iterate(uint8_t *str, ssize_t strlen, int32_t *dst) {
   1.105 +  int length;
   1.106 +  int i;
   1.107 +  int32_t uc = -1;
   1.108 +  *dst = -1;
   1.109 +  if (!strlen) return 0;
   1.110 +  length = utf8proc_utf8class[str[0]];
   1.111 +  if (!length) return UTF8PROC_ERROR_INVALIDUTF8;
   1.112 +  if (strlen >= 0 && length > strlen) return UTF8PROC_ERROR_INVALIDUTF8;
   1.113 +  for (i=1; i<length; i++) {
   1.114 +    if ((str[i] & 0xC0) != 0x80) return UTF8PROC_ERROR_INVALIDUTF8;
   1.115 +  }
   1.116 +  switch (length) {
   1.117 +    case 1:
   1.118 +    uc = str[0];
   1.119 +    break;
   1.120 +    case 2:
   1.121 +    uc = ((str[0] & 0x1F) <<  6) + (str[1] & 0x3F);
   1.122 +    if (uc < 0x80) uc = -1;
   1.123 +    break;
   1.124 +    case 3:
   1.125 +    uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) <<  6)
   1.126 +      + (str[2] & 0x3F);
   1.127 +    if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000) ||
   1.128 +      (uc >= 0xFDD0 && uc < 0xFDF0)) uc = -1;
   1.129 +    break;
   1.130 +    case 4:
   1.131 +    uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12)
   1.132 +      + ((str[2] & 0x3F) <<  6) + (str[3] & 0x3F);
   1.133 +    if (uc < 0x10000 || uc >= 0x110000) uc = -1;
   1.134 +    break;
   1.135 +  }
   1.136 +  if (uc < 0 || ((uc & 0xFFFF) >= 0xFFFE)) return UTF8PROC_ERROR_INVALIDUTF8;
   1.137 +  *dst = uc;
   1.138 +  return length;
   1.139 +}
   1.140 +
   1.141 +ssize_t utf8proc_encode_char(int32_t uc, uint8_t *dst) {
   1.142 +  if (uc < 0x00) {
   1.143 +    return 0;
   1.144 +  } else if (uc < 0x80) {
   1.145 +    dst[0] = uc;
   1.146 +    return 1;
   1.147 +  } else if (uc < 0x800) {
   1.148 +    dst[0] = 0xC0 + (uc >> 6);
   1.149 +    dst[1] = 0x80 + (uc & 0x3F);
   1.150 +    return 2;
   1.151 +  } else if (uc < 0x10000) {
   1.152 +    dst[0] = 0xE0 + (uc >> 12);
   1.153 +    dst[1] = 0x80 + ((uc >> 6) & 0x3F);
   1.154 +    dst[2] = 0x80 + (uc & 0x3F);
   1.155 +    return 3;
   1.156 +  } else if (uc < 0x110000) {
   1.157 +    dst[0] = 0xF0 + (uc >> 18);
   1.158 +    dst[1] = 0x80 + ((uc >> 12) & 0x3F);
   1.159 +    dst[2] = 0x80 + ((uc >> 6) & 0x3F);
   1.160 +    dst[3] = 0x80 + (uc & 0x3F);
   1.161 +    return 4;
   1.162 +  } else return 0;
   1.163 +}
   1.164 +
   1.165 +const utf8proc_property_t *utf8proc_get_property(int32_t uc) {
   1.166 +  // ASSERT: uc >= 0 && uc < 0x110000
   1.167 +  return utf8proc_properties + (
   1.168 +    utf8proc_stage2table[
   1.169 +      utf8proc_stage1table[uc >> 8] + (uc & 0xFF)
   1.170 +    ]
   1.171 +  );
   1.172 +}
   1.173 +
   1.174 +ssize_t utf8proc_decompose_char(int32_t uc, int32_t *dst, ssize_t bufsize,
   1.175 +    int options) {
   1.176 +  // ASSERT: uc >= 0 && uc < 0x110000
   1.177 +  const utf8proc_property_t *property;
   1.178 +  int32_t hangul_sindex;
   1.179 +  property = utf8proc_get_property(uc);
   1.180 +  hangul_sindex = uc - UTF8PROC_HANGUL_SBASE;
   1.181 +  if (hangul_sindex >= 0 && hangul_sindex < UTF8PROC_HANGUL_SCOUNT) {
   1.182 +    int32_t hangul_tindex;
   1.183 +    if (bufsize >= 1) {
   1.184 +      dst[0] = UTF8PROC_HANGUL_LBASE +
   1.185 +        hangul_sindex / UTF8PROC_HANGUL_NCOUNT;
   1.186 +      if (bufsize >= 2) dst[1] = UTF8PROC_HANGUL_VBASE +
   1.187 +        (hangul_sindex % UTF8PROC_HANGUL_NCOUNT) / UTF8PROC_HANGUL_TCOUNT;
   1.188 +    }
   1.189 +    hangul_tindex = hangul_sindex % UTF8PROC_HANGUL_TCOUNT;
   1.190 +    if (!hangul_tindex) return 2;
   1.191 +    if (bufsize >= 3) dst[2] = UTF8PROC_HANGUL_TBASE + hangul_tindex;
   1.192 +    return 3;
   1.193 +  } else if ((options & UTF8PROC_REJECTNA) && !property->category) {
   1.194 +    return UTF8PROC_ERROR_NOTASSIGNED;
   1.195 +  } else if ((options & UTF8PROC_IGNORE) && property->ignorable) {
   1.196 +    return 0;
   1.197 +  } else if ((options & UTF8PROC_CASEFOLD) && property->casefold_mapping) {
   1.198 +    const int32_t *casefold_entry;
   1.199 +    ssize_t written = 0;
   1.200 +    for (casefold_entry = property->casefold_mapping;
   1.201 +        *casefold_entry >= 0; casefold_entry++) {
   1.202 +      written += utf8proc_decompose_char(*casefold_entry, dst+written,
   1.203 +        (bufsize > written) ? (bufsize - written) : 0, options);
   1.204 +      if (written < 0) return UTF8PROC_ERROR_OVERFLOW;
   1.205 +    }
   1.206 +    return written;
   1.207 +  } else if (property->decomp_mapping &&
   1.208 +      (!property->decomp_type || (options & UTF8PROC_COMPAT))) {
   1.209 +    const int32_t *decomp_entry;
   1.210 +    ssize_t written = 0;
   1.211 +    for (decomp_entry = property->decomp_mapping;
   1.212 +        *decomp_entry >= 0; decomp_entry++) {
   1.213 +      written += utf8proc_decompose_char(*decomp_entry, dst+written,
   1.214 +        (bufsize > written) ? (bufsize - written) : 0, options);
   1.215 +      if (written < 0) return UTF8PROC_ERROR_OVERFLOW;
   1.216 +    }
   1.217 +    return written;
   1.218 +  } else {
   1.219 +    if (bufsize >= 1) *dst = uc;
   1.220 +    return 1;
   1.221 +  }
   1.222 +}
   1.223 +
   1.224 +ssize_t utf8proc_decompose(uint8_t *str, ssize_t strlen,
   1.225 +    int32_t *buffer, ssize_t bufsize, int options) {
   1.226 +  // strlen will be ignored, if UTF8PROC_NULLTERM is set in options
   1.227 +  ssize_t wpos = 0;
   1.228 +  {
   1.229 +    int32_t uc;
   1.230 +    ssize_t rpos = 0;
   1.231 +    ssize_t decomp_result;
   1.232 +    while (1) {
   1.233 +      if (options & UTF8PROC_NULLTERM) {
   1.234 +        rpos += utf8proc_iterate(str + rpos, -1, &uc);
   1.235 +        // checking of return value is not neccessary,
   1.236 +        // as 'uc' is < 0 in case of error
   1.237 +        if (uc < 0) return UTF8PROC_ERROR_INVALIDUTF8;
   1.238 +        if (rpos < 0) return UTF8PROC_ERROR_OVERFLOW;
   1.239 +        if (uc == 0) break;
   1.240 +      } else {
   1.241 +        if (rpos >= strlen) break;
   1.242 +        rpos += utf8proc_iterate(str + rpos, strlen - rpos, &uc);
   1.243 +        if (uc < 0) return UTF8PROC_ERROR_INVALIDUTF8;
   1.244 +      }
   1.245 +      decomp_result = utf8proc_decompose_char(
   1.246 +        uc, buffer + wpos, (bufsize > wpos) ? (bufsize - wpos) : 0, options
   1.247 +      );
   1.248 +      if (decomp_result < 0) return decomp_result;
   1.249 +      wpos += decomp_result;
   1.250 +      // prohibiting integer overflows due to too long strings:
   1.251 +      if (wpos < 0 || wpos > SSIZE_MAX/sizeof(int32_t)/2)
   1.252 +        return UTF8PROC_ERROR_OVERFLOW;
   1.253 +    }
   1.254 +  }
   1.255 +  if (bufsize >= wpos) {
   1.256 +    ssize_t pos = 0;
   1.257 +    while (pos < wpos-1) {
   1.258 +      int32_t uc1, uc2;
   1.259 +      const utf8proc_property_t *property1, *property2;
   1.260 +      uc1 = buffer[pos];
   1.261 +      uc2 = buffer[pos+1];
   1.262 +      property1 = utf8proc_get_property(uc1);
   1.263 +      property2 = utf8proc_get_property(uc2);
   1.264 +      if (property1->combining_class > property2->combining_class &&
   1.265 +          property2->combining_class > 0) {
   1.266 +        buffer[pos] = uc2;
   1.267 +        buffer[pos+1] = uc1;
   1.268 +        if (pos > 0) pos--; else pos++;
   1.269 +      } else {
   1.270 +        pos++;
   1.271 +      }
   1.272 +    }
   1.273 +  }
   1.274 +  return wpos;
   1.275 +}
   1.276 +
   1.277 +ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, int options) {
   1.278 +  // UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored
   1.279 +  // ASSERT: 'buffer' has one spare byte of free space at the end!
   1.280 +  if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) {
   1.281 +    ssize_t rpos;
   1.282 +    ssize_t wpos = 0;
   1.283 +    int32_t uc;
   1.284 +    for (rpos = 0; rpos < length; rpos++) {
   1.285 +      uc = buffer[rpos];
   1.286 +      if (uc == 0x000D && rpos < length-1 && buffer[rpos+1] == 0x000A) rpos++;
   1.287 +      if (uc == 0x000A || uc == 0x000D || uc == 0x0085 ||
   1.288 +          ((options & UTF8PROC_STRIPCC) && (uc == 0x000B || uc == 0x000C))) {
   1.289 +        if (options & UTF8PROC_NLF2LS) {
   1.290 +          if (options & UTF8PROC_NLF2PS) {
   1.291 +            buffer[wpos++] = 0x000A;
   1.292 +          } else {
   1.293 +            buffer[wpos++] = 0x2028;
   1.294 +          }
   1.295 +        } else {
   1.296 +          if (options & UTF8PROC_NLF2PS) {
   1.297 +            buffer[wpos++] = 0x2029;
   1.298 +          } else {
   1.299 +            buffer[wpos++] = 0x0020;
   1.300 +          }
   1.301 +        }
   1.302 +      } else if ((options & UTF8PROC_STRIPCC) &&
   1.303 +          (uc < 0x0020 || (uc >= 0x007F && uc < 0x00A0))) {
   1.304 +        if (uc == 0x0009) buffer[wpos++] = 0x0020;
   1.305 +      } else {
   1.306 +        buffer[wpos++] = uc;
   1.307 +      }
   1.308 +    }
   1.309 +    length = wpos;
   1.310 +  }
   1.311 +  if (options & UTF8PROC_COMPOSE) {
   1.312 +    int32_t *starter = NULL;
   1.313 +    int32_t current_char;
   1.314 +    const utf8proc_property_t *starter_property = NULL, *current_property;
   1.315 +    int16_t max_combining_class = -1;
   1.316 +    ssize_t rpos;
   1.317 +    ssize_t wpos = 0;
   1.318 +    int32_t composition;
   1.319 +    for (rpos = 0; rpos < length; rpos++) {
   1.320 +      current_char = buffer[rpos];
   1.321 +      current_property = utf8proc_get_property(current_char);
   1.322 +      if (starter && current_property->combining_class > max_combining_class) {
   1.323 +        // combination perhaps possible
   1.324 +        int32_t hangul_lindex;
   1.325 +        int32_t hangul_sindex;
   1.326 +        hangul_lindex = *starter - UTF8PROC_HANGUL_LBASE;
   1.327 +        if (hangul_lindex >= 0 && hangul_lindex < UTF8PROC_HANGUL_LCOUNT) {
   1.328 +          int32_t hangul_vindex;
   1.329 +          hangul_vindex = current_char - UTF8PROC_HANGUL_VBASE;
   1.330 +          if (hangul_vindex >= 0 && hangul_vindex < UTF8PROC_HANGUL_VCOUNT) {
   1.331 +            *starter = UTF8PROC_HANGUL_SBASE +
   1.332 +              (hangul_lindex * UTF8PROC_HANGUL_VCOUNT + hangul_vindex) *
   1.333 +              UTF8PROC_HANGUL_TCOUNT;
   1.334 +            starter_property = NULL;
   1.335 +            continue;
   1.336 +          }
   1.337 +        }
   1.338 +        hangul_sindex = *starter - UTF8PROC_HANGUL_SBASE;
   1.339 +        if (hangul_sindex >= 0 && hangul_sindex < UTF8PROC_HANGUL_SCOUNT &&
   1.340 +            (hangul_sindex % UTF8PROC_HANGUL_TCOUNT) == 0) {
   1.341 +          int32_t hangul_tindex;
   1.342 +          hangul_tindex = current_char - UTF8PROC_HANGUL_TBASE;
   1.343 +          if (hangul_tindex >= 0 && hangul_tindex < UTF8PROC_HANGUL_TCOUNT) {
   1.344 +            *starter += hangul_tindex;
   1.345 +            starter_property = NULL;
   1.346 +            continue;
   1.347 +          }
   1.348 +        }
   1.349 +        if (!starter_property) {
   1.350 +          starter_property = utf8proc_get_property(*starter);
   1.351 +        }
   1.352 +        if (starter_property->comb1st_index >= 0 &&
   1.353 +            current_property->comb2nd_index >= 0) {
   1.354 +          composition = utf8proc_combinations[
   1.355 +            starter_property->comb1st_index +
   1.356 +            current_property->comb2nd_index
   1.357 +          ];
   1.358 +          if (composition >= 0 && (!(options & UTF8PROC_STABLE) ||
   1.359 +              !(utf8proc_get_property(composition)->comp_exclusion))) {
   1.360 +            *starter = composition;
   1.361 +            starter_property = NULL;
   1.362 +            continue;
   1.363 +          }
   1.364 +        }
   1.365 +      }
   1.366 +      buffer[wpos] = current_char;
   1.367 +      if (current_property->combining_class) {
   1.368 +        if (current_property->combining_class > max_combining_class) {
   1.369 +          max_combining_class = current_property->combining_class;
   1.370 +        }
   1.371 +      } else {
   1.372 +        starter = buffer + wpos;
   1.373 +        starter_property = NULL;
   1.374 +        max_combining_class = -1;
   1.375 +      }
   1.376 +      wpos++;
   1.377 +    }
   1.378 +    length = wpos;
   1.379 +  }
   1.380 +  {
   1.381 +    ssize_t rpos, wpos = 0;
   1.382 +    int32_t uc;
   1.383 +    for (rpos = 0; rpos < length; rpos++) {
   1.384 +      uc = buffer[rpos];
   1.385 +      wpos += utf8proc_encode_char(uc, ((uint8_t *)buffer) + wpos);
   1.386 +    }
   1.387 +    ((uint8_t *)buffer)[wpos] = 0;
   1.388 +    return wpos;
   1.389 +  }
   1.390 +}
   1.391 +
   1.392 +ssize_t utf8proc_map(uint8_t *str, ssize_t strlen, uint8_t **dstptr,
   1.393 +    int options) {
   1.394 +  int32_t *buffer;
   1.395 +  ssize_t result;
   1.396 +  *dstptr = NULL;
   1.397 +  result = utf8proc_decompose(str, strlen, NULL, 0, options);
   1.398 +  if (result < 0) return result;
   1.399 +  buffer = malloc(result * sizeof(int32_t) + 1);
   1.400 +  if (!buffer) return UTF8PROC_ERROR_NOMEM;
   1.401 +  result = utf8proc_decompose(str, strlen, buffer, result, options);
   1.402 +  if (result < 0) {
   1.403 +    free(buffer);
   1.404 +    return result;
   1.405 +  }
   1.406 +  result = utf8proc_reencode(buffer, result, options);
   1.407 +  if (result < 0) {
   1.408 +    free(buffer);
   1.409 +    return result;
   1.410 +  }
   1.411 +  {
   1.412 +    int32_t *newptr;
   1.413 +    newptr = realloc(buffer, result+1);
   1.414 +    if (newptr) buffer = newptr;
   1.415 +  }
   1.416 +  *dstptr = (uint8_t *)buffer;
   1.417 +  return result;
   1.418 +}
   1.419 +
   1.420 +uint8_t *utf8proc_NFD(uint8_t *str) {
   1.421 +  uint8_t *retval;
   1.422 +  utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE);
   1.423 +  return retval;
   1.424 +}
   1.425 +
   1.426 +uint8_t *utf8proc_NFC(uint8_t *str) {
   1.427 +  uint8_t *retval;
   1.428 +  utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
   1.429 +    UTF8PROC_COMPOSE);
   1.430 +  return retval;
   1.431 +}
   1.432 +
   1.433 +uint8_t *utf8proc_NFKD(uint8_t *str) {
   1.434 +  uint8_t *retval;
   1.435 +  utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
   1.436 +    UTF8PROC_COMPAT);
   1.437 +  return retval;
   1.438 +}
   1.439 +
   1.440 +uint8_t *utf8proc_NFKC(uint8_t *str) {
   1.441 +  uint8_t *retval;
   1.442 +  utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
   1.443 +    UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
   1.444 +  return retval;
   1.445 +}
   1.446 +
   1.447 +

Impressum / About Us