utf8proc

diff utf8proc.h @ 15:15450ff3d454

Contribution from libmojibake fork
author Jiahao Chen, Steven G. Johnson, Anthony David Kelman
date Fri Nov 21 08:27:44 2014 -0500 (2014-11-21)
parents 82d33620bb8a
children
line diff
     1.1 --- a/utf8proc.h	Wed Nov 27 12:00:00 2013 +0100
     1.2 +++ b/utf8proc.h	Fri Nov 21 08:27:44 2014 -0500
     1.3 @@ -44,7 +44,7 @@
     1.4   *  - rejection of illegal UTF-8 data
     1.5   *    (i.e. UTF-8 encoded UTF-16 surrogates)
     1.6   *  - support for korean hangul characters
     1.7 - *  Unicode Version 5.0.0 is supported.
     1.8 + *  Unicode Version 7.0.0 is supported.
     1.9   */
    1.10  
    1.11  
    1.12 @@ -60,19 +60,33 @@
    1.13  typedef short int16_t;
    1.14  typedef unsigned short uint16_t;
    1.15  typedef int int32_t;
    1.16 -#ifdef _WIN64
    1.17 -#define ssize_t __int64
    1.18 -#else
    1.19 -#define ssize_t int
    1.20 -#endif
    1.21 +#  ifdef _WIN64
    1.22 +#    define ssize_t __int64
    1.23 +#  else
    1.24 +#    define ssize_t int
    1.25 +#  endif
    1.26 +#  ifndef __cplusplus
    1.27  typedef unsigned char bool;
    1.28  enum {false, true};
    1.29 +#  endif
    1.30  #else
    1.31 -#include <stdbool.h>
    1.32 -#include <inttypes.h>
    1.33 +#  include <stdbool.h>
    1.34 +#  include <inttypes.h>
    1.35  #endif
    1.36  #include <limits.h>
    1.37  
    1.38 +#ifdef _WIN32
    1.39 +#  ifdef UTF8PROC_EXPORTS
    1.40 +#    define DLLEXPORT __declspec(dllexport)
    1.41 +#  else
    1.42 +#    define DLLEXPORT __declspec(dllimport)
    1.43 +#  endif
    1.44 +#elif __GNUC__ >= 4
    1.45 +#  define DLLEXPORT __attribute__ ((visibility("default")))
    1.46 +#else
    1.47 +#  define DLLEXPORT
    1.48 +#endif
    1.49 +
    1.50  #ifdef __cplusplus
    1.51  extern "C" {
    1.52  #endif
    1.53 @@ -100,7 +114,7 @@
    1.54   *  Flags being regarded by several functions in the library:
    1.55   *  NULLTERM:  The given UTF-8 input is NULL terminated.
    1.56   *  STABLE:    Unicode Versioning Stability has to be respected.
    1.57 - *  COMPAT:    Compatiblity decomposition
    1.58 + *  COMPAT:    Compatibility decomposition
    1.59   *             (i.e. formatting information is lost)
    1.60   *  COMPOSE:   Return a result with composed characters.
    1.61   *  DECOMPOSE: Return a result with decomposed characters.
    1.62 @@ -218,6 +232,10 @@
    1.63  #define UTF8PROC_BIDI_CLASS_S   17
    1.64  #define UTF8PROC_BIDI_CLASS_WS  18
    1.65  #define UTF8PROC_BIDI_CLASS_ON  19
    1.66 +#define UTF8PROC_BIDI_CLASS_LRI  20 /* new in Unicode 6.3 */
    1.67 +#define UTF8PROC_BIDI_CLASS_RLI  21 /* new in Unicode 6.3 */
    1.68 +#define UTF8PROC_BIDI_CLASS_FSI  22 /* new in Unicode 6.3 */
    1.69 +#define UTF8PROC_BIDI_CLASS_PDI  23 /* new in Unicode 6.3 */
    1.70  #define UTF8PROC_DECOMP_TYPE_FONT      1
    1.71  #define UTF8PROC_DECOMP_TYPE_NOBREAK   2
    1.72  #define UTF8PROC_DECOMP_TYPE_INITIAL   3
    1.73 @@ -235,16 +253,16 @@
    1.74  #define UTF8PROC_DECOMP_TYPE_FRACTION 15
    1.75  #define UTF8PROC_DECOMP_TYPE_COMPAT   16
    1.76  
    1.77 -extern const int8_t utf8proc_utf8class[256];
    1.78 +DLLEXPORT extern const int8_t utf8proc_utf8class[256];
    1.79  
    1.80 -const char *utf8proc_version(void);
    1.81 +DLLEXPORT const char *utf8proc_version(void);
    1.82  
    1.83 -const char *utf8proc_errmsg(ssize_t errcode);
    1.84 +DLLEXPORT const char *utf8proc_errmsg(ssize_t errcode);
    1.85  /*
    1.86   *  Returns a static error string for the given error code.
    1.87   */
    1.88  
    1.89 -ssize_t utf8proc_iterate(const uint8_t *str, ssize_t strlen, int32_t *dst);
    1.90 +DLLEXPORT ssize_t utf8proc_iterate(const uint8_t *str, ssize_t strlen, int32_t *dst);
    1.91  /*
    1.92   *  Reads a single char from the UTF-8 sequence being pointed to by 'str'.
    1.93   *  The maximum number of bytes read is 'strlen', unless 'strlen' is
    1.94 @@ -255,12 +273,12 @@
    1.95   *  negative error code is returned.
    1.96   */
    1.97  
    1.98 -bool utf8proc_codepoint_valid(int32_t uc);
    1.99 +DLLEXPORT bool utf8proc_codepoint_valid(int32_t uc);
   1.100  /*
   1.101   *  Returns 1, if the given unicode code-point is valid, otherwise 0.
   1.102   */
   1.103  
   1.104 -ssize_t utf8proc_encode_char(int32_t uc, uint8_t *dst);
   1.105 +DLLEXPORT ssize_t utf8proc_encode_char(int32_t uc, uint8_t *dst);
   1.106  /*
   1.107   *  Encodes the unicode char with the code point 'uc' as a UTF-8 string in
   1.108   *  the byte array being pointed to by 'dst'. This array has to be at least
   1.109 @@ -270,7 +288,7 @@
   1.110   *  This function does not check if 'uc' is a valid unicode code point.
   1.111   */
   1.112  
   1.113 -const utf8proc_property_t *utf8proc_get_property(int32_t uc);
   1.114 +DLLEXPORT const utf8proc_property_t *utf8proc_get_property(int32_t uc);
   1.115  /*
   1.116   *  Returns a pointer to a (constant) struct containing information about
   1.117   *  the unicode char with the given code point 'uc'.
   1.118 @@ -280,7 +298,7 @@
   1.119   *           0x10FFFF, otherwise the program might crash!
   1.120   */
   1.121  
   1.122 -ssize_t utf8proc_decompose_char(
   1.123 +DLLEXPORT ssize_t utf8proc_decompose_char(
   1.124    int32_t uc, int32_t *dst, ssize_t bufsize,
   1.125    int options, int *last_boundclass
   1.126  );
   1.127 @@ -308,7 +326,7 @@
   1.128   *           0x10FFFF, otherwise the program might crash!
   1.129   */
   1.130  
   1.131 -ssize_t utf8proc_decompose(
   1.132 +DLLEXPORT ssize_t utf8proc_decompose(
   1.133    const uint8_t *str, ssize_t strlen,
   1.134    int32_t *buffer, ssize_t bufsize, int options
   1.135  );
   1.136 @@ -326,7 +344,7 @@
   1.137   *  buffer size is returned.
   1.138   */
   1.139  
   1.140 -ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, int options);
   1.141 +DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, int options);
   1.142  /*
   1.143   *  Reencodes the sequence of unicode characters given by the pointer
   1.144   *  'buffer' and 'length' as UTF-8.
   1.145 @@ -349,7 +367,7 @@
   1.146   *           crash!
   1.147   */
   1.148  
   1.149 -ssize_t utf8proc_map(
   1.150 +DLLEXPORT ssize_t utf8proc_map(
   1.151    const uint8_t *str, ssize_t strlen, uint8_t **dstptr, int options
   1.152  );
   1.153  /*
   1.154 @@ -368,10 +386,10 @@
   1.155   *          'malloc', and therefore has to be freed with 'free'.
   1.156   */
   1.157  
   1.158 -uint8_t *utf8proc_NFD(const uint8_t *str);
   1.159 -uint8_t *utf8proc_NFC(const uint8_t *str);
   1.160 -uint8_t *utf8proc_NFKD(const uint8_t *str);
   1.161 -uint8_t *utf8proc_NFKC(const uint8_t *str);
   1.162 +DLLEXPORT uint8_t *utf8proc_NFD(const uint8_t *str);
   1.163 +DLLEXPORT uint8_t *utf8proc_NFC(const uint8_t *str);
   1.164 +DLLEXPORT uint8_t *utf8proc_NFKD(const uint8_t *str);
   1.165 +DLLEXPORT uint8_t *utf8proc_NFKC(const uint8_t *str);
   1.166  /*
   1.167   *  Returns a pointer to newly allocated memory of a NFD, NFC, NFKD or NFKC
   1.168   *  normalized version of the null-terminated string 'str'.

Impressum / About Us