utf8proc
diff utf8proc.h @ 15:15450ff3d454
Contribution from libmojibake fork
author | Jiahao Chen, Steven G. Johnson, Anthony David Kelman |
---|---|
date | Fri Nov 21 08:27:44 2014 -0500 (2014-11-21) |
parents | 82d33620bb8a |
children |
line diff
1.1 --- a/utf8proc.h Wed Nov 27 12:00:00 2013 +0100
1.2 +++ b/utf8proc.h Fri Nov 21 08:27:44 2014 -0500
1.3 @@ -44,7 +44,7 @@
1.4 * - rejection of illegal UTF-8 data
1.5 * (i.e. UTF-8 encoded UTF-16 surrogates)
1.6 * - support for korean hangul characters
1.7 - * Unicode Version 5.0.0 is supported.
1.8 + * Unicode Version 7.0.0 is supported.
1.9 */
1.10
1.11
1.12 @@ -60,19 +60,33 @@
1.13 typedef short int16_t;
1.14 typedef unsigned short uint16_t;
1.15 typedef int int32_t;
1.16 -#ifdef _WIN64
1.17 -#define ssize_t __int64
1.18 -#else
1.19 -#define ssize_t int
1.20 -#endif
1.21 +# ifdef _WIN64
1.22 +# define ssize_t __int64
1.23 +# else
1.24 +# define ssize_t int
1.25 +# endif
1.26 +# ifndef __cplusplus
1.27 typedef unsigned char bool;
1.28 enum {false, true};
1.29 +# endif
1.30 #else
1.31 -#include <stdbool.h>
1.32 -#include <inttypes.h>
1.33 +# include <stdbool.h>
1.34 +# include <inttypes.h>
1.35 #endif
1.36 #include <limits.h>
1.37
1.38 +#ifdef _WIN32
1.39 +# ifdef UTF8PROC_EXPORTS
1.40 +# define DLLEXPORT __declspec(dllexport)
1.41 +# else
1.42 +# define DLLEXPORT __declspec(dllimport)
1.43 +# endif
1.44 +#elif __GNUC__ >= 4
1.45 +# define DLLEXPORT __attribute__ ((visibility("default")))
1.46 +#else
1.47 +# define DLLEXPORT
1.48 +#endif
1.49 +
1.50 #ifdef __cplusplus
1.51 extern "C" {
1.52 #endif
1.53 @@ -100,7 +114,7 @@
1.54 * Flags being regarded by several functions in the library:
1.55 * NULLTERM: The given UTF-8 input is NULL terminated.
1.56 * STABLE: Unicode Versioning Stability has to be respected.
1.57 - * COMPAT: Compatiblity decomposition
1.58 + * COMPAT: Compatibility decomposition
1.59 * (i.e. formatting information is lost)
1.60 * COMPOSE: Return a result with composed characters.
1.61 * DECOMPOSE: Return a result with decomposed characters.
1.62 @@ -218,6 +232,10 @@
1.63 #define UTF8PROC_BIDI_CLASS_S 17
1.64 #define UTF8PROC_BIDI_CLASS_WS 18
1.65 #define UTF8PROC_BIDI_CLASS_ON 19
1.66 +#define UTF8PROC_BIDI_CLASS_LRI 20 /* new in Unicode 6.3 */
1.67 +#define UTF8PROC_BIDI_CLASS_RLI 21 /* new in Unicode 6.3 */
1.68 +#define UTF8PROC_BIDI_CLASS_FSI 22 /* new in Unicode 6.3 */
1.69 +#define UTF8PROC_BIDI_CLASS_PDI 23 /* new in Unicode 6.3 */
1.70 #define UTF8PROC_DECOMP_TYPE_FONT 1
1.71 #define UTF8PROC_DECOMP_TYPE_NOBREAK 2
1.72 #define UTF8PROC_DECOMP_TYPE_INITIAL 3
1.73 @@ -235,16 +253,16 @@
1.74 #define UTF8PROC_DECOMP_TYPE_FRACTION 15
1.75 #define UTF8PROC_DECOMP_TYPE_COMPAT 16
1.76
1.77 -extern const int8_t utf8proc_utf8class[256];
1.78 +DLLEXPORT extern const int8_t utf8proc_utf8class[256];
1.79
1.80 -const char *utf8proc_version(void);
1.81 +DLLEXPORT const char *utf8proc_version(void);
1.82
1.83 -const char *utf8proc_errmsg(ssize_t errcode);
1.84 +DLLEXPORT const char *utf8proc_errmsg(ssize_t errcode);
1.85 /*
1.86 * Returns a static error string for the given error code.
1.87 */
1.88
1.89 -ssize_t utf8proc_iterate(const uint8_t *str, ssize_t strlen, int32_t *dst);
1.90 +DLLEXPORT ssize_t utf8proc_iterate(const uint8_t *str, ssize_t strlen, int32_t *dst);
1.91 /*
1.92 * Reads a single char from the UTF-8 sequence being pointed to by 'str'.
1.93 * The maximum number of bytes read is 'strlen', unless 'strlen' is
1.94 @@ -255,12 +273,12 @@
1.95 * negative error code is returned.
1.96 */
1.97
1.98 -bool utf8proc_codepoint_valid(int32_t uc);
1.99 +DLLEXPORT bool utf8proc_codepoint_valid(int32_t uc);
1.100 /*
1.101 * Returns 1, if the given unicode code-point is valid, otherwise 0.
1.102 */
1.103
1.104 -ssize_t utf8proc_encode_char(int32_t uc, uint8_t *dst);
1.105 +DLLEXPORT ssize_t utf8proc_encode_char(int32_t uc, uint8_t *dst);
1.106 /*
1.107 * Encodes the unicode char with the code point 'uc' as an UTF-8 string in
1.108 * the byte array being pointed to by 'dst'. This array has to be at least
1.109 @@ -270,7 +288,7 @@
1.110 * This function does not check if 'uc' is a valid unicode code point.
1.111 */
1.112
1.113 -const utf8proc_property_t *utf8proc_get_property(int32_t uc);
1.114 +DLLEXPORT const utf8proc_property_t *utf8proc_get_property(int32_t uc);
1.115 /*
1.116 * Returns a pointer to a (constant) struct containing information about
1.117 * the unicode char with the given code point 'uc'.
1.118 @@ -280,7 +298,7 @@
1.119 * 0x10FFFF, otherwise the program might crash!
1.120 */
1.121
1.122 -ssize_t utf8proc_decompose_char(
1.123 +DLLEXPORT ssize_t utf8proc_decompose_char(
1.124 int32_t uc, int32_t *dst, ssize_t bufsize,
1.125 int options, int *last_boundclass
1.126 );
1.127 @@ -308,7 +326,7 @@
1.128 * 0x10FFFF, otherwise the program might crash!
1.129 */
1.130
1.131 -ssize_t utf8proc_decompose(
1.132 +DLLEXPORT ssize_t utf8proc_decompose(
1.133 const uint8_t *str, ssize_t strlen,
1.134 int32_t *buffer, ssize_t bufsize, int options
1.135 );
1.136 @@ -326,7 +344,7 @@
1.137 * buffer size is returned.
1.138 */
1.139
1.140 -ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, int options);
1.141 +DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, int options);
1.142 /*
1.143 * Reencodes the sequence of unicode characters given by the pointer
1.144 * 'buffer' and 'length' as UTF-8.
1.145 @@ -349,7 +367,7 @@
1.146 * crash!
1.147 */
1.148
1.149 -ssize_t utf8proc_map(
1.150 +DLLEXPORT ssize_t utf8proc_map(
1.151 const uint8_t *str, ssize_t strlen, uint8_t **dstptr, int options
1.152 );
1.153 /*
1.154 @@ -368,10 +386,10 @@
1.155 * 'malloc', and has theirfore to be freed with 'free'.
1.156 */
1.157
1.158 -uint8_t *utf8proc_NFD(const uint8_t *str);
1.159 -uint8_t *utf8proc_NFC(const uint8_t *str);
1.160 -uint8_t *utf8proc_NFKD(const uint8_t *str);
1.161 -uint8_t *utf8proc_NFKC(const uint8_t *str);
1.162 +DLLEXPORT uint8_t *utf8proc_NFD(const uint8_t *str);
1.163 +DLLEXPORT uint8_t *utf8proc_NFC(const uint8_t *str);
1.164 +DLLEXPORT uint8_t *utf8proc_NFKD(const uint8_t *str);
1.165 +DLLEXPORT uint8_t *utf8proc_NFKC(const uint8_t *str);
1.166 /*
1.167 * Returns a pointer to newly allocated memory of a NFD, NFC, NFKD or NFKC
1.168 * normalized version of the null-terminated string 'str'.