utf8proc
annotate data_generator.rb @ 7:fcfd8c836c64
Version 1.1.1
- Added a new PostgreSQL function 'unistrip', which behaves like 'unifold', but also removes all character marks (e.g. accents).
- Changed license from BSD to MIT style.
- Added a new function 'utf8proc_codepoint_valid' to the C library.
- Changed compiler flags in Makefile from -g -O0 to -O2
- The ruby script, which was used to build the utf8proc_data.c file, is now included in the distribution.
- Added a new PostgreSQL function 'unistrip', which behaves like 'unifold', but also removes all character marks (e.g. accents).
- Changed license from BSD to MIT style.
- Added a new function 'utf8proc_codepoint_valid' to the C library.
- Changed compiler flags in Makefile from -g -O0 to -O2
- The ruby script, which was used to build the utf8proc_data.c file, is now included in the distribution.
author | jbe |
---|---|
date | Sun Jul 22 12:00:00 2007 +0200 (2007-07-22) |
parents | |
children | 6921ee309940 |
rev | line source |
---|---|
jbe@7 | 1 #!/usr/pkg/bin/ruby |
jbe@7 | 2 |
jbe@7 | 3 # This file was used to generate the 'unicode_data.c' file by parsing the |
jbe@7 | 4 # Unicode data file 'UnicodeData.txt' of the Unicode Character Database. |
jbe@7 | 5 # It is included for informational purposes only and not intended for |
jbe@7 | 6 # production use. |
jbe@7 | 7 |
jbe@7 | 8 |
jbe@7 | 9 # Copyright (c) 2006-2007 Jan Behrens, FlexiGuided GmbH, Berlin |
jbe@7 | 10 # |
jbe@7 | 11 # Permission is hereby granted, free of charge, to any person obtaining a |
jbe@7 | 12 # copy of this software and associated documentation files (the "Software"), |
jbe@7 | 13 # to deal in the Software without restriction, including without limitation |
jbe@7 | 14 # the rights to use, copy, modify, merge, publish, distribute, sublicense, |
jbe@7 | 15 # and/or sell copies of the Software, and to permit persons to whom the |
jbe@7 | 16 # Software is furnished to do so, subject to the following conditions: |
jbe@7 | 17 # |
jbe@7 | 18 # The above copyright notice and this permission notice shall be included in |
jbe@7 | 19 # all copies or substantial portions of the Software. |
jbe@7 | 20 # |
jbe@7 | 21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
jbe@7 | 22 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
jbe@7 | 23 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
jbe@7 | 24 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
jbe@7 | 25 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
jbe@7 | 26 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
jbe@7 | 27 # DEALINGS IN THE SOFTWARE. |
jbe@7 | 28 |
jbe@7 | 29 |
jbe@7 | 30 # This file contains derived data from a modified version of the |
jbe@7 | 31 # Unicode data files. The following license applies to that data: |
jbe@7 | 32 # |
jbe@7 | 33 # COPYRIGHT AND PERMISSION NOTICE |
jbe@7 | 34 # |
jbe@7 | 35 # Copyright (c) 1991-2007 Unicode, Inc. All rights reserved. Distributed |
jbe@7 | 36 # under the Terms of Use in http://www.unicode.org/copyright.html. |
jbe@7 | 37 # |
jbe@7 | 38 # Permission is hereby granted, free of charge, to any person obtaining a |
jbe@7 | 39 # copy of the Unicode data files and any associated documentation (the "Data |
jbe@7 | 40 # Files") or Unicode software and any associated documentation (the |
jbe@7 | 41 # "Software") to deal in the Data Files or Software without restriction, |
jbe@7 | 42 # including without limitation the rights to use, copy, modify, merge, |
jbe@7 | 43 # publish, distribute, and/or sell copies of the Data Files or Software, and |
jbe@7 | 44 # to permit persons to whom the Data Files or Software are furnished to do |
jbe@7 | 45 # so, provided that (a) the above copyright notice(s) and this permission |
jbe@7 | 46 # notice appear with all copies of the Data Files or Software, (b) both the |
jbe@7 | 47 # above copyright notice(s) and this permission notice appear in associated |
jbe@7 | 48 # documentation, and (c) there is clear notice in each modified Data File or |
jbe@7 | 49 # in the Software as well as in the documentation associated with the Data |
jbe@7 | 50 # File(s) or Software that the data or software has been modified. |
jbe@7 | 51 # |
jbe@7 | 52 # THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY |
jbe@7 | 53 # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
jbe@7 | 54 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF |
jbe@7 | 55 # THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS |
jbe@7 | 56 # INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR |
jbe@7 | 57 # CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF |
jbe@7 | 58 # USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER |
jbe@7 | 59 # TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR |
jbe@7 | 60 # PERFORMANCE OF THE DATA FILES OR SOFTWARE. |
jbe@7 | 61 # |
jbe@7 | 62 # Except as contained in this notice, the name of a copyright holder shall |
jbe@7 | 63 # not be used in advertising or otherwise to promote the sale, use or other |
jbe@7 | 64 # dealings in these Data Files or Software without prior written |
jbe@7 | 65 # authorization of the copyright holder. |
jbe@7 | 66 |
jbe@7 | 67 |
jbe@7 | 68 |
# Entries tagged Default_Ignorable_Code_Point, one per line. Each line starts
# with either a single hexadecimal code point ("00AD") or an inclusive range
# ("0000..0008"), followed by "; property # comment".
$ignorable_list = <<END_OF_LIST
0000..0008 ; Default_Ignorable_Code_Point # Cc [9] <control-0000>..<control-0008>
000E..001F ; Default_Ignorable_Code_Point # Cc [18] <control-000E>..<control-001F>
007F..0084 ; Default_Ignorable_Code_Point # Cc [6] <control-007F>..<control-0084>
0086..009F ; Default_Ignorable_Code_Point # Cc [26] <control-0086>..<control-009F>
00AD ; Default_Ignorable_Code_Point # Cf SOFT HYPHEN
034F ; Default_Ignorable_Code_Point # Mn COMBINING GRAPHEME JOINER
0600..0603 ; Default_Ignorable_Code_Point # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
06DD ; Default_Ignorable_Code_Point # Cf ARABIC END OF AYAH
070F ; Default_Ignorable_Code_Point # Cf SYRIAC ABBREVIATION MARK
115F..1160 ; Default_Ignorable_Code_Point # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER
17B4..17B5 ; Default_Ignorable_Code_Point # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
180B..180D ; Default_Ignorable_Code_Point # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
200B..200F ; Default_Ignorable_Code_Point # Cf [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK
202A..202E ; Default_Ignorable_Code_Point # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
2060..2063 ; Default_Ignorable_Code_Point # Cf [4] WORD JOINER..INVISIBLE SEPARATOR
2064..2069 ; Default_Ignorable_Code_Point # Cn [6] <reserved-2064>..<reserved-2069>
206A..206F ; Default_Ignorable_Code_Point # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
3164 ; Default_Ignorable_Code_Point # Lo HANGUL FILLER
D800..DFFF ; Default_Ignorable_Code_Point # Cs [2048] <surrogate-D800>..<surrogate-DFFF>
FE00..FE0F ; Default_Ignorable_Code_Point # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
FEFF ; Default_Ignorable_Code_Point # Cf ZERO WIDTH NO-BREAK SPACE
FFA0 ; Default_Ignorable_Code_Point # Lo HALFWIDTH HANGUL FILLER
FFF0..FFF8 ; Default_Ignorable_Code_Point # Cn [9] <reserved-FFF0>..<reserved-FFF8>
1D173..1D17A ; Default_Ignorable_Code_Point # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
E0001 ; Default_Ignorable_Code_Point # Cf LANGUAGE TAG
E0002..E001F ; Default_Ignorable_Code_Point # Cn [30] <reserved-E0002>..<reserved-E001F>
E0020..E007F ; Default_Ignorable_Code_Point # Cf [96] TAG SPACE..CANCEL TAG
E0080..E00FF ; Default_Ignorable_Code_Point # Cn [128] <reserved-E0080>..<reserved-E00FF>
E0100..E01EF ; Default_Ignorable_Code_Point # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
END_OF_LIST

# Expand the textual list into a flat array of integer code points.
$ignorable = []
# each_line (rather than String#each, which newer rubies no longer provide)
# yields one list entry at a time.
$ignorable_list.each_line do |entry|
  # The dots are escaped so that only a literal ".." marks a range. With
  # unescaped dots a single entry such as "00AD ; ..." backtracks into
  # "0" + "0A" + "D" and would be expanded as the range 0..D.
  if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
    $1.hex.upto($2.hex) { |code| $ignorable << code }
  elsif entry =~ /^[0-9A-F]+/
    $ignorable << $&.hex
  end
end
jbe@7 | 110 |
# Entries tagged Grapheme_Extend, one per line. Each line starts with either
# a single hexadecimal code point ("05BF") or an inclusive range
# ("0300..036F"), followed by "; property # comment".
$grapheme_extend_list = <<END_OF_LIST
0300..036F ; Grapheme_Extend # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
0483..0486 ; Grapheme_Extend # Mn [4] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PSILI PNEUMATA
0488..0489 ; Grapheme_Extend # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
0591..05BD ; Grapheme_Extend # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
05BF ; Grapheme_Extend # Mn HEBREW POINT RAFE
05C1..05C2 ; Grapheme_Extend # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
05C4..05C5 ; Grapheme_Extend # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
05C7 ; Grapheme_Extend # Mn HEBREW POINT QAMATS QATAN
0610..0615 ; Grapheme_Extend # Mn [6] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL HIGH TAH
064B..065E ; Grapheme_Extend # Mn [20] ARABIC FATHATAN..ARABIC FATHA WITH TWO DOTS
0670 ; Grapheme_Extend # Mn ARABIC LETTER SUPERSCRIPT ALEF
06D6..06DC ; Grapheme_Extend # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
06DE ; Grapheme_Extend # Me ARABIC START OF RUB EL HIZB
06DF..06E4 ; Grapheme_Extend # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
06E7..06E8 ; Grapheme_Extend # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
06EA..06ED ; Grapheme_Extend # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
0711 ; Grapheme_Extend # Mn SYRIAC LETTER SUPERSCRIPT ALAPH
0730..074A ; Grapheme_Extend # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
07A6..07B0 ; Grapheme_Extend # Mn [11] THAANA ABAFILI..THAANA SUKUN
07EB..07F3 ; Grapheme_Extend # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
0901..0902 ; Grapheme_Extend # Mn [2] DEVANAGARI SIGN CANDRABINDU..DEVANAGARI SIGN ANUSVARA
093C ; Grapheme_Extend # Mn DEVANAGARI SIGN NUKTA
0941..0948 ; Grapheme_Extend # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
094D ; Grapheme_Extend # Mn DEVANAGARI SIGN VIRAMA
0951..0954 ; Grapheme_Extend # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
0962..0963 ; Grapheme_Extend # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
0981 ; Grapheme_Extend # Mn BENGALI SIGN CANDRABINDU
09BC ; Grapheme_Extend # Mn BENGALI SIGN NUKTA
09BE ; Grapheme_Extend # Mc BENGALI VOWEL SIGN AA
09C1..09C4 ; Grapheme_Extend # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR
09CD ; Grapheme_Extend # Mn BENGALI SIGN VIRAMA
09D7 ; Grapheme_Extend # Mc BENGALI AU LENGTH MARK
09E2..09E3 ; Grapheme_Extend # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL
0A01..0A02 ; Grapheme_Extend # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI
0A3C ; Grapheme_Extend # Mn GURMUKHI SIGN NUKTA
0A41..0A42 ; Grapheme_Extend # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU
0A47..0A48 ; Grapheme_Extend # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI
0A4B..0A4D ; Grapheme_Extend # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA
0A70..0A71 ; Grapheme_Extend # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK
0A81..0A82 ; Grapheme_Extend # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA
0ABC ; Grapheme_Extend # Mn GUJARATI SIGN NUKTA
0AC1..0AC5 ; Grapheme_Extend # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E
0AC7..0AC8 ; Grapheme_Extend # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
0ACD ; Grapheme_Extend # Mn GUJARATI SIGN VIRAMA
0AE2..0AE3 ; Grapheme_Extend # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
0B01 ; Grapheme_Extend # Mn ORIYA SIGN CANDRABINDU
0B3C ; Grapheme_Extend # Mn ORIYA SIGN NUKTA
0B3E ; Grapheme_Extend # Mc ORIYA VOWEL SIGN AA
0B3F ; Grapheme_Extend # Mn ORIYA VOWEL SIGN I
0B41..0B43 ; Grapheme_Extend # Mn [3] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC R
0B4D ; Grapheme_Extend # Mn ORIYA SIGN VIRAMA
0B56 ; Grapheme_Extend # Mn ORIYA AI LENGTH MARK
0B57 ; Grapheme_Extend # Mc ORIYA AU LENGTH MARK
0B82 ; Grapheme_Extend # Mn TAMIL SIGN ANUSVARA
0BBE ; Grapheme_Extend # Mc TAMIL VOWEL SIGN AA
0BC0 ; Grapheme_Extend # Mn TAMIL VOWEL SIGN II
0BCD ; Grapheme_Extend # Mn TAMIL SIGN VIRAMA
0BD7 ; Grapheme_Extend # Mc TAMIL AU LENGTH MARK
0C3E..0C40 ; Grapheme_Extend # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
0C46..0C48 ; Grapheme_Extend # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
0C4A..0C4D ; Grapheme_Extend # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
0C55..0C56 ; Grapheme_Extend # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
0CBC ; Grapheme_Extend # Mn KANNADA SIGN NUKTA
0CBF ; Grapheme_Extend # Mn KANNADA VOWEL SIGN I
0CC2 ; Grapheme_Extend # Mc KANNADA VOWEL SIGN UU
0CC6 ; Grapheme_Extend # Mn KANNADA VOWEL SIGN E
0CCC..0CCD ; Grapheme_Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
0CD5..0CD6 ; Grapheme_Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
0CE2..0CE3 ; Grapheme_Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
0D3E ; Grapheme_Extend # Mc MALAYALAM VOWEL SIGN AA
0D41..0D43 ; Grapheme_Extend # Mn [3] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC R
0D4D ; Grapheme_Extend # Mn MALAYALAM SIGN VIRAMA
0D57 ; Grapheme_Extend # Mc MALAYALAM AU LENGTH MARK
0DCA ; Grapheme_Extend # Mn SINHALA SIGN AL-LAKUNA
0DCF ; Grapheme_Extend # Mc SINHALA VOWEL SIGN AELA-PILLA
0DD2..0DD4 ; Grapheme_Extend # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
0DD6 ; Grapheme_Extend # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA
0DDF ; Grapheme_Extend # Mc SINHALA VOWEL SIGN GAYANUKITTA
0E31 ; Grapheme_Extend # Mn THAI CHARACTER MAI HAN-AKAT
0E34..0E3A ; Grapheme_Extend # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU
0E47..0E4E ; Grapheme_Extend # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN
0EB1 ; Grapheme_Extend # Mn LAO VOWEL SIGN MAI KAN
0EB4..0EB9 ; Grapheme_Extend # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU
0EBB..0EBC ; Grapheme_Extend # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO
0EC8..0ECD ; Grapheme_Extend # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA
0F18..0F19 ; Grapheme_Extend # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
0F35 ; Grapheme_Extend # Mn TIBETAN MARK NGAS BZUNG NYI ZLA
0F37 ; Grapheme_Extend # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS
0F39 ; Grapheme_Extend # Mn TIBETAN MARK TSA -PHRU
0F71..0F7E ; Grapheme_Extend # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO
0F80..0F84 ; Grapheme_Extend # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA
0F86..0F87 ; Grapheme_Extend # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
0F90..0F97 ; Grapheme_Extend # Mn [8] TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER JA
0F99..0FBC ; Grapheme_Extend # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
0FC6 ; Grapheme_Extend # Mn TIBETAN SYMBOL PADMA GDAN
102D..1030 ; Grapheme_Extend # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU
1032 ; Grapheme_Extend # Mn MYANMAR VOWEL SIGN AI
1036..1037 ; Grapheme_Extend # Mn [2] MYANMAR SIGN ANUSVARA..MYANMAR SIGN DOT BELOW
1039 ; Grapheme_Extend # Mn MYANMAR SIGN VIRAMA
1058..1059 ; Grapheme_Extend # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL
135F ; Grapheme_Extend # Mn ETHIOPIC COMBINING GEMINATION MARK
1712..1714 ; Grapheme_Extend # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
1732..1734 ; Grapheme_Extend # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
1752..1753 ; Grapheme_Extend # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
1772..1773 ; Grapheme_Extend # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
17B7..17BD ; Grapheme_Extend # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA
17C6 ; Grapheme_Extend # Mn KHMER SIGN NIKAHIT
17C9..17D3 ; Grapheme_Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
17DD ; Grapheme_Extend # Mn KHMER SIGN ATTHACAN
180B..180D ; Grapheme_Extend # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
18A9 ; Grapheme_Extend # Mn MONGOLIAN LETTER ALI GALI DAGALGA
1920..1922 ; Grapheme_Extend # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
1927..1928 ; Grapheme_Extend # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O
1932 ; Grapheme_Extend # Mn LIMBU SMALL LETTER ANUSVARA
1939..193B ; Grapheme_Extend # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
1A17..1A18 ; Grapheme_Extend # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
1B00..1B03 ; Grapheme_Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
1B34 ; Grapheme_Extend # Mn BALINESE SIGN REREKAN
1B36..1B3A ; Grapheme_Extend # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
1B3C ; Grapheme_Extend # Mn BALINESE VOWEL SIGN LA LENGA
1B42 ; Grapheme_Extend # Mn BALINESE VOWEL SIGN PEPET
1B6B..1B73 ; Grapheme_Extend # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
1DC0..1DCA ; Grapheme_Extend # Mn [11] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER R BELOW
1DFE..1DFF ; Grapheme_Extend # Mn [2] COMBINING LEFT ARROWHEAD ABOVE..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
200C..200D ; Grapheme_Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
20D0..20DC ; Grapheme_Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
20DD..20E0 ; Grapheme_Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
20E1 ; Grapheme_Extend # Mn COMBINING LEFT RIGHT ARROW ABOVE
20E2..20E4 ; Grapheme_Extend # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
20E5..20EF ; Grapheme_Extend # Mn [11] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING RIGHT ARROW BELOW
302A..302F ; Grapheme_Extend # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
3099..309A ; Grapheme_Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
A806 ; Grapheme_Extend # Mn SYLOTI NAGRI SIGN HASANTA
A80B ; Grapheme_Extend # Mn SYLOTI NAGRI SIGN ANUSVARA
A825..A826 ; Grapheme_Extend # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
FB1E ; Grapheme_Extend # Mn HEBREW POINT JUDEO-SPANISH VARIKA
FE00..FE0F ; Grapheme_Extend # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
FE20..FE23 ; Grapheme_Extend # Mn [4] COMBINING LIGATURE LEFT HALF..COMBINING DOUBLE TILDE RIGHT HALF
10A01..10A03 ; Grapheme_Extend # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R
10A05..10A06 ; Grapheme_Extend # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O
10A0C..10A0F ; Grapheme_Extend # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
10A38..10A3A ; Grapheme_Extend # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
10A3F ; Grapheme_Extend # Mn KHAROSHTHI VIRAMA
1D165 ; Grapheme_Extend # Mc MUSICAL SYMBOL COMBINING STEM
1D167..1D169 ; Grapheme_Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
1D16E..1D172 ; Grapheme_Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5
1D17B..1D182 ; Grapheme_Extend # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
1D185..1D18B ; Grapheme_Extend # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
1D1AA..1D1AD ; Grapheme_Extend # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
1D242..1D244 ; Grapheme_Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
END_OF_LIST

# Expand the textual list into a flat array of integer code points.
$grapheme_extend = []
# each_line (rather than String#each, which newer rubies no longer provide)
# yields one list entry at a time.
$grapheme_extend_list.each_line do |entry|
  # The dots are escaped so that only a literal ".." marks a range. With
  # unescaped dots a single entry such as "05BF ; ..." backtracks into
  # "0" + "5B" + "F" and would be expanded as the range 0..F.
  if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
    $1.hex.upto($2.hex) { |code| $grapheme_extend << code }
  elsif entry =~ /^[0-9A-F]+/
    $grapheme_extend << $&.hex
  end
end
jbe@7 | 273 |
# One hexadecimal code point per line, followed by "# character name".
# NOTE(review): the entries look like the script-specific part of Unicode's
# CompositionExclusions.txt -- confirm against the data file actually used.
$exclusions = <<END_OF_LIST
0958 # DEVANAGARI LETTER QA
0959 # DEVANAGARI LETTER KHHA
095A # DEVANAGARI LETTER GHHA
095B # DEVANAGARI LETTER ZA
095C # DEVANAGARI LETTER DDDHA
095D # DEVANAGARI LETTER RHA
095E # DEVANAGARI LETTER FA
095F # DEVANAGARI LETTER YYA
09DC # BENGALI LETTER RRA
09DD # BENGALI LETTER RHA
09DF # BENGALI LETTER YYA
0A33 # GURMUKHI LETTER LLA
0A36 # GURMUKHI LETTER SHA
0A59 # GURMUKHI LETTER KHHA
0A5A # GURMUKHI LETTER GHHA
0A5B # GURMUKHI LETTER ZA
0A5E # GURMUKHI LETTER FA
0B5C # ORIYA LETTER RRA
0B5D # ORIYA LETTER RHA
0F43 # TIBETAN LETTER GHA
0F4D # TIBETAN LETTER DDHA
0F52 # TIBETAN LETTER DHA
0F57 # TIBETAN LETTER BHA
0F5C # TIBETAN LETTER DZHA
0F69 # TIBETAN LETTER KSSA
0F76 # TIBETAN VOWEL SIGN VOCALIC R
0F78 # TIBETAN VOWEL SIGN VOCALIC L
0F93 # TIBETAN SUBJOINED LETTER GHA
0F9D # TIBETAN SUBJOINED LETTER DDHA
0FA2 # TIBETAN SUBJOINED LETTER DHA
0FA7 # TIBETAN SUBJOINED LETTER BHA
0FAC # TIBETAN SUBJOINED LETTER DZHA
0FB9 # TIBETAN SUBJOINED LETTER KSSA
FB1D # HEBREW LETTER YOD WITH HIRIQ
FB1F # HEBREW LIGATURE YIDDISH YOD YOD PATAH
FB2A # HEBREW LETTER SHIN WITH SHIN DOT
FB2B # HEBREW LETTER SHIN WITH SIN DOT
FB2C # HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT
FB2D # HEBREW LETTER SHIN WITH DAGESH AND SIN DOT
FB2E # HEBREW LETTER ALEF WITH PATAH
FB2F # HEBREW LETTER ALEF WITH QAMATS
FB30 # HEBREW LETTER ALEF WITH MAPIQ
FB31 # HEBREW LETTER BET WITH DAGESH
FB32 # HEBREW LETTER GIMEL WITH DAGESH
FB33 # HEBREW LETTER DALET WITH DAGESH
FB34 # HEBREW LETTER HE WITH MAPIQ
FB35 # HEBREW LETTER VAV WITH DAGESH
FB36 # HEBREW LETTER ZAYIN WITH DAGESH
FB38 # HEBREW LETTER TET WITH DAGESH
FB39 # HEBREW LETTER YOD WITH DAGESH
FB3A # HEBREW LETTER FINAL KAF WITH DAGESH
FB3B # HEBREW LETTER KAF WITH DAGESH
FB3C # HEBREW LETTER LAMED WITH DAGESH
FB3E # HEBREW LETTER MEM WITH DAGESH
FB40 # HEBREW LETTER NUN WITH DAGESH
FB41 # HEBREW LETTER SAMEKH WITH DAGESH
FB43 # HEBREW LETTER FINAL PE WITH DAGESH
FB44 # HEBREW LETTER PE WITH DAGESH
FB46 # HEBREW LETTER TSADI WITH DAGESH
FB47 # HEBREW LETTER QOF WITH DAGESH
FB48 # HEBREW LETTER RESH WITH DAGESH
FB49 # HEBREW LETTER SHIN WITH DAGESH
FB4A # HEBREW LETTER TAV WITH DAGESH
FB4B # HEBREW LETTER VAV WITH HOLAM
FB4C # HEBREW LETTER BET WITH RAFE
FB4D # HEBREW LETTER KAF WITH RAFE
FB4E # HEBREW LETTER PE WITH RAFE
END_OF_LIST
# Replace the text with an array of integer code points. String#hex reads the
# leading hex digits and stops at the first other character, so the trailing
# "# name" part is ignored. Lines that do not start with a hex digit are
# skipped first; otherwise String#hex would quietly turn them into 0.
$exclusions = $exclusions.split("\n").
  select { |line| line =~ /^[0-9A-F]+/ }.
  map { |line| line.hex }
jbe@7 | 344 |
# One hexadecimal code point per line, followed by "# character name".
# NOTE(review): judging by the variable name these are presumably the
# version-dependent composition exclusions -- confirm against the data file.
$excl_version = <<END_OF_LIST
2ADC # FORKING
1D15E # MUSICAL SYMBOL HALF NOTE
1D15F # MUSICAL SYMBOL QUARTER NOTE
1D160 # MUSICAL SYMBOL EIGHTH NOTE
1D161 # MUSICAL SYMBOL SIXTEENTH NOTE
1D162 # MUSICAL SYMBOL THIRTY-SECOND NOTE
1D163 # MUSICAL SYMBOL SIXTY-FOURTH NOTE
1D164 # MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
1D1BB # MUSICAL SYMBOL MINIMA
1D1BC # MUSICAL SYMBOL MINIMA BLACK
1D1BD # MUSICAL SYMBOL SEMIMINIMA WHITE
1D1BE # MUSICAL SYMBOL SEMIMINIMA BLACK
1D1BF # MUSICAL SYMBOL FUSA WHITE
1D1C0 # MUSICAL SYMBOL FUSA BLACK
END_OF_LIST
# Replace the text with an array of integer code points. String#hex reads the
# leading hex digits and stops at the first other character, so the trailing
# "# name" part is ignored. Lines that do not start with a hex digit are
# skipped first; otherwise String#hex would quietly turn them into 0.
$excl_version = $excl_version.split("\n").
  select { |line| line =~ /^[0-9A-F]+/ }.
  map { |line| line.hex }
jbe@7 | 362 |
jbe@7 | 363 $case_folding_string = <<END_OF_LIST |
jbe@7 | 364 0041; C; 0061; # LATIN CAPITAL LETTER A |
jbe@7 | 365 0042; C; 0062; # LATIN CAPITAL LETTER B |
jbe@7 | 366 0043; C; 0063; # LATIN CAPITAL LETTER C |
jbe@7 | 367 0044; C; 0064; # LATIN CAPITAL LETTER D |
jbe@7 | 368 0045; C; 0065; # LATIN CAPITAL LETTER E |
jbe@7 | 369 0046; C; 0066; # LATIN CAPITAL LETTER F |
jbe@7 | 370 0047; C; 0067; # LATIN CAPITAL LETTER G |
jbe@7 | 371 0048; C; 0068; # LATIN CAPITAL LETTER H |
jbe@7 | 372 0049; C; 0069; # LATIN CAPITAL LETTER I |
jbe@7 | 373 004A; C; 006A; # LATIN CAPITAL LETTER J |
jbe@7 | 374 004B; C; 006B; # LATIN CAPITAL LETTER K |
jbe@7 | 375 004C; C; 006C; # LATIN CAPITAL LETTER L |
jbe@7 | 376 004D; C; 006D; # LATIN CAPITAL LETTER M |
jbe@7 | 377 004E; C; 006E; # LATIN CAPITAL LETTER N |
jbe@7 | 378 004F; C; 006F; # LATIN CAPITAL LETTER O |
jbe@7 | 379 0050; C; 0070; # LATIN CAPITAL LETTER P |
jbe@7 | 380 0051; C; 0071; # LATIN CAPITAL LETTER Q |
jbe@7 | 381 0052; C; 0072; # LATIN CAPITAL LETTER R |
jbe@7 | 382 0053; C; 0073; # LATIN CAPITAL LETTER S |
jbe@7 | 383 0054; C; 0074; # LATIN CAPITAL LETTER T |
jbe@7 | 384 0055; C; 0075; # LATIN CAPITAL LETTER U |
jbe@7 | 385 0056; C; 0076; # LATIN CAPITAL LETTER V |
jbe@7 | 386 0057; C; 0077; # LATIN CAPITAL LETTER W |
jbe@7 | 387 0058; C; 0078; # LATIN CAPITAL LETTER X |
jbe@7 | 388 0059; C; 0079; # LATIN CAPITAL LETTER Y |
jbe@7 | 389 005A; C; 007A; # LATIN CAPITAL LETTER Z |
jbe@7 | 390 00B5; C; 03BC; # MICRO SIGN |
jbe@7 | 391 00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE |
jbe@7 | 392 00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE |
jbe@7 | 393 00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX |
jbe@7 | 394 00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE |
jbe@7 | 395 00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS |
jbe@7 | 396 00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE |
jbe@7 | 397 00C6; C; 00E6; # LATIN CAPITAL LETTER AE |
jbe@7 | 398 00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA |
jbe@7 | 399 00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE |
jbe@7 | 400 00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE |
jbe@7 | 401 00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX |
jbe@7 | 402 00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS |
jbe@7 | 403 00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE |
jbe@7 | 404 00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE |
jbe@7 | 405 00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX |
jbe@7 | 406 00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS |
jbe@7 | 407 00D0; C; 00F0; # LATIN CAPITAL LETTER ETH |
jbe@7 | 408 00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE |
jbe@7 | 409 00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE |
jbe@7 | 410 00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE |
jbe@7 | 411 00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX |
jbe@7 | 412 00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE |
jbe@7 | 413 00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS |
jbe@7 | 414 00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE |
jbe@7 | 415 00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE |
jbe@7 | 416 00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE |
jbe@7 | 417 00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX |
jbe@7 | 418 00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS |
jbe@7 | 419 00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE |
jbe@7 | 420 00DE; C; 00FE; # LATIN CAPITAL LETTER THORN |
jbe@7 | 421 00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S |
jbe@7 | 422 0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON |
jbe@7 | 423 0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE |
jbe@7 | 424 0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK |
jbe@7 | 425 0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE |
jbe@7 | 426 0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX |
jbe@7 | 427 010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE |
jbe@7 | 428 010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON |
jbe@7 | 429 010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON |
jbe@7 | 430 0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE |
jbe@7 | 431 0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON |
jbe@7 | 432 0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE |
jbe@7 | 433 0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE |
jbe@7 | 434 0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK |
jbe@7 | 435 011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON |
jbe@7 | 436 011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX |
jbe@7 | 437 011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE |
jbe@7 | 438 0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE |
jbe@7 | 439 0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA |
jbe@7 | 440 0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX |
jbe@7 | 441 0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE |
jbe@7 | 442 0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE |
jbe@7 | 443 012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON |
jbe@7 | 444 012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE |
jbe@7 | 445 012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK |
jbe@7 | 446 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE |
jbe@7 | 447 0132; C; 0133; # LATIN CAPITAL LIGATURE IJ |
jbe@7 | 448 0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX |
jbe@7 | 449 0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA |
jbe@7 | 450 0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE |
jbe@7 | 451 013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA |
jbe@7 | 452 013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON |
jbe@7 | 453 013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT |
jbe@7 | 454 0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE |
jbe@7 | 455 0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE |
jbe@7 | 456 0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA |
jbe@7 | 457 0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON |
jbe@7 | 458 0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE |
jbe@7 | 459 014A; C; 014B; # LATIN CAPITAL LETTER ENG |
jbe@7 | 460 014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON |
jbe@7 | 461 014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE |
jbe@7 | 462 0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE |
jbe@7 | 463 0152; C; 0153; # LATIN CAPITAL LIGATURE OE |
jbe@7 | 464 0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE |
jbe@7 | 465 0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA |
jbe@7 | 466 0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON |
jbe@7 | 467 015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE |
jbe@7 | 468 015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX |
jbe@7 | 469 015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA |
jbe@7 | 470 0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON |
jbe@7 | 471 0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA |
jbe@7 | 472 0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON |
jbe@7 | 473 0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE |
jbe@7 | 474 0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE |
jbe@7 | 475 016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON |
jbe@7 | 476 016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE |
jbe@7 | 477 016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE |
jbe@7 | 478 0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE |
jbe@7 | 479 0172; C; 0173; # LATIN CAPITAL LETTER U WITH OGONEK |
jbe@7 | 480 0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX |
jbe@7 | 481 0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX |
jbe@7 | 482 0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS |
jbe@7 | 483 0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE |
jbe@7 | 484 017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE |
jbe@7 | 485 017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON |
jbe@7 | 486 017F; C; 0073; # LATIN SMALL LETTER LONG S |
jbe@7 | 487 0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK |
jbe@7 | 488 0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR |
jbe@7 | 489 0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX |
jbe@7 | 490 0186; C; 0254; # LATIN CAPITAL LETTER OPEN O |
jbe@7 | 491 0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK |
jbe@7 | 492 0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D |
jbe@7 | 493 018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK |
jbe@7 | 494 018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR |
jbe@7 | 495 018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E |
jbe@7 | 496 018F; C; 0259; # LATIN CAPITAL LETTER SCHWA |
jbe@7 | 497 0190; C; 025B; # LATIN CAPITAL LETTER OPEN E |
jbe@7 | 498 0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK |
jbe@7 | 499 0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK |
jbe@7 | 500 0194; C; 0263; # LATIN CAPITAL LETTER GAMMA |
jbe@7 | 501 0196; C; 0269; # LATIN CAPITAL LETTER IOTA |
jbe@7 | 502 0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE |
jbe@7 | 503 0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK |
jbe@7 | 504 019C; C; 026F; # LATIN CAPITAL LETTER TURNED M |
jbe@7 | 505 019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK |
jbe@7 | 506 019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE |
jbe@7 | 507 01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN |
jbe@7 | 508 01A2; C; 01A3; # LATIN CAPITAL LETTER OI |
jbe@7 | 509 01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK |
jbe@7 | 510 01A6; C; 0280; # LATIN LETTER YR |
jbe@7 | 511 01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO |
jbe@7 | 512 01A9; C; 0283; # LATIN CAPITAL LETTER ESH |
jbe@7 | 513 01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK |
jbe@7 | 514 01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK |
jbe@7 | 515 01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN |
jbe@7 | 516 01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON |
jbe@7 | 517 01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK |
jbe@7 | 518 01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK |
jbe@7 | 519 01B5; C; 01B6; # LATIN CAPITAL LETTER Z WITH STROKE |
jbe@7 | 520 01B7; C; 0292; # LATIN CAPITAL LETTER EZH |
jbe@7 | 521 01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED |
jbe@7 | 522 01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE |
jbe@7 | 523 01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON |
jbe@7 | 524 01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON |
jbe@7 | 525 01C7; C; 01C9; # LATIN CAPITAL LETTER LJ |
jbe@7 | 526 01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J |
jbe@7 | 527 01CA; C; 01CC; # LATIN CAPITAL LETTER NJ |
jbe@7 | 528 01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J |
jbe@7 | 529 01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON |
jbe@7 | 530 01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON |
jbe@7 | 531 01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON |
jbe@7 | 532 01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON |
jbe@7 | 533 01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON |
jbe@7 | 534 01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE |
jbe@7 | 535 01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON |
jbe@7 | 536 01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE |
jbe@7 | 537 01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON |
jbe@7 | 538 01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON |
jbe@7 | 539 01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON |
jbe@7 | 540 01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE |
jbe@7 | 541 01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON |
jbe@7 | 542 01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON |
jbe@7 | 543 01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK |
jbe@7 | 544 01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON |
jbe@7 | 545 01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON |
jbe@7 | 546 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON |
jbe@7 | 547 01F1; C; 01F3; # LATIN CAPITAL LETTER DZ |
jbe@7 | 548 01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z |
jbe@7 | 549 01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE |
jbe@7 | 550 01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR |
jbe@7 | 551 01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN |
jbe@7 | 552 01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE |
jbe@7 | 553 01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE |
jbe@7 | 554 01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE |
jbe@7 | 555 01FE; C; 01FF; # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE |
jbe@7 | 556 0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE |
jbe@7 | 557 0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE |
jbe@7 | 558 0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE |
jbe@7 | 559 0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE |
jbe@7 | 560 0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE |
jbe@7 | 561 020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE |
jbe@7 | 562 020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE |
jbe@7 | 563 020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE |
jbe@7 | 564 0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE |
jbe@7 | 565 0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE |
jbe@7 | 566 0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE |
jbe@7 | 567 0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE |
jbe@7 | 568 0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW |
jbe@7 | 569 021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW |
jbe@7 | 570 021C; C; 021D; # LATIN CAPITAL LETTER YOGH |
jbe@7 | 571 021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON |
jbe@7 | 572 0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG |
jbe@7 | 573 0222; C; 0223; # LATIN CAPITAL LETTER OU |
jbe@7 | 574 0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK |
jbe@7 | 575 0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE |
jbe@7 | 576 0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA |
jbe@7 | 577 022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON |
jbe@7 | 578 022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON |
jbe@7 | 579 022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE |
jbe@7 | 580 0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON |
jbe@7 | 581 0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON |
jbe@7 | 582 023A; C; 2C65; # LATIN CAPITAL LETTER A WITH STROKE |
jbe@7 | 583 023B; C; 023C; # LATIN CAPITAL LETTER C WITH STROKE |
jbe@7 | 584 023D; C; 019A; # LATIN CAPITAL LETTER L WITH BAR |
jbe@7 | 585 023E; C; 2C66; # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE |
jbe@7 | 586 0241; C; 0242; # LATIN CAPITAL LETTER GLOTTAL STOP |
jbe@7 | 587 0243; C; 0180; # LATIN CAPITAL LETTER B WITH STROKE |
jbe@7 | 588 0244; C; 0289; # LATIN CAPITAL LETTER U BAR |
jbe@7 | 589 0245; C; 028C; # LATIN CAPITAL LETTER TURNED V |
jbe@7 | 590 0246; C; 0247; # LATIN CAPITAL LETTER E WITH STROKE |
jbe@7 | 591 0248; C; 0249; # LATIN CAPITAL LETTER J WITH STROKE |
jbe@7 | 592 024A; C; 024B; # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL |
jbe@7 | 593 024C; C; 024D; # LATIN CAPITAL LETTER R WITH STROKE |
jbe@7 | 594 024E; C; 024F; # LATIN CAPITAL LETTER Y WITH STROKE |
jbe@7 | 595 0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI |
jbe@7 | 596 0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS |
jbe@7 | 597 0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS |
jbe@7 | 598 0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS |
jbe@7 | 599 038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS |
jbe@7 | 600 038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS |
jbe@7 | 601 038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS |
jbe@7 | 602 038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS |
jbe@7 | 603 0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS |
jbe@7 | 604 0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA |
jbe@7 | 605 0392; C; 03B2; # GREEK CAPITAL LETTER BETA |
jbe@7 | 606 0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA |
jbe@7 | 607 0394; C; 03B4; # GREEK CAPITAL LETTER DELTA |
jbe@7 | 608 0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON |
jbe@7 | 609 0396; C; 03B6; # GREEK CAPITAL LETTER ZETA |
jbe@7 | 610 0397; C; 03B7; # GREEK CAPITAL LETTER ETA |
jbe@7 | 611 0398; C; 03B8; # GREEK CAPITAL LETTER THETA |
jbe@7 | 612 0399; C; 03B9; # GREEK CAPITAL LETTER IOTA |
jbe@7 | 613 039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA |
jbe@7 | 614 039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA |
jbe@7 | 615 039C; C; 03BC; # GREEK CAPITAL LETTER MU |
jbe@7 | 616 039D; C; 03BD; # GREEK CAPITAL LETTER NU |
jbe@7 | 617 039E; C; 03BE; # GREEK CAPITAL LETTER XI |
jbe@7 | 618 039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON |
jbe@7 | 619 03A0; C; 03C0; # GREEK CAPITAL LETTER PI |
jbe@7 | 620 03A1; C; 03C1; # GREEK CAPITAL LETTER RHO |
jbe@7 | 621 03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA |
jbe@7 | 622 03A4; C; 03C4; # GREEK CAPITAL LETTER TAU |
jbe@7 | 623 03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON |
jbe@7 | 624 03A6; C; 03C6; # GREEK CAPITAL LETTER PHI |
jbe@7 | 625 03A7; C; 03C7; # GREEK CAPITAL LETTER CHI |
jbe@7 | 626 03A8; C; 03C8; # GREEK CAPITAL LETTER PSI |
jbe@7 | 627 03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA |
jbe@7 | 628 03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA |
jbe@7 | 629 03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA |
jbe@7 | 630 03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS |
jbe@7 | 631 03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA |
jbe@7 | 632 03D0; C; 03B2; # GREEK BETA SYMBOL |
jbe@7 | 633 03D1; C; 03B8; # GREEK THETA SYMBOL |
jbe@7 | 634 03D5; C; 03C6; # GREEK PHI SYMBOL |
jbe@7 | 635 03D6; C; 03C0; # GREEK PI SYMBOL |
jbe@7 | 636 03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA |
jbe@7 | 637 03DA; C; 03DB; # GREEK LETTER STIGMA |
jbe@7 | 638 03DC; C; 03DD; # GREEK LETTER DIGAMMA |
jbe@7 | 639 03DE; C; 03DF; # GREEK LETTER KOPPA |
jbe@7 | 640 03E0; C; 03E1; # GREEK LETTER SAMPI |
jbe@7 | 641 03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI |
jbe@7 | 642 03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI |
jbe@7 | 643 03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI |
jbe@7 | 644 03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI |
jbe@7 | 645 03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA |
jbe@7 | 646 03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA |
jbe@7 | 647 03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI |
jbe@7 | 648 03F0; C; 03BA; # GREEK KAPPA SYMBOL |
jbe@7 | 649 03F1; C; 03C1; # GREEK RHO SYMBOL |
jbe@7 | 650 03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL |
jbe@7 | 651 03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL |
jbe@7 | 652 03F7; C; 03F8; # GREEK CAPITAL LETTER SHO |
jbe@7 | 653 03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL |
jbe@7 | 654 03FA; C; 03FB; # GREEK CAPITAL LETTER SAN |
jbe@7 | 655 03FD; C; 037B; # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL |
jbe@7 | 656 03FE; C; 037C; # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL |
jbe@7 | 657 03FF; C; 037D; # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL |
jbe@7 | 658 0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE |
jbe@7 | 659 0401; C; 0451; # CYRILLIC CAPITAL LETTER IO |
jbe@7 | 660 0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE |
jbe@7 | 661 0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE |
jbe@7 | 662 0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE |
jbe@7 | 663 0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE |
jbe@7 | 664 0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I |
jbe@7 | 665 0407; C; 0457; # CYRILLIC CAPITAL LETTER YI |
jbe@7 | 666 0408; C; 0458; # CYRILLIC CAPITAL LETTER JE |
jbe@7 | 667 0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE |
jbe@7 | 668 040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE |
jbe@7 | 669 040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE |
jbe@7 | 670 040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE |
jbe@7 | 671 040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE |
jbe@7 | 672 040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U |
jbe@7 | 673 040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE |
jbe@7 | 674 0410; C; 0430; # CYRILLIC CAPITAL LETTER A |
jbe@7 | 675 0411; C; 0431; # CYRILLIC CAPITAL LETTER BE |
jbe@7 | 676 0412; C; 0432; # CYRILLIC CAPITAL LETTER VE |
jbe@7 | 677 0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE |
jbe@7 | 678 0414; C; 0434; # CYRILLIC CAPITAL LETTER DE |
jbe@7 | 679 0415; C; 0435; # CYRILLIC CAPITAL LETTER IE |
jbe@7 | 680 0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE |
jbe@7 | 681 0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE |
jbe@7 | 682 0418; C; 0438; # CYRILLIC CAPITAL LETTER I |
jbe@7 | 683 0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I |
jbe@7 | 684 041A; C; 043A; # CYRILLIC CAPITAL LETTER KA |
jbe@7 | 685 041B; C; 043B; # CYRILLIC CAPITAL LETTER EL |
jbe@7 | 686 041C; C; 043C; # CYRILLIC CAPITAL LETTER EM |
jbe@7 | 687 041D; C; 043D; # CYRILLIC CAPITAL LETTER EN |
jbe@7 | 688 041E; C; 043E; # CYRILLIC CAPITAL LETTER O |
jbe@7 | 689 041F; C; 043F; # CYRILLIC CAPITAL LETTER PE |
jbe@7 | 690 0420; C; 0440; # CYRILLIC CAPITAL LETTER ER |
jbe@7 | 691 0421; C; 0441; # CYRILLIC CAPITAL LETTER ES |
jbe@7 | 692 0422; C; 0442; # CYRILLIC CAPITAL LETTER TE |
jbe@7 | 693 0423; C; 0443; # CYRILLIC CAPITAL LETTER U |
jbe@7 | 694 0424; C; 0444; # CYRILLIC CAPITAL LETTER EF |
jbe@7 | 695 0425; C; 0445; # CYRILLIC CAPITAL LETTER HA |
jbe@7 | 696 0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE |
jbe@7 | 697 0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE |
jbe@7 | 698 0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA |
jbe@7 | 699 0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA |
jbe@7 | 700 042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN |
jbe@7 | 701 042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU |
jbe@7 | 702 042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN |
jbe@7 | 703 042D; C; 044D; # CYRILLIC CAPITAL LETTER E |
jbe@7 | 704 042E; C; 044E; # CYRILLIC CAPITAL LETTER YU |
jbe@7 | 705 042F; C; 044F; # CYRILLIC CAPITAL LETTER YA |
jbe@7 | 706 0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA |
jbe@7 | 707 0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT |
jbe@7 | 708 0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E |
jbe@7 | 709 0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS |
jbe@7 | 710 0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS |
jbe@7 | 711 046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS |
jbe@7 | 712 046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS |
jbe@7 | 713 046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI |
jbe@7 | 714 0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI |
jbe@7 | 715 0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA |
jbe@7 | 716 0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA |
jbe@7 | 717 0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT |
jbe@7 | 718 0478; C; 0479; # CYRILLIC CAPITAL LETTER UK |
jbe@7 | 719 047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA |
jbe@7 | 720 047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO |
jbe@7 | 721 047E; C; 047F; # CYRILLIC CAPITAL LETTER OT |
jbe@7 | 722 0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA |
jbe@7 | 723 048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL |
jbe@7 | 724 048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN |
jbe@7 | 725 048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK |
jbe@7 | 726 0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN |
jbe@7 | 727 0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE |
jbe@7 | 728 0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK |
jbe@7 | 729 0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER |
jbe@7 | 730 0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER |
jbe@7 | 731 049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER |
jbe@7 | 732 049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE |
jbe@7 | 733 049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE |
jbe@7 | 734 04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA |
jbe@7 | 735 04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER |
jbe@7 | 736 04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE |
jbe@7 | 737 04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK |
jbe@7 | 738 04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA |
jbe@7 | 739 04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER |
jbe@7 | 740 04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER |
jbe@7 | 741 04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U |
jbe@7 | 742 04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE |
jbe@7 | 743 04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER |
jbe@7 | 744 04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE |
jbe@7 | 745 04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER |
jbe@7 | 746 04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE |
jbe@7 | 747 04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA |
jbe@7 | 748 04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE |
jbe@7 | 749 04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER |
jbe@7 | 750 04C0; C; 04CF; # CYRILLIC LETTER PALOCHKA |
jbe@7 | 751 04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE |
jbe@7 | 752 04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK |
jbe@7 | 753 04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL |
jbe@7 | 754 04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK |
jbe@7 | 755 04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL |
jbe@7 | 756 04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE |
jbe@7 | 757 04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL |
jbe@7 | 758 04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE |
jbe@7 | 759 04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS |
jbe@7 | 760 04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE |
jbe@7 | 761 04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE |
jbe@7 | 762 04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA |
jbe@7 | 763 04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS |
jbe@7 | 764 04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS |
jbe@7 | 765 04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS |
jbe@7 | 766 04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE |
jbe@7 | 767 04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON |
jbe@7 | 768 04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS |
jbe@7 | 769 04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS |
jbe@7 | 770 04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O |
jbe@7 | 771 04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS |
jbe@7 | 772 04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS |
jbe@7 | 773 04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON |
jbe@7 | 774 04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS |
jbe@7 | 775 04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE |
jbe@7 | 776 04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS |
jbe@7 | 777 04F6; C; 04F7; # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER |
jbe@7 | 778 04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS |
jbe@7 | 779 04FA; C; 04FB; # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK |
jbe@7 | 780 04FC; C; 04FD; # CYRILLIC CAPITAL LETTER HA WITH HOOK |
jbe@7 | 781 04FE; C; 04FF; # CYRILLIC CAPITAL LETTER HA WITH STROKE |
jbe@7 | 782 0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE |
jbe@7 | 783 0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE |
jbe@7 | 784 0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE |
jbe@7 | 785 0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE |
jbe@7 | 786 0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE |
jbe@7 | 787 050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE |
jbe@7 | 788 050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE |
jbe@7 | 789 050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE |
jbe@7 | 790 0510; C; 0511; # CYRILLIC CAPITAL LETTER REVERSED ZE |
jbe@7 | 791 0512; C; 0513; # CYRILLIC CAPITAL LETTER EL WITH HOOK |
jbe@7 | 792 0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB |
jbe@7 | 793 0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN |
jbe@7 | 794 0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM |
jbe@7 | 795 0534; C; 0564; # ARMENIAN CAPITAL LETTER DA |
jbe@7 | 796 0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH |
jbe@7 | 797 0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA |
jbe@7 | 798 0537; C; 0567; # ARMENIAN CAPITAL LETTER EH |
jbe@7 | 799 0538; C; 0568; # ARMENIAN CAPITAL LETTER ET |
jbe@7 | 800 0539; C; 0569; # ARMENIAN CAPITAL LETTER TO |
jbe@7 | 801 053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE |
jbe@7 | 802 053B; C; 056B; # ARMENIAN CAPITAL LETTER INI |
jbe@7 | 803 053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN |
jbe@7 | 804 053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH |
jbe@7 | 805 053E; C; 056E; # ARMENIAN CAPITAL LETTER CA |
jbe@7 | 806 053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN |
jbe@7 | 807 0540; C; 0570; # ARMENIAN CAPITAL LETTER HO |
jbe@7 | 808 0541; C; 0571; # ARMENIAN CAPITAL LETTER JA |
jbe@7 | 809 0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD |
jbe@7 | 810 0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH |
jbe@7 | 811 0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN |
jbe@7 | 812 0545; C; 0575; # ARMENIAN CAPITAL LETTER YI |
jbe@7 | 813 0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW |
jbe@7 | 814 0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA |
jbe@7 | 815 0548; C; 0578; # ARMENIAN CAPITAL LETTER VO |
jbe@7 | 816 0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA |
jbe@7 | 817 054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH |
jbe@7 | 818 054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH |
jbe@7 | 819 054C; C; 057C; # ARMENIAN CAPITAL LETTER RA |
jbe@7 | 820 054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH |
jbe@7 | 821 054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW |
jbe@7 | 822 054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN |
jbe@7 | 823 0550; C; 0580; # ARMENIAN CAPITAL LETTER REH |
jbe@7 | 824 0551; C; 0581; # ARMENIAN CAPITAL LETTER CO |
jbe@7 | 825 0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN |
jbe@7 | 826 0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR |
jbe@7 | 827 0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH |
jbe@7 | 828 0555; C; 0585; # ARMENIAN CAPITAL LETTER OH |
jbe@7 | 829 0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH |
jbe@7 | 830 0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN |
jbe@7 | 831 10A0; C; 2D00; # GEORGIAN CAPITAL LETTER AN |
jbe@7 | 832 10A1; C; 2D01; # GEORGIAN CAPITAL LETTER BAN |
jbe@7 | 833 10A2; C; 2D02; # GEORGIAN CAPITAL LETTER GAN |
jbe@7 | 834 10A3; C; 2D03; # GEORGIAN CAPITAL LETTER DON |
jbe@7 | 835 10A4; C; 2D04; # GEORGIAN CAPITAL LETTER EN |
jbe@7 | 836 10A5; C; 2D05; # GEORGIAN CAPITAL LETTER VIN |
jbe@7 | 837 10A6; C; 2D06; # GEORGIAN CAPITAL LETTER ZEN |
jbe@7 | 838 10A7; C; 2D07; # GEORGIAN CAPITAL LETTER TAN |
jbe@7 | 839 10A8; C; 2D08; # GEORGIAN CAPITAL LETTER IN |
jbe@7 | 840 10A9; C; 2D09; # GEORGIAN CAPITAL LETTER KAN |
jbe@7 | 841 10AA; C; 2D0A; # GEORGIAN CAPITAL LETTER LAS |
jbe@7 | 842 10AB; C; 2D0B; # GEORGIAN CAPITAL LETTER MAN |
jbe@7 | 843 10AC; C; 2D0C; # GEORGIAN CAPITAL LETTER NAR |
jbe@7 | 844 10AD; C; 2D0D; # GEORGIAN CAPITAL LETTER ON |
jbe@7 | 845 10AE; C; 2D0E; # GEORGIAN CAPITAL LETTER PAR |
jbe@7 | 846 10AF; C; 2D0F; # GEORGIAN CAPITAL LETTER ZHAR |
jbe@7 | 847 10B0; C; 2D10; # GEORGIAN CAPITAL LETTER RAE |
jbe@7 | 848 10B1; C; 2D11; # GEORGIAN CAPITAL LETTER SAN |
jbe@7 | 849 10B2; C; 2D12; # GEORGIAN CAPITAL LETTER TAR |
jbe@7 | 850 10B3; C; 2D13; # GEORGIAN CAPITAL LETTER UN |
jbe@7 | 851 10B4; C; 2D14; # GEORGIAN CAPITAL LETTER PHAR |
jbe@7 | 852 10B5; C; 2D15; # GEORGIAN CAPITAL LETTER KHAR |
jbe@7 | 853 10B6; C; 2D16; # GEORGIAN CAPITAL LETTER GHAN |
jbe@7 | 854 10B7; C; 2D17; # GEORGIAN CAPITAL LETTER QAR |
jbe@7 | 855 10B8; C; 2D18; # GEORGIAN CAPITAL LETTER SHIN |
jbe@7 | 856 10B9; C; 2D19; # GEORGIAN CAPITAL LETTER CHIN |
jbe@7 | 857 10BA; C; 2D1A; # GEORGIAN CAPITAL LETTER CAN |
jbe@7 | 858 10BB; C; 2D1B; # GEORGIAN CAPITAL LETTER JIL |
jbe@7 | 859 10BC; C; 2D1C; # GEORGIAN CAPITAL LETTER CIL |
jbe@7 | 860 10BD; C; 2D1D; # GEORGIAN CAPITAL LETTER CHAR |
jbe@7 | 861 10BE; C; 2D1E; # GEORGIAN CAPITAL LETTER XAN |
jbe@7 | 862 10BF; C; 2D1F; # GEORGIAN CAPITAL LETTER JHAN |
jbe@7 | 863 10C0; C; 2D20; # GEORGIAN CAPITAL LETTER HAE |
jbe@7 | 864 10C1; C; 2D21; # GEORGIAN CAPITAL LETTER HE |
jbe@7 | 865 10C2; C; 2D22; # GEORGIAN CAPITAL LETTER HIE |
jbe@7 | 866 10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE |
jbe@7 | 867 10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR |
jbe@7 | 868 10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE |
jbe@7 | 869 1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW |
jbe@7 | 870 1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE |
jbe@7 | 871 1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW |
jbe@7 | 872 1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW |
jbe@7 | 873 1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE |
jbe@7 | 874 1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE |
jbe@7 | 875 1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW |
jbe@7 | 876 1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW |
jbe@7 | 877 1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA |
jbe@7 | 878 1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW |
jbe@7 | 879 1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE |
jbe@7 | 880 1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE |
jbe@7 | 881 1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW |
jbe@7 | 882 1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW |
jbe@7 | 883 1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE |
jbe@7 | 884 1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE |
jbe@7 | 885 1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON |
jbe@7 | 886 1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE |
jbe@7 | 887 1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW |
jbe@7 | 888 1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS |
jbe@7 | 889 1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA |
jbe@7 | 890 1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW |
jbe@7 | 891 1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW |
jbe@7 | 892 1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE |
jbe@7 | 893 1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE |
jbe@7 | 894 1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW |
jbe@7 | 895 1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW |
jbe@7 | 896 1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW |
jbe@7 | 897 1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON |
jbe@7 | 898 1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW |
jbe@7 | 899 1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW |
jbe@7 | 900 1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE |
jbe@7 | 901 1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE |
jbe@7 | 902 1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW |
jbe@7 | 903 1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE |
jbe@7 | 904 1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW |
jbe@7 | 905 1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW |
jbe@7 | 906 1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW |
jbe@7 | 907 1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE |
jbe@7 | 908 1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS |
jbe@7 | 909 1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE |
jbe@7 | 910 1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE |
jbe@7 | 911 1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE |
jbe@7 | 912 1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE |
jbe@7 | 913 1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE |
jbe@7 | 914 1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW |
jbe@7 | 915 1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON |
jbe@7 | 916 1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW |
jbe@7 | 917 1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE |
jbe@7 | 918 1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW |
jbe@7 | 919 1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE |
jbe@7 | 920 1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE |
jbe@7 | 921 1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE |
jbe@7 | 922 1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE |
jbe@7 | 923 1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW |
jbe@7 | 924 1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW |
jbe@7 | 925 1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW |
jbe@7 | 926 1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW |
jbe@7 | 927 1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW |
jbe@7 | 928 1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW |
jbe@7 | 929 1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE |
jbe@7 | 930 1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS |
jbe@7 | 931 1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE |
jbe@7 | 932 1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW |
jbe@7 | 933 1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE |
jbe@7 | 934 1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE |
jbe@7 | 935 1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS |
jbe@7 | 936 1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE |
jbe@7 | 937 1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW |
jbe@7 | 938 1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE |
jbe@7 | 939 1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS |
jbe@7 | 940 1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE |
jbe@7 | 941 1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX |
jbe@7 | 942 1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW |
jbe@7 | 943 1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW |
jbe@7 | 944 1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW |
jbe@7 | 945 1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS |
jbe@7 | 946 1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE |
jbe@7 | 947 1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE |
jbe@7 | 948 1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING |
jbe@7 | 949 1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE |
jbe@7 | 950 1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW |
jbe@7 | 951 1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE |
jbe@7 | 952 1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE |
jbe@7 | 953 1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE |
jbe@7 | 954 1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE |
jbe@7 | 955 1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE |
jbe@7 | 956 1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW |
jbe@7 | 957 1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE |
jbe@7 | 958 1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE |
jbe@7 | 959 1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE |
jbe@7 | 960 1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE |
jbe@7 | 961 1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW |
jbe@7 | 962 1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW |
jbe@7 | 963 1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE |
jbe@7 | 964 1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE |
jbe@7 | 965 1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE |
jbe@7 | 966 1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE |
jbe@7 | 967 1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE |
jbe@7 | 968 1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE |
jbe@7 | 969 1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW |
jbe@7 | 970 1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE |
jbe@7 | 971 1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW |
jbe@7 | 972 1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW |
jbe@7 | 973 1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE |
jbe@7 | 974 1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE |
jbe@7 | 975 1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE |
jbe@7 | 976 1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE |
jbe@7 | 977 1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE |
jbe@7 | 978 1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW |
jbe@7 | 979 1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE |
jbe@7 | 980 1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE |
jbe@7 | 981 1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE |
jbe@7 | 982 1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE |
jbe@7 | 983 1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW |
jbe@7 | 984 1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW |
jbe@7 | 985 1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE |
jbe@7 | 986 1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE |
jbe@7 | 987 1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE |
jbe@7 | 988 1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE |
jbe@7 | 989 1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE |
jbe@7 | 990 1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW |
jbe@7 | 991 1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE |
jbe@7 | 992 1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW |
jbe@7 | 993 1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE |
jbe@7 | 994 1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE |
jbe@7 | 995 1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI |
jbe@7 | 996 1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA |
jbe@7 | 997 1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA |
jbe@7 | 998 1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA |
jbe@7 | 999 1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA |
jbe@7 | 1000 1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA |
jbe@7 | 1001 1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI |
jbe@7 | 1002 1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI |
jbe@7 | 1003 1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI |
jbe@7 | 1004 1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA |
jbe@7 | 1005 1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA |
jbe@7 | 1006 1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA |
jbe@7 | 1007 1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA |
jbe@7 | 1008 1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA |
jbe@7 | 1009 1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI |
jbe@7 | 1010 1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA |
jbe@7 | 1011 1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA |
jbe@7 | 1012 1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA |
jbe@7 | 1013 1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA |
jbe@7 | 1014 1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA |
jbe@7 | 1015 1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI |
jbe@7 | 1016 1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI |
jbe@7 | 1017 1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI |
jbe@7 | 1018 1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA |
jbe@7 | 1019 1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA |
jbe@7 | 1020 1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA |
jbe@7 | 1021 1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA |
jbe@7 | 1022 1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA |
jbe@7 | 1023 1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI |
jbe@7 | 1024 1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI |
jbe@7 | 1025 1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI |
jbe@7 | 1026 1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA |
jbe@7 | 1027 1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA |
jbe@7 | 1028 1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA |
jbe@7 | 1029 1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA |
jbe@7 | 1030 1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA |
jbe@7 | 1031 1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI |
jbe@7 | 1032 1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA |
jbe@7 | 1033 1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA |
jbe@7 | 1034 1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI |
jbe@7 | 1035 1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA |
jbe@7 | 1036 1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA |
jbe@7 | 1037 1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA |
jbe@7 | 1038 1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI |
jbe@7 | 1039 1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI |
jbe@7 | 1040 1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA |
jbe@7 | 1041 1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA |
jbe@7 | 1042 1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA |
jbe@7 | 1043 1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA |
jbe@7 | 1044 1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA |
jbe@7 | 1045 1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI |
jbe@7 | 1046 1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI |
jbe@7 | 1047 1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI |
jbe@7 | 1048 1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI |
jbe@7 | 1049 1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI |
jbe@7 | 1050 1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI |
jbe@7 | 1051 1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI |
jbe@7 | 1052 1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI |
jbe@7 | 1053 1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI |
jbe@7 | 1054 1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI |
jbe@7 | 1055 1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI |
jbe@7 | 1056 1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI |
jbe@7 | 1057 1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI |
jbe@7 | 1058 1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI |
jbe@7 | 1059 1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI |
jbe@7 | 1060 1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI |
jbe@7 | 1061 1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI |
jbe@7 | 1062 1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI |
jbe@7 | 1063 1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI |
jbe@7 | 1064 1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI |
jbe@7 | 1065 1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI |
jbe@7 | 1066 1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI |
jbe@7 | 1067 1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI |
jbe@7 | 1068 1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI |
jbe@7 | 1069 1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI |
jbe@7 | 1070 1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI |
jbe@7 | 1071 1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI |
jbe@7 | 1072 1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI |
jbe@7 | 1073 1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI |
jbe@7 | 1074 1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI |
jbe@7 | 1075 1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI |
jbe@7 | 1076 1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI |
jbe@7 | 1077 1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI |
jbe@7 | 1078 1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI |
jbe@7 | 1079 1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI |
jbe@7 | 1080 1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI |
jbe@7 | 1081 1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI |
jbe@7 | 1082 1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI |
jbe@7 | 1083 1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI |
jbe@7 | 1084 1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI |
jbe@7 | 1085 1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI |
jbe@7 | 1086 1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI |
jbe@7 | 1087 1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI |
jbe@7 | 1088 1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI |
jbe@7 | 1089 1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI |
jbe@7 | 1090 1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI |
jbe@7 | 1091 1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI |
jbe@7 | 1092 1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI |
jbe@7 | 1093 1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI |
jbe@7 | 1094 1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI |
jbe@7 | 1095 1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI |
jbe@7 | 1096 1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI |
jbe@7 | 1097 1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI |
jbe@7 | 1098 1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI |
jbe@7 | 1099 1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI |
jbe@7 | 1100 1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI |
jbe@7 | 1101 1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI |
jbe@7 | 1102 1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI |
jbe@7 | 1103 1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI |
jbe@7 | 1104 1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI |
jbe@7 | 1105 1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI |
jbe@7 | 1106 1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI |
jbe@7 | 1107 1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI |
jbe@7 | 1108 1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI |
jbe@7 | 1109 1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI |
jbe@7 | 1110 1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI |
jbe@7 | 1111 1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI |
jbe@7 | 1112 1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI |
jbe@7 | 1113 1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI |
jbe@7 | 1114 1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI |
jbe@7 | 1115 1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI |
jbe@7 | 1116 1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI |
jbe@7 | 1117 1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI |
jbe@7 | 1118 1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI |
jbe@7 | 1119 1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI |
jbe@7 | 1120 1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI |
jbe@7 | 1121 1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI |
jbe@7 | 1122 1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI |
jbe@7 | 1123 1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI |
jbe@7 | 1124 1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY |
jbe@7 | 1125 1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON |
jbe@7 | 1126 1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA |
jbe@7 | 1127 1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA |
jbe@7 | 1128 1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI |
jbe@7 | 1129 1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI |
jbe@7 | 1130 1FBE; C; 03B9; # GREEK PROSGEGRAMMENI |
jbe@7 | 1131 1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI |
jbe@7 | 1132 1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI |
jbe@7 | 1133 1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI |
jbe@7 | 1134 1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI |
jbe@7 | 1135 1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI |
jbe@7 | 1136 1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA |
jbe@7 | 1137 1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA |
jbe@7 | 1138 1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA |
jbe@7 | 1139 1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA |
jbe@7 | 1140 1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI |
jbe@7 | 1141 1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI |
jbe@7 | 1142 1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA |
jbe@7 | 1143 1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA |
jbe@7 | 1144 1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI |
jbe@7 | 1145 1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI |
jbe@7 | 1146 1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY |
jbe@7 | 1147 1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON |
jbe@7 | 1148 1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA |
jbe@7 | 1149 1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA |
jbe@7 | 1150 1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA |
jbe@7 | 1151 1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA |
jbe@7 | 1152 1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI |
jbe@7 | 1153 1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI |
jbe@7 | 1154 1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI |
jbe@7 | 1155 1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY |
jbe@7 | 1156 1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON |
jbe@7 | 1157 1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA |
jbe@7 | 1158 1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA |
jbe@7 | 1159 1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA |
jbe@7 | 1160 1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI |
jbe@7 | 1161 1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI |
jbe@7 | 1162 1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI |
jbe@7 | 1163 1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI |
jbe@7 | 1164 1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI |
jbe@7 | 1165 1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA |
jbe@7 | 1166 1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA |
jbe@7 | 1167 1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA |
jbe@7 | 1168 1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA |
jbe@7 | 1169 1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI |
jbe@7 | 1170 1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI |
jbe@7 | 1171 2126; C; 03C9; # OHM SIGN |
jbe@7 | 1172 212A; C; 006B; # KELVIN SIGN |
jbe@7 | 1173 212B; C; 00E5; # ANGSTROM SIGN |
jbe@7 | 1174 2132; C; 214E; # TURNED CAPITAL F |
jbe@7 | 1175 2160; C; 2170; # ROMAN NUMERAL ONE |
jbe@7 | 1176 2161; C; 2171; # ROMAN NUMERAL TWO |
jbe@7 | 1177 2162; C; 2172; # ROMAN NUMERAL THREE |
jbe@7 | 1178 2163; C; 2173; # ROMAN NUMERAL FOUR |
jbe@7 | 1179 2164; C; 2174; # ROMAN NUMERAL FIVE |
jbe@7 | 1180 2165; C; 2175; # ROMAN NUMERAL SIX |
jbe@7 | 1181 2166; C; 2176; # ROMAN NUMERAL SEVEN |
jbe@7 | 1182 2167; C; 2177; # ROMAN NUMERAL EIGHT |
jbe@7 | 1183 2168; C; 2178; # ROMAN NUMERAL NINE |
jbe@7 | 1184 2169; C; 2179; # ROMAN NUMERAL TEN |
jbe@7 | 1185 216A; C; 217A; # ROMAN NUMERAL ELEVEN |
jbe@7 | 1186 216B; C; 217B; # ROMAN NUMERAL TWELVE |
jbe@7 | 1187 216C; C; 217C; # ROMAN NUMERAL FIFTY |
jbe@7 | 1188 216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED |
jbe@7 | 1189 216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED |
jbe@7 | 1190 216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND |
jbe@7 | 1191 2183; C; 2184; # ROMAN NUMERAL REVERSED ONE HUNDRED |
jbe@7 | 1192 24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A |
jbe@7 | 1193 24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B |
jbe@7 | 1194 24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C |
jbe@7 | 1195 24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D |
jbe@7 | 1196 24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E |
jbe@7 | 1197 24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F |
jbe@7 | 1198 24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G |
jbe@7 | 1199 24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H |
jbe@7 | 1200 24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I |
jbe@7 | 1201 24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J |
jbe@7 | 1202 24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K |
jbe@7 | 1203 24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L |
jbe@7 | 1204 24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M |
jbe@7 | 1205 24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N |
jbe@7 | 1206 24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O |
jbe@7 | 1207 24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P |
jbe@7 | 1208 24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q |
jbe@7 | 1209 24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R |
jbe@7 | 1210 24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S |
jbe@7 | 1211 24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T |
jbe@7 | 1212 24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U |
jbe@7 | 1213 24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V |
jbe@7 | 1214 24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W |
jbe@7 | 1215 24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X |
jbe@7 | 1216 24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y |
jbe@7 | 1217 24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z |
jbe@7 | 1218 2C00; C; 2C30; # GLAGOLITIC CAPITAL LETTER AZU |
jbe@7 | 1219 2C01; C; 2C31; # GLAGOLITIC CAPITAL LETTER BUKY |
jbe@7 | 1220 2C02; C; 2C32; # GLAGOLITIC CAPITAL LETTER VEDE |
jbe@7 | 1221 2C03; C; 2C33; # GLAGOLITIC CAPITAL LETTER GLAGOLI |
jbe@7 | 1222 2C04; C; 2C34; # GLAGOLITIC CAPITAL LETTER DOBRO |
jbe@7 | 1223 2C05; C; 2C35; # GLAGOLITIC CAPITAL LETTER YESTU |
jbe@7 | 1224 2C06; C; 2C36; # GLAGOLITIC CAPITAL LETTER ZHIVETE |
jbe@7 | 1225 2C07; C; 2C37; # GLAGOLITIC CAPITAL LETTER DZELO |
jbe@7 | 1226 2C08; C; 2C38; # GLAGOLITIC CAPITAL LETTER ZEMLJA |
jbe@7 | 1227 2C09; C; 2C39; # GLAGOLITIC CAPITAL LETTER IZHE |
jbe@7 | 1228 2C0A; C; 2C3A; # GLAGOLITIC CAPITAL LETTER INITIAL IZHE |
jbe@7 | 1229 2C0B; C; 2C3B; # GLAGOLITIC CAPITAL LETTER I |
jbe@7 | 1230 2C0C; C; 2C3C; # GLAGOLITIC CAPITAL LETTER DJERVI |
jbe@7 | 1231 2C0D; C; 2C3D; # GLAGOLITIC CAPITAL LETTER KAKO |
jbe@7 | 1232 2C0E; C; 2C3E; # GLAGOLITIC CAPITAL LETTER LJUDIJE |
jbe@7 | 1233 2C0F; C; 2C3F; # GLAGOLITIC CAPITAL LETTER MYSLITE |
jbe@7 | 1234 2C10; C; 2C40; # GLAGOLITIC CAPITAL LETTER NASHI |
jbe@7 | 1235 2C11; C; 2C41; # GLAGOLITIC CAPITAL LETTER ONU |
jbe@7 | 1236 2C12; C; 2C42; # GLAGOLITIC CAPITAL LETTER POKOJI |
jbe@7 | 1237 2C13; C; 2C43; # GLAGOLITIC CAPITAL LETTER RITSI |
jbe@7 | 1238 2C14; C; 2C44; # GLAGOLITIC CAPITAL LETTER SLOVO |
jbe@7 | 1239 2C15; C; 2C45; # GLAGOLITIC CAPITAL LETTER TVRIDO |
jbe@7 | 1240 2C16; C; 2C46; # GLAGOLITIC CAPITAL LETTER UKU |
jbe@7 | 1241 2C17; C; 2C47; # GLAGOLITIC CAPITAL LETTER FRITU |
jbe@7 | 1242 2C18; C; 2C48; # GLAGOLITIC CAPITAL LETTER HERU |
jbe@7 | 1243 2C19; C; 2C49; # GLAGOLITIC CAPITAL LETTER OTU |
jbe@7 | 1244 2C1A; C; 2C4A; # GLAGOLITIC CAPITAL LETTER PE |
jbe@7 | 1245 2C1B; C; 2C4B; # GLAGOLITIC CAPITAL LETTER SHTA |
jbe@7 | 1246 2C1C; C; 2C4C; # GLAGOLITIC CAPITAL LETTER TSI |
jbe@7 | 1247 2C1D; C; 2C4D; # GLAGOLITIC CAPITAL LETTER CHRIVI |
jbe@7 | 1248 2C1E; C; 2C4E; # GLAGOLITIC CAPITAL LETTER SHA |
jbe@7 | 1249 2C1F; C; 2C4F; # GLAGOLITIC CAPITAL LETTER YERU |
jbe@7 | 1250 2C20; C; 2C50; # GLAGOLITIC CAPITAL LETTER YERI |
jbe@7 | 1251 2C21; C; 2C51; # GLAGOLITIC CAPITAL LETTER YATI |
jbe@7 | 1252 2C22; C; 2C52; # GLAGOLITIC CAPITAL LETTER SPIDERY HA |
jbe@7 | 1253 2C23; C; 2C53; # GLAGOLITIC CAPITAL LETTER YU |
jbe@7 | 1254 2C24; C; 2C54; # GLAGOLITIC CAPITAL LETTER SMALL YUS |
jbe@7 | 1255 2C25; C; 2C55; # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL |
jbe@7 | 1256 2C26; C; 2C56; # GLAGOLITIC CAPITAL LETTER YO |
jbe@7 | 1257 2C27; C; 2C57; # GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS |
jbe@7 | 1258 2C28; C; 2C58; # GLAGOLITIC CAPITAL LETTER BIG YUS |
jbe@7 | 1259 2C29; C; 2C59; # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS |
jbe@7 | 1260 2C2A; C; 2C5A; # GLAGOLITIC CAPITAL LETTER FITA |
jbe@7 | 1261 2C2B; C; 2C5B; # GLAGOLITIC CAPITAL LETTER IZHITSA |
jbe@7 | 1262 2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC |
jbe@7 | 1263 2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A |
jbe@7 | 1264 2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE |
jbe@7 | 1265 2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR |
jbe@7 | 1266 2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE |
jbe@7 | 1267 2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE |
jbe@7 | 1268 2C64; C; 027D; # LATIN CAPITAL LETTER R WITH TAIL |
jbe@7 | 1269 2C67; C; 2C68; # LATIN CAPITAL LETTER H WITH DESCENDER |
jbe@7 | 1270 2C69; C; 2C6A; # LATIN CAPITAL LETTER K WITH DESCENDER |
jbe@7 | 1271 2C6B; C; 2C6C; # LATIN CAPITAL LETTER Z WITH DESCENDER |
jbe@7 | 1272 2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H |
jbe@7 | 1273 2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA |
jbe@7 | 1274 2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA |
jbe@7 | 1275 2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA |
jbe@7 | 1276 2C86; C; 2C87; # COPTIC CAPITAL LETTER DALDA |
jbe@7 | 1277 2C88; C; 2C89; # COPTIC CAPITAL LETTER EIE |
jbe@7 | 1278 2C8A; C; 2C8B; # COPTIC CAPITAL LETTER SOU |
jbe@7 | 1279 2C8C; C; 2C8D; # COPTIC CAPITAL LETTER ZATA |
jbe@7 | 1280 2C8E; C; 2C8F; # COPTIC CAPITAL LETTER HATE |
jbe@7 | 1281 2C90; C; 2C91; # COPTIC CAPITAL LETTER THETHE |
jbe@7 | 1282 2C92; C; 2C93; # COPTIC CAPITAL LETTER IAUDA |
jbe@7 | 1283 2C94; C; 2C95; # COPTIC CAPITAL LETTER KAPA |
jbe@7 | 1284 2C96; C; 2C97; # COPTIC CAPITAL LETTER LAULA |
jbe@7 | 1285 2C98; C; 2C99; # COPTIC CAPITAL LETTER MI |
jbe@7 | 1286 2C9A; C; 2C9B; # COPTIC CAPITAL LETTER NI |
jbe@7 | 1287 2C9C; C; 2C9D; # COPTIC CAPITAL LETTER KSI |
jbe@7 | 1288 2C9E; C; 2C9F; # COPTIC CAPITAL LETTER O |
jbe@7 | 1289 2CA0; C; 2CA1; # COPTIC CAPITAL LETTER PI |
jbe@7 | 1290 2CA2; C; 2CA3; # COPTIC CAPITAL LETTER RO |
jbe@7 | 1291 2CA4; C; 2CA5; # COPTIC CAPITAL LETTER SIMA |
jbe@7 | 1292 2CA6; C; 2CA7; # COPTIC CAPITAL LETTER TAU |
jbe@7 | 1293 2CA8; C; 2CA9; # COPTIC CAPITAL LETTER UA |
jbe@7 | 1294 2CAA; C; 2CAB; # COPTIC CAPITAL LETTER FI |
jbe@7 | 1295 2CAC; C; 2CAD; # COPTIC CAPITAL LETTER KHI |
jbe@7 | 1296 2CAE; C; 2CAF; # COPTIC CAPITAL LETTER PSI |
jbe@7 | 1297 2CB0; C; 2CB1; # COPTIC CAPITAL LETTER OOU |
jbe@7 | 1298 2CB2; C; 2CB3; # COPTIC CAPITAL LETTER DIALECT-P ALEF |
jbe@7 | 1299 2CB4; C; 2CB5; # COPTIC CAPITAL LETTER OLD COPTIC AIN |
jbe@7 | 1300 2CB6; C; 2CB7; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE |
jbe@7 | 1301 2CB8; C; 2CB9; # COPTIC CAPITAL LETTER DIALECT-P KAPA |
jbe@7 | 1302 2CBA; C; 2CBB; # COPTIC CAPITAL LETTER DIALECT-P NI |
jbe@7 | 1303 2CBC; C; 2CBD; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI |
jbe@7 | 1304 2CBE; C; 2CBF; # COPTIC CAPITAL LETTER OLD COPTIC OOU |
jbe@7 | 1305 2CC0; C; 2CC1; # COPTIC CAPITAL LETTER SAMPI |
jbe@7 | 1306 2CC2; C; 2CC3; # COPTIC CAPITAL LETTER CROSSED SHEI |
jbe@7 | 1307 2CC4; C; 2CC5; # COPTIC CAPITAL LETTER OLD COPTIC SHEI |
jbe@7 | 1308 2CC6; C; 2CC7; # COPTIC CAPITAL LETTER OLD COPTIC ESH |
jbe@7 | 1309 2CC8; C; 2CC9; # COPTIC CAPITAL LETTER AKHMIMIC KHEI |
jbe@7 | 1310 2CCA; C; 2CCB; # COPTIC CAPITAL LETTER DIALECT-P HORI |
jbe@7 | 1311 2CCC; C; 2CCD; # COPTIC CAPITAL LETTER OLD COPTIC HORI |
jbe@7 | 1312 2CCE; C; 2CCF; # COPTIC CAPITAL LETTER OLD COPTIC HA |
jbe@7 | 1313 2CD0; C; 2CD1; # COPTIC CAPITAL LETTER L-SHAPED HA |
jbe@7 | 1314 2CD2; C; 2CD3; # COPTIC CAPITAL LETTER OLD COPTIC HEI |
jbe@7 | 1315 2CD4; C; 2CD5; # COPTIC CAPITAL LETTER OLD COPTIC HAT |
jbe@7 | 1316 2CD6; C; 2CD7; # COPTIC CAPITAL LETTER OLD COPTIC GANGIA |
jbe@7 | 1317 2CD8; C; 2CD9; # COPTIC CAPITAL LETTER OLD COPTIC DJA |
jbe@7 | 1318 2CDA; C; 2CDB; # COPTIC CAPITAL LETTER OLD COPTIC SHIMA |
jbe@7 | 1319 2CDC; C; 2CDD; # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA |
jbe@7 | 1320 2CDE; C; 2CDF; # COPTIC CAPITAL LETTER OLD NUBIAN NGI |
jbe@7 | 1321 2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI |
jbe@7 | 1322 2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU |
jbe@7 | 1323 FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF |
jbe@7 | 1324 FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI |
jbe@7 | 1325 FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL |
jbe@7 | 1326 FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI |
jbe@7 | 1327 FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL |
jbe@7 | 1328 FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T |
jbe@7 | 1329 FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST |
jbe@7 | 1330 FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW |
jbe@7 | 1331 FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH |
jbe@7 | 1332 FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI |
jbe@7 | 1333 FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW |
jbe@7 | 1334 FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH |
jbe@7 | 1335 FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A |
jbe@7 | 1336 FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B |
jbe@7 | 1337 FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C |
jbe@7 | 1338 FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D |
jbe@7 | 1339 FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E |
jbe@7 | 1340 FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F |
jbe@7 | 1341 FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G |
jbe@7 | 1342 FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H |
jbe@7 | 1343 FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I |
jbe@7 | 1344 FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J |
jbe@7 | 1345 FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K |
jbe@7 | 1346 FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L |
jbe@7 | 1347 FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M |
jbe@7 | 1348 FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N |
jbe@7 | 1349 FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O |
jbe@7 | 1350 FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P |
jbe@7 | 1351 FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q |
jbe@7 | 1352 FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R |
jbe@7 | 1353 FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S |
jbe@7 | 1354 FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T |
jbe@7 | 1355 FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U |
jbe@7 | 1356 FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V |
jbe@7 | 1357 FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W |
jbe@7 | 1358 FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X |
jbe@7 | 1359 FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y |
jbe@7 | 1360 FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z |
jbe@7 | 1361 10400; C; 10428; # DESERET CAPITAL LETTER LONG I |
jbe@7 | 1362 10401; C; 10429; # DESERET CAPITAL LETTER LONG E |
jbe@7 | 1363 10402; C; 1042A; # DESERET CAPITAL LETTER LONG A |
jbe@7 | 1364 10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH |
jbe@7 | 1365 10404; C; 1042C; # DESERET CAPITAL LETTER LONG O |
jbe@7 | 1366 10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO |
jbe@7 | 1367 10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I |
jbe@7 | 1368 10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E |
jbe@7 | 1369 10408; C; 10430; # DESERET CAPITAL LETTER SHORT A |
jbe@7 | 1370 10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH |
jbe@7 | 1371 1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O |
jbe@7 | 1372 1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO |
jbe@7 | 1373 1040C; C; 10434; # DESERET CAPITAL LETTER AY |
jbe@7 | 1374 1040D; C; 10435; # DESERET CAPITAL LETTER OW |
jbe@7 | 1375 1040E; C; 10436; # DESERET CAPITAL LETTER WU |
jbe@7 | 1376 1040F; C; 10437; # DESERET CAPITAL LETTER YEE |
jbe@7 | 1377 10410; C; 10438; # DESERET CAPITAL LETTER H |
jbe@7 | 1378 10411; C; 10439; # DESERET CAPITAL LETTER PEE |
jbe@7 | 1379 10412; C; 1043A; # DESERET CAPITAL LETTER BEE |
jbe@7 | 1380 10413; C; 1043B; # DESERET CAPITAL LETTER TEE |
jbe@7 | 1381 10414; C; 1043C; # DESERET CAPITAL LETTER DEE |
jbe@7 | 1382 10415; C; 1043D; # DESERET CAPITAL LETTER CHEE |
jbe@7 | 1383 10416; C; 1043E; # DESERET CAPITAL LETTER JEE |
jbe@7 | 1384 10417; C; 1043F; # DESERET CAPITAL LETTER KAY |
jbe@7 | 1385 10418; C; 10440; # DESERET CAPITAL LETTER GAY |
jbe@7 | 1386 10419; C; 10441; # DESERET CAPITAL LETTER EF |
jbe@7 | 1387 1041A; C; 10442; # DESERET CAPITAL LETTER VEE |
jbe@7 | 1388 1041B; C; 10443; # DESERET CAPITAL LETTER ETH |
jbe@7 | 1389 1041C; C; 10444; # DESERET CAPITAL LETTER THEE |
jbe@7 | 1390 1041D; C; 10445; # DESERET CAPITAL LETTER ES |
jbe@7 | 1391 1041E; C; 10446; # DESERET CAPITAL LETTER ZEE |
jbe@7 | 1392 1041F; C; 10447; # DESERET CAPITAL LETTER ESH |
jbe@7 | 1393 10420; C; 10448; # DESERET CAPITAL LETTER ZHEE |
jbe@7 | 1394 10421; C; 10449; # DESERET CAPITAL LETTER ER |
jbe@7 | 1395 10422; C; 1044A; # DESERET CAPITAL LETTER EL |
jbe@7 | 1396 10423; C; 1044B; # DESERET CAPITAL LETTER EM |
jbe@7 | 1397 10424; C; 1044C; # DESERET CAPITAL LETTER EN |
jbe@7 | 1398 10425; C; 1044D; # DESERET CAPITAL LETTER ENG |
jbe@7 | 1399 10426; C; 1044E; # DESERET CAPITAL LETTER OI |
jbe@7 | 1400 10427; C; 1044F; # DESERET CAPITAL LETTER EW |
jbe@7 | 1401 END_OF_LIST |
jbe@7 | 1402 |
# Build $case_folding: source code point (Integer) => Array of folded code
# points (Integers), parsed from the text held in $case_folding_string
# (presumably the heredoc that ends at END_OF_LIST above -- verify).
# Only lines whose status letter is C, F or S (matched case-insensitively) are
# used; every other line is skipped. If a code point appears on several
# matching lines, the last one wins.
$case_folding = {}
$case_folding_string.chomp.split("\n").each do |line|
  entry = line.match(/([0-9A-F]+); [CFS]; ([0-9A-F ]+);/i)
  next if entry.nil?
  $case_folding[entry[1].hex] = entry[2].split(" ").map { |digits| digits.hex }
end

# Flat pool of every code point sequence handed to ary2c; each sequence is
# followed by a -1 terminator.
$int_array = []
# Maps an already pooled sequence (an Array) to its start offset in $int_array.
$int_array_indicies = {}
jbe@7 | 1411 |
# Returns the C constant name for a property value.
#
# string - property value as a String (e.g. "Lu"), or nil when absent.
# prefix - middle part of the constant name (e.g. 'CATEGORY').
#
# Returns "0" for nil, otherwise "UTF8PROC_<prefix>_<STRING upcased>".
def str2c(string, prefix)
  string.nil? ? "0" : "UTF8PROC_#{prefix}_#{string.upcase}"
end
# Returns a C expression pointing at +array+ inside the utf8proc_sequences
# table, adding the sequence to the pool on first use.
#
# array - Array of Integers, or nil.
#
# Returns "NULL" for nil, otherwise "utf8proc_sequences + <offset>".
# Side effect: an unseen sequence is appended to $int_array, followed by a -1
# terminator, and its start offset is remembered in $int_array_indicies so
# that equal sequences share one slot.
def ary2c(array)
  return "NULL" if array.nil?
  offset = $int_array_indicies[array]
  if offset.nil?
    offset = $int_array.length
    $int_array_indicies[array] = offset
    array.each { |value| $int_array.push(value) }
    $int_array.push(-1)
  end
  "utf8proc_sequences + #{offset}"
end
jbe@7 | 1425 |
# One character record parsed from a line of UnicodeData.txt, together with
# the logic that renders it as one initializer row of the generated C array
# utf8proc_properties.
class UnicodeChar
  attr_accessor :code, :name, :category, :combining_class, :bidi_class,
                :decomp_type, :decomp_mapping,
                :bidi_mirrored,
                :uppercase_mapping, :lowercase_mapping, :titlecase_mapping

  # Semicolon separated layout of one UnicodeData.txt line. Capture groups 6
  # and 9 are helper groups (the bracketed decomposition tag and the last
  # element of the mapping) and are not read.
  LINE_FORMAT = /^
    ([0-9A-F]+);        # code
    ([^;]+);            # name
    ([A-Z]+);           # general category
    ([0-9]+);           # canonical combining class
    ([A-Z]+);           # bidi class
    (<([A-Z]*)>)?       # decomposition type
    ((\ ?[0-9A-F]+)*);  # decomposition mapping
    ([0-9]*);           # decimal digit
    ([0-9]*);           # digit
    ([^;]*);            # numeric
    ([YN]*);            # bidi mirrored
    ([^;]*);            # unicode 1.0 name
    ([^;]*);            # iso comment
    ([0-9A-F]*);        # simple uppercase mapping
    ([0-9A-F]*);        # simple lowercase mapping
    ([0-9A-F]*)$/ix     # simple titlecase mapping

  # General categories that set the 14th field of the C row ...
  FLAGGED_CATEGORIES = %w[Zl Zp Cc Cf].freeze
  # ... except for these two code points (ZERO WIDTH NON-JOINER and
  # ZERO WIDTH JOINER), which are category Cf but must not be flagged.
  UNFLAGGED_CODES = [0x200C, 0x200D].freeze

  # Parses one UnicodeData.txt line.
  #
  # line - the raw line (a trailing newline is tolerated); nil is rejected
  #        like any other unparsable input.
  #
  # Raises RuntimeError ("Could not parse input.") if the line does not match
  # LINE_FORMAT.
  def initialize(line)
    fields = LINE_FORMAT.match(line)
    raise "Could not parse input." unless fields
    @code = fields[1].hex
    @name = fields[2]
    @category = fields[3]
    # The field is all decimal digits (guaranteed by LINE_FORMAT). to_i always
    # reads base 10, whereas Integer() would treat a leading zero as octal.
    @combining_class = fields[4].to_i
    @bidi_class = fields[5]
    @decomp_type = fields[7] # nil when the mapping carries no <tag>
    @decomp_mapping = fields[8].empty? ? nil : fields[8].split.map { |digits| digits.hex }
    @bidi_mirrored = (fields[13] == 'Y')
    @uppercase_mapping = hex_or_nil(fields[16])
    @lowercase_mapping = hex_or_nil(fields[17])
    @titlecase_mapping = hex_or_nil(fields[18])
  end

  # Returns the case folding of this character as an Array of code points, or
  # nil if $case_folding has no entry for it.
  def case_folding
    $case_folding[code]
  end

  # Renders this character as one row of the C array utf8proc_properties.
  #
  # comb1_indicies - Hash: code point => row index for characters that occur
  #                  as the first element of a canonical pair.
  # comb2_indicies - Hash: code point => column index for characters that
  #                  occur as the second element of a canonical pair.
  #
  # Returns a String of the form " {f1, f2, ..., f16},\n".
  # Side effect: the decomposition and case-folding sequences are registered
  # in the sequence pool through ary2c (decomposition first).
  def c_entry(comb1_indicies, comb2_indicies)
    first_index = comb1_indicies[code]
    # The first-character index is pre-multiplied by the number of second
    # characters, i.e. it is the offset of the row in the flattened table.
    row_offset = first_index ? (first_index * comb2_indicies.keys.length) : -1
    # Fixed: the exception list holds code points, so it has to be checked
    # against +code+. It used to be checked against +category+ (a String),
    # which never matched, so U+200C and U+200D were flagged like any other
    # Cf character.
    flagged = FLAGGED_CATEGORIES.include?(category) &&
              !UNFLAGGED_CODES.include?(code)
    fields = [
      str2c(category, 'CATEGORY'),
      combining_class,
      str2c(bidi_class, 'BIDI_CLASS'),
      str2c(decomp_type, 'DECOMP_TYPE'),
      ary2c(decomp_mapping),
      bidi_mirrored,
      uppercase_mapping || -1,
      lowercase_mapping || -1,
      titlecase_mapping || -1,
      row_offset,
      comb2_indicies[code] || -1,
      $exclusions.include?(code) || $excl_version.include?(code),
      $ignorable.include?(code),
      flagged,
      $grapheme_extend.include?(code),
      ary2c(case_folding)
    ]
    " {" + fields.join(", ") + "},\n"
  end

  private

  # Converts a hexadecimal field to an Integer; an empty field yields nil.
  def hex_or_nil(field)
    field.empty? ? nil : field.hex
  end
end
jbe@7 | 1485 |
# Read the character records line by line with Kernel#gets and collect them
# in input order (chars) and by code point (char_hash).
#
# A "<Name, First>" line followed by a "<Name, Last>" line describes a whole
# range: the properties are taken from the Last line and one copy is stored
# for every code point of the range, each named "<Name>".
chars = []
char_hash = {}

while (line = gets)
  range_start = line.match(/^([0-9A-F]+);<[^;>,]+, First>;/i)
  if range_start
    first = range_start[1].hex
    last_line = gets
    # Parsing comes first: a missing or malformed line fails here already.
    char = UnicodeChar.new(last_line)
    range_end = last_line.match(/^([0-9A-F]+);<([^;>,]+), Last>;/i)
    raise "No last character of sequence found." unless range_end
    last = range_end[1].hex
    name = "<#{range_end[2]}>"
    (first..last).each do |code_point|
      member = char.clone
      member.code = code_point
      member.name = name
      char_hash[member.code] = member
      chars << member
    end
  else
    char = UnicodeChar.new(line)
    char_hash[char.code] = char
    chars << char
  end
end
jbe@7 | 1511 |
# Build the lookup tables for canonical composition of character pairs.
#
# comb1st_indicies - code point of a first character  => row index
# comb2nd_indicies - code point of a second character => column index
# comb_array       - comb_array[row][column] = code point of the composite
#
# A character contributes a pair when it has an untagged (canonical)
# decomposition of exactly two code points, the first of them has combining
# class 0, and the character is not listed in $exclusions. Indices are
# handed out in order of first appearance.
comb1st_indicies = {}
comb2nd_indicies = {}
comb_array = []

chars.each do |char|
  mapping = char.decomp_mapping
  next unless char.decomp_type.nil? && mapping && mapping.length == 2
  next unless char_hash[mapping[0]].combining_class == 0
  next if $exclusions.include?(char.code)

  first_cp, second_cp = mapping
  comb1st_indicies[first_cp] ||= comb1st_indicies.keys.length
  comb2nd_indicies[second_cp] ||= comb2nd_indicies.keys.length

  row = (comb_array[comb1st_indicies[first_cp]] ||= [])
  column = comb2nd_indicies[second_cp]
  # Two different characters must never decompose into the same pair.
  raise "Duplicate canonical mapping" if row[column]
  row[column] = char.code
end
jbe@7 | 1535 |
# Collect the distinct C property rows.
#
# properties          - the unique rows, in order of first appearance
# properties_indicies - row text => its position in +properties+
#
# Characters whose rendered rows are identical share a single entry.
properties_indicies = {}
properties = []
chars.each do |char|
  row = char.c_entry(comb1st_indicies, comb2nd_indicies)
  next if properties_indicies[row]
  properties_indicies[row] = properties.length
  properties.push(row)
end
jbe@7 | 1545 |
# Build the two-stage lookup table covering code points 0...0x110000 in
# blocks of 0x100.
#
# stage2 - the distinct blocks; each block holds, per code point, the index
#          of its row in +properties+ plus one, or 0 for a code point that
#          has no record.
# stage1 - per block of 0x100 code points, the offset (block index * 0x100)
#          of the matching block inside the flattened stage2 table.
#
# NOTE(review): both tables are emitted as uint16_t further down, so this
# relies on every value staying below 0x10000 -- nothing here checks that.
stage1 = []
stage2 = []
(0...0x110000).step(0x100) do |block_start|
  block = (block_start...(block_start + 0x100)).map do |code_point|
    known = char_hash[code_point]
    if known
      properties_indicies[known.c_entry(comb1st_indicies, comb2nd_indicies)] + 1
    else
      0
    end
  end
  # Identical blocks are stored once and shared.
  existing = stage2.index(block)
  if existing
    stage1 << (existing * 0x100)
  else
    stage1 << (stage2.length * 0x100)
    stage2 << block
  end
end
jbe@7 | 1567 |
# Write the generated C tables to standard output.

# Writes every value followed by ", ". A line break ("\n ") is inserted
# before the 8th, 16th, 24th, ... value, so the first output line carries
# seven values and each following line eight.
write_wrapped = lambda do |values|
  values.each_with_index do |value, position|
    $stdout << "\n " if (position + 1) % 8 == 0
    $stdout << value << ", "
  end
end

# Pool of -1 terminated code point sequences referenced by the property rows.
$stdout << "const int32_t utf8proc_sequences[] = {\n "
write_wrapped.call($int_array)
$stdout << "};\n\n"

# First lookup stage: one offset into stage 2 per block of 0x100 code points.
$stdout << "const uint16_t utf8proc_stage1table[] = {\n "
write_wrapped.call(stage1)
$stdout << "};\n\n"

# Second lookup stage: all distinct blocks, concatenated.
$stdout << "const uint16_t utf8proc_stage2table[] = {\n "
write_wrapped.call(stage2.flatten)
$stdout << "};\n\n"

# Property rows. Row 0 is a fixed default row; stage 2 stores 0 for code
# points without a record and (index + 1) for all others.
$stdout << "const utf8proc_property_t utf8proc_properties[] = {\n"
$stdout << " {0, 0, 0, 0, NULL, false, -1, -1, -1, -1, -1, false},\n"
properties.each { |row| $stdout << row }
$stdout << "};\n\n"

# Composition matrix, flattened row by row (rows: first characters, columns:
# second characters); -1 marks a pair without a composite.
combinations = []
comb1st_indicies.keys.each_index do |a|
  comb2nd_indicies.keys.each_index do |b|
    combinations << (comb_array[a][b] || -1)
  end
end
$stdout << "const int32_t utf8proc_combinations[] = {\n "
write_wrapped.call(combinations)
$stdout << "};\n\n"
jbe@7 | 1624 |