utf8proc

diff data_generator.rb @ 15:15450ff3d454

Contribution from libmojibake fork
author Jiahao Chen, Steven G. Johnson, Anthony David Kelman
date Fri Nov 21 08:27:44 2014 -0500 (2014-11-21)
parents 00d2bcbdc945
children
line diff
     1.1 --- a/data_generator.rb	Wed Nov 27 12:00:00 2013 +0100
     1.2 +++ b/data_generator.rb	Fri Nov 21 08:27:44 2014 -0500
     1.3 @@ -1,4 +1,4 @@
     1.4 -#!/usr/pkg/bin/ruby
     1.5 +#!/usr/bin/env ruby
     1.6  
     1.7  #  This file was used to generate the 'unicode_data.c' file by parsing the
     1.8  #  Unicode data file 'UnicodeData.txt' of the Unicode Character Database.
     1.9 @@ -65,42 +65,9 @@
    1.10  #  authorization of the copyright holder.
    1.11  
    1.12  
    1.13 -
    1.14 -$ignorable_list = <<END_OF_LIST
    1.15 -0000..0008    ; Default_Ignorable_Code_Point # Cc   [9] <control-0000>..<control-0008>
    1.16 -000E..001F    ; Default_Ignorable_Code_Point # Cc  [18] <control-000E>..<control-001F>
    1.17 -007F..0084    ; Default_Ignorable_Code_Point # Cc   [6] <control-007F>..<control-0084>
    1.18 -0086..009F    ; Default_Ignorable_Code_Point # Cc  [26] <control-0086>..<control-009F>
    1.19 -00AD          ; Default_Ignorable_Code_Point # Cf       SOFT HYPHEN
    1.20 -034F          ; Default_Ignorable_Code_Point # Mn       COMBINING GRAPHEME JOINER
    1.21 -0600..0603    ; Default_Ignorable_Code_Point # Cf   [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
    1.22 -06DD          ; Default_Ignorable_Code_Point # Cf       ARABIC END OF AYAH
    1.23 -070F          ; Default_Ignorable_Code_Point # Cf       SYRIAC ABBREVIATION MARK
    1.24 -115F..1160    ; Default_Ignorable_Code_Point # Lo   [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER
    1.25 -17B4..17B5    ; Default_Ignorable_Code_Point # Cf   [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
    1.26 -180B..180D    ; Default_Ignorable_Code_Point # Mn   [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
    1.27 -200B..200F    ; Default_Ignorable_Code_Point # Cf   [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK
    1.28 -202A..202E    ; Default_Ignorable_Code_Point # Cf   [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
    1.29 -2060..2063    ; Default_Ignorable_Code_Point # Cf   [4] WORD JOINER..INVISIBLE SEPARATOR
    1.30 -2064..2069    ; Default_Ignorable_Code_Point # Cn   [6] <reserved-2064>..<reserved-2069>
    1.31 -206A..206F    ; Default_Ignorable_Code_Point # Cf   [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
    1.32 -3164          ; Default_Ignorable_Code_Point # Lo       HANGUL FILLER
    1.33 -D800..DFFF    ; Default_Ignorable_Code_Point # Cs [2048] <surrogate-D800>..<surrogate-DFFF>
    1.34 -FE00..FE0F    ; Default_Ignorable_Code_Point # Mn  [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
    1.35 -FEFF          ; Default_Ignorable_Code_Point # Cf       ZERO WIDTH NO-BREAK SPACE
    1.36 -FFA0          ; Default_Ignorable_Code_Point # Lo       HALFWIDTH HANGUL FILLER
    1.37 -FFF0..FFF8    ; Default_Ignorable_Code_Point # Cn   [9] <reserved-FFF0>..<reserved-FFF8>
    1.38 -1D173..1D17A  ; Default_Ignorable_Code_Point # Cf   [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
    1.39 -E0001         ; Default_Ignorable_Code_Point # Cf       LANGUAGE TAG
    1.40 -E0002..E001F  ; Default_Ignorable_Code_Point # Cn  [30] <reserved-E0002>..<reserved-E001F>
    1.41 -E0020..E007F  ; Default_Ignorable_Code_Point # Cf  [96] TAG SPACE..CANCEL TAG
    1.42 -E0080..E00FF  ; Default_Ignorable_Code_Point # Cn [128] <reserved-E0080>..<reserved-E00FF>
    1.43 -E0100..E01EF  ; Default_Ignorable_Code_Point # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
    1.44 -E01F0..E0FFF  ; Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
    1.45 -END_OF_LIST
    1.46 -
    1.47 +$ignorable_list = File.read("DerivedCoreProperties.txt")[/# Derived Property: Default_Ignorable_Code_Point.*?# Total code points:/m]
    1.48  $ignorable = []
    1.49 -$ignorable_list.each do |entry|
    1.50 +$ignorable_list.each_line do |entry|
    1.51    if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
    1.52      $1.hex.upto($2.hex) { |e2| $ignorable << e2 }
    1.53    elsif entry =~ /^[0-9A-F]+/
    1.54 @@ -108,162 +75,9 @@
    1.55    end
    1.56  end
    1.57  
    1.58 -$grapheme_extend_list = <<END_OF_LIST
    1.59 -0300..036F    ; Grapheme_Extend # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
    1.60 -0483..0486    ; Grapheme_Extend # Mn   [4] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PSILI PNEUMATA
    1.61 -0488..0489    ; Grapheme_Extend # Me   [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
    1.62 -0591..05BD    ; Grapheme_Extend # Mn  [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
    1.63 -05BF          ; Grapheme_Extend # Mn       HEBREW POINT RAFE
    1.64 -05C1..05C2    ; Grapheme_Extend # Mn   [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
    1.65 -05C4..05C5    ; Grapheme_Extend # Mn   [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
    1.66 -05C7          ; Grapheme_Extend # Mn       HEBREW POINT QAMATS QATAN
    1.67 -0610..0615    ; Grapheme_Extend # Mn   [6] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL HIGH TAH
    1.68 -064B..065E    ; Grapheme_Extend # Mn  [20] ARABIC FATHATAN..ARABIC FATHA WITH TWO DOTS
    1.69 -0670          ; Grapheme_Extend # Mn       ARABIC LETTER SUPERSCRIPT ALEF
    1.70 -06D6..06DC    ; Grapheme_Extend # Mn   [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
    1.71 -06DE          ; Grapheme_Extend # Me       ARABIC START OF RUB EL HIZB
    1.72 -06DF..06E4    ; Grapheme_Extend # Mn   [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
    1.73 -06E7..06E8    ; Grapheme_Extend # Mn   [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
    1.74 -06EA..06ED    ; Grapheme_Extend # Mn   [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
    1.75 -0711          ; Grapheme_Extend # Mn       SYRIAC LETTER SUPERSCRIPT ALAPH
    1.76 -0730..074A    ; Grapheme_Extend # Mn  [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
    1.77 -07A6..07B0    ; Grapheme_Extend # Mn  [11] THAANA ABAFILI..THAANA SUKUN
    1.78 -07EB..07F3    ; Grapheme_Extend # Mn   [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
    1.79 -0901..0902    ; Grapheme_Extend # Mn   [2] DEVANAGARI SIGN CANDRABINDU..DEVANAGARI SIGN ANUSVARA
    1.80 -093C          ; Grapheme_Extend # Mn       DEVANAGARI SIGN NUKTA
    1.81 -0941..0948    ; Grapheme_Extend # Mn   [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
    1.82 -094D          ; Grapheme_Extend # Mn       DEVANAGARI SIGN VIRAMA
    1.83 -0951..0954    ; Grapheme_Extend # Mn   [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
    1.84 -0962..0963    ; Grapheme_Extend # Mn   [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
    1.85 -0981          ; Grapheme_Extend # Mn       BENGALI SIGN CANDRABINDU
    1.86 -09BC          ; Grapheme_Extend # Mn       BENGALI SIGN NUKTA
    1.87 -09BE          ; Grapheme_Extend # Mc       BENGALI VOWEL SIGN AA
    1.88 -09C1..09C4    ; Grapheme_Extend # Mn   [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR
    1.89 -09CD          ; Grapheme_Extend # Mn       BENGALI SIGN VIRAMA
    1.90 -09D7          ; Grapheme_Extend # Mc       BENGALI AU LENGTH MARK
    1.91 -09E2..09E3    ; Grapheme_Extend # Mn   [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL
    1.92 -0A01..0A02    ; Grapheme_Extend # Mn   [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI
    1.93 -0A3C          ; Grapheme_Extend # Mn       GURMUKHI SIGN NUKTA
    1.94 -0A41..0A42    ; Grapheme_Extend # Mn   [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU
    1.95 -0A47..0A48    ; Grapheme_Extend # Mn   [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI
    1.96 -0A4B..0A4D    ; Grapheme_Extend # Mn   [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA
    1.97 -0A70..0A71    ; Grapheme_Extend # Mn   [2] GURMUKHI TIPPI..GURMUKHI ADDAK
    1.98 -0A81..0A82    ; Grapheme_Extend # Mn   [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA
    1.99 -0ABC          ; Grapheme_Extend # Mn       GUJARATI SIGN NUKTA
   1.100 -0AC1..0AC5    ; Grapheme_Extend # Mn   [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E
   1.101 -0AC7..0AC8    ; Grapheme_Extend # Mn   [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
   1.102 -0ACD          ; Grapheme_Extend # Mn       GUJARATI SIGN VIRAMA
   1.103 -0AE2..0AE3    ; Grapheme_Extend # Mn   [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
   1.104 -0B01          ; Grapheme_Extend # Mn       ORIYA SIGN CANDRABINDU
   1.105 -0B3C          ; Grapheme_Extend # Mn       ORIYA SIGN NUKTA
   1.106 -0B3E          ; Grapheme_Extend # Mc       ORIYA VOWEL SIGN AA
   1.107 -0B3F          ; Grapheme_Extend # Mn       ORIYA VOWEL SIGN I
   1.108 -0B41..0B43    ; Grapheme_Extend # Mn   [3] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC R
   1.109 -0B4D          ; Grapheme_Extend # Mn       ORIYA SIGN VIRAMA
   1.110 -0B56          ; Grapheme_Extend # Mn       ORIYA AI LENGTH MARK
   1.111 -0B57          ; Grapheme_Extend # Mc       ORIYA AU LENGTH MARK
   1.112 -0B82          ; Grapheme_Extend # Mn       TAMIL SIGN ANUSVARA
   1.113 -0BBE          ; Grapheme_Extend # Mc       TAMIL VOWEL SIGN AA
   1.114 -0BC0          ; Grapheme_Extend # Mn       TAMIL VOWEL SIGN II
   1.115 -0BCD          ; Grapheme_Extend # Mn       TAMIL SIGN VIRAMA
   1.116 -0BD7          ; Grapheme_Extend # Mc       TAMIL AU LENGTH MARK
   1.117 -0C3E..0C40    ; Grapheme_Extend # Mn   [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
   1.118 -0C46..0C48    ; Grapheme_Extend # Mn   [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
   1.119 -0C4A..0C4D    ; Grapheme_Extend # Mn   [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
   1.120 -0C55..0C56    ; Grapheme_Extend # Mn   [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
   1.121 -0CBC          ; Grapheme_Extend # Mn       KANNADA SIGN NUKTA
   1.122 -0CBF          ; Grapheme_Extend # Mn       KANNADA VOWEL SIGN I
   1.123 -0CC2          ; Grapheme_Extend # Mc       KANNADA VOWEL SIGN UU
   1.124 -0CC6          ; Grapheme_Extend # Mn       KANNADA VOWEL SIGN E
   1.125 -0CCC..0CCD    ; Grapheme_Extend # Mn   [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
   1.126 -0CD5..0CD6    ; Grapheme_Extend # Mc   [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
   1.127 -0CE2..0CE3    ; Grapheme_Extend # Mn   [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
   1.128 -0D3E          ; Grapheme_Extend # Mc       MALAYALAM VOWEL SIGN AA
   1.129 -0D41..0D43    ; Grapheme_Extend # Mn   [3] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC R
   1.130 -0D4D          ; Grapheme_Extend # Mn       MALAYALAM SIGN VIRAMA
   1.131 -0D57          ; Grapheme_Extend # Mc       MALAYALAM AU LENGTH MARK
   1.132 -0DCA          ; Grapheme_Extend # Mn       SINHALA SIGN AL-LAKUNA
   1.133 -0DCF          ; Grapheme_Extend # Mc       SINHALA VOWEL SIGN AELA-PILLA
   1.134 -0DD2..0DD4    ; Grapheme_Extend # Mn   [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
   1.135 -0DD6          ; Grapheme_Extend # Mn       SINHALA VOWEL SIGN DIGA PAA-PILLA
   1.136 -0DDF          ; Grapheme_Extend # Mc       SINHALA VOWEL SIGN GAYANUKITTA
   1.137 -0E31          ; Grapheme_Extend # Mn       THAI CHARACTER MAI HAN-AKAT
   1.138 -0E34..0E3A    ; Grapheme_Extend # Mn   [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU
   1.139 -0E47..0E4E    ; Grapheme_Extend # Mn   [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN
   1.140 -0EB1          ; Grapheme_Extend # Mn       LAO VOWEL SIGN MAI KAN
   1.141 -0EB4..0EB9    ; Grapheme_Extend # Mn   [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU
   1.142 -0EBB..0EBC    ; Grapheme_Extend # Mn   [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO
   1.143 -0EC8..0ECD    ; Grapheme_Extend # Mn   [6] LAO TONE MAI EK..LAO NIGGAHITA
   1.144 -0F18..0F19    ; Grapheme_Extend # Mn   [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
   1.145 -0F35          ; Grapheme_Extend # Mn       TIBETAN MARK NGAS BZUNG NYI ZLA
   1.146 -0F37          ; Grapheme_Extend # Mn       TIBETAN MARK NGAS BZUNG SGOR RTAGS
   1.147 -0F39          ; Grapheme_Extend # Mn       TIBETAN MARK TSA -PHRU
   1.148 -0F71..0F7E    ; Grapheme_Extend # Mn  [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO
   1.149 -0F80..0F84    ; Grapheme_Extend # Mn   [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA
   1.150 -0F86..0F87    ; Grapheme_Extend # Mn   [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
   1.151 -0F90..0F97    ; Grapheme_Extend # Mn   [8] TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER JA
   1.152 -0F99..0FBC    ; Grapheme_Extend # Mn  [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
   1.153 -0FC6          ; Grapheme_Extend # Mn       TIBETAN SYMBOL PADMA GDAN
   1.154 -102D..1030    ; Grapheme_Extend # Mn   [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU
   1.155 -1032          ; Grapheme_Extend # Mn       MYANMAR VOWEL SIGN AI
   1.156 -1036..1037    ; Grapheme_Extend # Mn   [2] MYANMAR SIGN ANUSVARA..MYANMAR SIGN DOT BELOW
   1.157 -1039          ; Grapheme_Extend # Mn       MYANMAR SIGN VIRAMA
   1.158 -1058..1059    ; Grapheme_Extend # Mn   [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL
   1.159 -135F          ; Grapheme_Extend # Mn       ETHIOPIC COMBINING GEMINATION MARK
   1.160 -1712..1714    ; Grapheme_Extend # Mn   [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
   1.161 -1732..1734    ; Grapheme_Extend # Mn   [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
   1.162 -1752..1753    ; Grapheme_Extend # Mn   [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
   1.163 -1772..1773    ; Grapheme_Extend # Mn   [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
   1.164 -17B7..17BD    ; Grapheme_Extend # Mn   [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA
   1.165 -17C6          ; Grapheme_Extend # Mn       KHMER SIGN NIKAHIT
   1.166 -17C9..17D3    ; Grapheme_Extend # Mn  [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
   1.167 -17DD          ; Grapheme_Extend # Mn       KHMER SIGN ATTHACAN
   1.168 -180B..180D    ; Grapheme_Extend # Mn   [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
   1.169 -18A9          ; Grapheme_Extend # Mn       MONGOLIAN LETTER ALI GALI DAGALGA
   1.170 -1920..1922    ; Grapheme_Extend # Mn   [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
   1.171 -1927..1928    ; Grapheme_Extend # Mn   [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O
   1.172 -1932          ; Grapheme_Extend # Mn       LIMBU SMALL LETTER ANUSVARA
   1.173 -1939..193B    ; Grapheme_Extend # Mn   [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
   1.174 -1A17..1A18    ; Grapheme_Extend # Mn   [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
   1.175 -1B00..1B03    ; Grapheme_Extend # Mn   [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
   1.176 -1B34          ; Grapheme_Extend # Mn       BALINESE SIGN REREKAN
   1.177 -1B36..1B3A    ; Grapheme_Extend # Mn   [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
   1.178 -1B3C          ; Grapheme_Extend # Mn       BALINESE VOWEL SIGN LA LENGA
   1.179 -1B42          ; Grapheme_Extend # Mn       BALINESE VOWEL SIGN PEPET
   1.180 -1B6B..1B73    ; Grapheme_Extend # Mn   [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
   1.181 -1DC0..1DCA    ; Grapheme_Extend # Mn  [11] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER R BELOW
   1.182 -1DFE..1DFF    ; Grapheme_Extend # Mn   [2] COMBINING LEFT ARROWHEAD ABOVE..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
   1.183 -200C..200D    ; Grapheme_Extend # Cf   [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
   1.184 -20D0..20DC    ; Grapheme_Extend # Mn  [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
   1.185 -20DD..20E0    ; Grapheme_Extend # Me   [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
   1.186 -20E1          ; Grapheme_Extend # Mn       COMBINING LEFT RIGHT ARROW ABOVE
   1.187 -20E2..20E4    ; Grapheme_Extend # Me   [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
   1.188 -20E5..20EF    ; Grapheme_Extend # Mn  [11] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING RIGHT ARROW BELOW
   1.189 -302A..302F    ; Grapheme_Extend # Mn   [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
   1.190 -3099..309A    ; Grapheme_Extend # Mn   [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
   1.191 -A806          ; Grapheme_Extend # Mn       SYLOTI NAGRI SIGN HASANTA
   1.192 -A80B          ; Grapheme_Extend # Mn       SYLOTI NAGRI SIGN ANUSVARA
   1.193 -A825..A826    ; Grapheme_Extend # Mn   [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
   1.194 -FB1E          ; Grapheme_Extend # Mn       HEBREW POINT JUDEO-SPANISH VARIKA
   1.195 -FE00..FE0F    ; Grapheme_Extend # Mn  [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
   1.196 -FE20..FE23    ; Grapheme_Extend # Mn   [4] COMBINING LIGATURE LEFT HALF..COMBINING DOUBLE TILDE RIGHT HALF
   1.197 -10A01..10A03  ; Grapheme_Extend # Mn   [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R
   1.198 -10A05..10A06  ; Grapheme_Extend # Mn   [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O
   1.199 -10A0C..10A0F  ; Grapheme_Extend # Mn   [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
   1.200 -10A38..10A3A  ; Grapheme_Extend # Mn   [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
   1.201 -10A3F         ; Grapheme_Extend # Mn       KHAROSHTHI VIRAMA
   1.202 -1D165         ; Grapheme_Extend # Mc       MUSICAL SYMBOL COMBINING STEM
   1.203 -1D167..1D169  ; Grapheme_Extend # Mn   [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
   1.204 -1D16E..1D172  ; Grapheme_Extend # Mc   [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5
   1.205 -1D17B..1D182  ; Grapheme_Extend # Mn   [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
   1.206 -1D185..1D18B  ; Grapheme_Extend # Mn   [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
   1.207 -1D1AA..1D1AD  ; Grapheme_Extend # Mn   [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
   1.208 -1D242..1D244  ; Grapheme_Extend # Mn   [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
   1.209 -E0100..E01EF  ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
   1.210 -END_OF_LIST
   1.211 -
   1.212 +$grapheme_extend_list = File.read("DerivedCoreProperties.txt")[/# Derived Property: Grapheme_Extend.*?# Total code points:/m]
   1.213  $grapheme_extend = []
   1.214 -$grapheme_extend_list.each do |entry|
   1.215 +$grapheme_extend_list.each_line do |entry|
   1.216    if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
   1.217      $1.hex.upto($2.hex) { |e2| $grapheme_extend << e2 }
   1.218    elsif entry =~ /^[0-9A-F]+/
   1.219 @@ -271,1134 +85,13 @@
   1.220    end
   1.221  end
   1.222  
   1.223 -$exclusions = <<END_OF_LIST
   1.224 -0958    #  DEVANAGARI LETTER QA
   1.225 -0959    #  DEVANAGARI LETTER KHHA
   1.226 -095A    #  DEVANAGARI LETTER GHHA
   1.227 -095B    #  DEVANAGARI LETTER ZA
   1.228 -095C    #  DEVANAGARI LETTER DDDHA
   1.229 -095D    #  DEVANAGARI LETTER RHA
   1.230 -095E    #  DEVANAGARI LETTER FA
   1.231 -095F    #  DEVANAGARI LETTER YYA
   1.232 -09DC    #  BENGALI LETTER RRA
   1.233 -09DD    #  BENGALI LETTER RHA
   1.234 -09DF    #  BENGALI LETTER YYA
   1.235 -0A33    #  GURMUKHI LETTER LLA
   1.236 -0A36    #  GURMUKHI LETTER SHA
   1.237 -0A59    #  GURMUKHI LETTER KHHA
   1.238 -0A5A    #  GURMUKHI LETTER GHHA
   1.239 -0A5B    #  GURMUKHI LETTER ZA
   1.240 -0A5E    #  GURMUKHI LETTER FA
   1.241 -0B5C    #  ORIYA LETTER RRA
   1.242 -0B5D    #  ORIYA LETTER RHA
   1.243 -0F43    #  TIBETAN LETTER GHA
   1.244 -0F4D    #  TIBETAN LETTER DDHA
   1.245 -0F52    #  TIBETAN LETTER DHA
   1.246 -0F57    #  TIBETAN LETTER BHA
   1.247 -0F5C    #  TIBETAN LETTER DZHA
   1.248 -0F69    #  TIBETAN LETTER KSSA
   1.249 -0F76    #  TIBETAN VOWEL SIGN VOCALIC R
   1.250 -0F78    #  TIBETAN VOWEL SIGN VOCALIC L
   1.251 -0F93    #  TIBETAN SUBJOINED LETTER GHA
   1.252 -0F9D    #  TIBETAN SUBJOINED LETTER DDHA
   1.253 -0FA2    #  TIBETAN SUBJOINED LETTER DHA
   1.254 -0FA7    #  TIBETAN SUBJOINED LETTER BHA
   1.255 -0FAC    #  TIBETAN SUBJOINED LETTER DZHA
   1.256 -0FB9    #  TIBETAN SUBJOINED LETTER KSSA
   1.257 -FB1D    #  HEBREW LETTER YOD WITH HIRIQ
   1.258 -FB1F    #  HEBREW LIGATURE YIDDISH YOD YOD PATAH
   1.259 -FB2A    #  HEBREW LETTER SHIN WITH SHIN DOT
   1.260 -FB2B    #  HEBREW LETTER SHIN WITH SIN DOT
   1.261 -FB2C    #  HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT
   1.262 -FB2D    #  HEBREW LETTER SHIN WITH DAGESH AND SIN DOT
   1.263 -FB2E    #  HEBREW LETTER ALEF WITH PATAH
   1.264 -FB2F    #  HEBREW LETTER ALEF WITH QAMATS
   1.265 -FB30    #  HEBREW LETTER ALEF WITH MAPIQ
   1.266 -FB31    #  HEBREW LETTER BET WITH DAGESH
   1.267 -FB32    #  HEBREW LETTER GIMEL WITH DAGESH
   1.268 -FB33    #  HEBREW LETTER DALET WITH DAGESH
   1.269 -FB34    #  HEBREW LETTER HE WITH MAPIQ
   1.270 -FB35    #  HEBREW LETTER VAV WITH DAGESH
   1.271 -FB36    #  HEBREW LETTER ZAYIN WITH DAGESH
   1.272 -FB38    #  HEBREW LETTER TET WITH DAGESH
   1.273 -FB39    #  HEBREW LETTER YOD WITH DAGESH
   1.274 -FB3A    #  HEBREW LETTER FINAL KAF WITH DAGESH
   1.275 -FB3B    #  HEBREW LETTER KAF WITH DAGESH
   1.276 -FB3C    #  HEBREW LETTER LAMED WITH DAGESH
   1.277 -FB3E    #  HEBREW LETTER MEM WITH DAGESH
   1.278 -FB40    #  HEBREW LETTER NUN WITH DAGESH
   1.279 -FB41    #  HEBREW LETTER SAMEKH WITH DAGESH
   1.280 -FB43    #  HEBREW LETTER FINAL PE WITH DAGESH
   1.281 -FB44    #  HEBREW LETTER PE WITH DAGESH
   1.282 -FB46    #  HEBREW LETTER TSADI WITH DAGESH
   1.283 -FB47    #  HEBREW LETTER QOF WITH DAGESH
   1.284 -FB48    #  HEBREW LETTER RESH WITH DAGESH
   1.285 -FB49    #  HEBREW LETTER SHIN WITH DAGESH
   1.286 -FB4A    #  HEBREW LETTER TAV WITH DAGESH
   1.287 -FB4B    #  HEBREW LETTER VAV WITH HOLAM
   1.288 -FB4C    #  HEBREW LETTER BET WITH RAFE
   1.289 -FB4D    #  HEBREW LETTER KAF WITH RAFE
   1.290 -FB4E    #  HEBREW LETTER PE WITH RAFE
   1.291 -END_OF_LIST
   1.292 +$exclusions = File.read("CompositionExclusions.txt")[/# \(1\) Script Specifics.*?# Total code points:/m]
   1.293  $exclusions = $exclusions.chomp.split("\n").collect { |e| e.hex }
   1.294  
   1.295 -$excl_version = <<END_OF_LIST
   1.296 -2ADC    #  FORKING
   1.297 -1D15E   #  MUSICAL SYMBOL HALF NOTE
   1.298 -1D15F   #  MUSICAL SYMBOL QUARTER NOTE
   1.299 -1D160   #  MUSICAL SYMBOL EIGHTH NOTE
   1.300 -1D161   #  MUSICAL SYMBOL SIXTEENTH NOTE
   1.301 -1D162   #  MUSICAL SYMBOL THIRTY-SECOND NOTE
   1.302 -1D163   #  MUSICAL SYMBOL SIXTY-FOURTH NOTE
   1.303 -1D164   #  MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
   1.304 -1D1BB   #  MUSICAL SYMBOL MINIMA
   1.305 -1D1BC   #  MUSICAL SYMBOL MINIMA BLACK
   1.306 -1D1BD   #  MUSICAL SYMBOL SEMIMINIMA WHITE
   1.307 -1D1BE   #  MUSICAL SYMBOL SEMIMINIMA BLACK
   1.308 -1D1BF   #  MUSICAL SYMBOL FUSA WHITE
   1.309 -1D1C0   #  MUSICAL SYMBOL FUSA BLACK
   1.310 -END_OF_LIST
   1.311 +$excl_version = File.read("CompositionExclusions.txt")[/# \(2\) Post Composition Version precomposed characters.*?# Total code points:/m]
   1.312  $excl_version = $excl_version.chomp.split("\n").collect { |e| e.hex }
   1.313  
   1.314 -$case_folding_string = <<END_OF_LIST
   1.315 -0041; C; 0061; # LATIN CAPITAL LETTER A
   1.316 -0042; C; 0062; # LATIN CAPITAL LETTER B
   1.317 -0043; C; 0063; # LATIN CAPITAL LETTER C
   1.318 -0044; C; 0064; # LATIN CAPITAL LETTER D
   1.319 -0045; C; 0065; # LATIN CAPITAL LETTER E
   1.320 -0046; C; 0066; # LATIN CAPITAL LETTER F
   1.321 -0047; C; 0067; # LATIN CAPITAL LETTER G
   1.322 -0048; C; 0068; # LATIN CAPITAL LETTER H
   1.323 -0049; C; 0069; # LATIN CAPITAL LETTER I
   1.324 -004A; C; 006A; # LATIN CAPITAL LETTER J
   1.325 -004B; C; 006B; # LATIN CAPITAL LETTER K
   1.326 -004C; C; 006C; # LATIN CAPITAL LETTER L
   1.327 -004D; C; 006D; # LATIN CAPITAL LETTER M
   1.328 -004E; C; 006E; # LATIN CAPITAL LETTER N
   1.329 -004F; C; 006F; # LATIN CAPITAL LETTER O
   1.330 -0050; C; 0070; # LATIN CAPITAL LETTER P
   1.331 -0051; C; 0071; # LATIN CAPITAL LETTER Q
   1.332 -0052; C; 0072; # LATIN CAPITAL LETTER R
   1.333 -0053; C; 0073; # LATIN CAPITAL LETTER S
   1.334 -0054; C; 0074; # LATIN CAPITAL LETTER T
   1.335 -0055; C; 0075; # LATIN CAPITAL LETTER U
   1.336 -0056; C; 0076; # LATIN CAPITAL LETTER V
   1.337 -0057; C; 0077; # LATIN CAPITAL LETTER W
   1.338 -0058; C; 0078; # LATIN CAPITAL LETTER X
   1.339 -0059; C; 0079; # LATIN CAPITAL LETTER Y
   1.340 -005A; C; 007A; # LATIN CAPITAL LETTER Z
   1.341 -00B5; C; 03BC; # MICRO SIGN
   1.342 -00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE
   1.343 -00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE
   1.344 -00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
   1.345 -00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE
   1.346 -00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS
   1.347 -00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE
   1.348 -00C6; C; 00E6; # LATIN CAPITAL LETTER AE
   1.349 -00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA
   1.350 -00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE
   1.351 -00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE
   1.352 -00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
   1.353 -00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS
   1.354 -00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE
   1.355 -00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE
   1.356 -00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
   1.357 -00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS
   1.358 -00D0; C; 00F0; # LATIN CAPITAL LETTER ETH
   1.359 -00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE
   1.360 -00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE
   1.361 -00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE
   1.362 -00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
   1.363 -00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE
   1.364 -00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS
   1.365 -00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE
   1.366 -00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE
   1.367 -00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE
   1.368 -00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
   1.369 -00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS
   1.370 -00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE
   1.371 -00DE; C; 00FE; # LATIN CAPITAL LETTER THORN
   1.372 -00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S
   1.373 -0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON
   1.374 -0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE
   1.375 -0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK
   1.376 -0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE
   1.377 -0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
   1.378 -010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE
   1.379 -010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON
   1.380 -010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON
   1.381 -0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE
   1.382 -0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON
   1.383 -0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE
   1.384 -0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE
   1.385 -0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK
   1.386 -011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON
   1.387 -011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
   1.388 -011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE
   1.389 -0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE
   1.390 -0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA
   1.391 -0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
   1.392 -0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE
   1.393 -0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE
   1.394 -012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON
   1.395 -012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE
   1.396 -012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK
   1.397 -0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
   1.398 -0132; C; 0133; # LATIN CAPITAL LIGATURE IJ
   1.399 -0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
   1.400 -0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA
   1.401 -0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE
   1.402 -013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA
   1.403 -013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON
   1.404 -013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
   1.405 -0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE
   1.406 -0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE
   1.407 -0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA
   1.408 -0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON
   1.409 -0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
   1.410 -014A; C; 014B; # LATIN CAPITAL LETTER ENG
   1.411 -014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON
   1.412 -014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE
   1.413 -0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
   1.414 -0152; C; 0153; # LATIN CAPITAL LIGATURE OE
   1.415 -0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE
   1.416 -0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA
   1.417 -0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON
   1.418 -015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE
   1.419 -015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
   1.420 -015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA
   1.421 -0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON
   1.422 -0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA
   1.423 -0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON
   1.424 -0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE
   1.425 -0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE
   1.426 -016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON
   1.427 -016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE
   1.428 -016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE
   1.429 -0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
   1.430 -0172; C; 0173; # LATIN CAPITAL LETTER U WITH OGONEK
   1.431 -0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
   1.432 -0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
   1.433 -0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS
   1.434 -0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE
   1.435 -017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE
   1.436 -017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON
   1.437 -017F; C; 0073; # LATIN SMALL LETTER LONG S
   1.438 -0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK
   1.439 -0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR
   1.440 -0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX
   1.441 -0186; C; 0254; # LATIN CAPITAL LETTER OPEN O
   1.442 -0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK
   1.443 -0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D
   1.444 -018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK
   1.445 -018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR
   1.446 -018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E
   1.447 -018F; C; 0259; # LATIN CAPITAL LETTER SCHWA
   1.448 -0190; C; 025B; # LATIN CAPITAL LETTER OPEN E
   1.449 -0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK
   1.450 -0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK
   1.451 -0194; C; 0263; # LATIN CAPITAL LETTER GAMMA
   1.452 -0196; C; 0269; # LATIN CAPITAL LETTER IOTA
   1.453 -0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE
   1.454 -0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK
   1.455 -019C; C; 026F; # LATIN CAPITAL LETTER TURNED M
   1.456 -019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK
   1.457 -019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
   1.458 -01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN
   1.459 -01A2; C; 01A3; # LATIN CAPITAL LETTER OI
   1.460 -01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK
   1.461 -01A6; C; 0280; # LATIN LETTER YR
   1.462 -01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO
   1.463 -01A9; C; 0283; # LATIN CAPITAL LETTER ESH
   1.464 -01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK
   1.465 -01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
   1.466 -01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN
   1.467 -01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON
   1.468 -01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK
   1.469 -01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK
   1.470 -01B5; C; 01B6; # LATIN CAPITAL LETTER Z WITH STROKE
   1.471 -01B7; C; 0292; # LATIN CAPITAL LETTER EZH
   1.472 -01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED
   1.473 -01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE
   1.474 -01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON
   1.475 -01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
   1.476 -01C7; C; 01C9; # LATIN CAPITAL LETTER LJ
   1.477 -01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J
   1.478 -01CA; C; 01CC; # LATIN CAPITAL LETTER NJ
   1.479 -01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
   1.480 -01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON
   1.481 -01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON
   1.482 -01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON
   1.483 -01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON
   1.484 -01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
   1.485 -01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
   1.486 -01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
   1.487 -01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
   1.488 -01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
   1.489 -01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
   1.490 -01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON
   1.491 -01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE
   1.492 -01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON
   1.493 -01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON
   1.494 -01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK
   1.495 -01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
   1.496 -01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON
   1.497 -01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
   1.498 -01F1; C; 01F3; # LATIN CAPITAL LETTER DZ
   1.499 -01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
   1.500 -01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE
   1.501 -01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR
   1.502 -01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN
   1.503 -01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE
   1.504 -01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
   1.505 -01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE
   1.506 -01FE; C; 01FF; # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
   1.507 -0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
   1.508 -0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE
   1.509 -0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
   1.510 -0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE
   1.511 -0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
   1.512 -020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE
   1.513 -020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
   1.514 -020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE
   1.515 -0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
   1.516 -0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE
   1.517 -0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
   1.518 -0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE
   1.519 -0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW
   1.520 -021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW
   1.521 -021C; C; 021D; # LATIN CAPITAL LETTER YOGH
   1.522 -021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON
   1.523 -0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
   1.524 -0222; C; 0223; # LATIN CAPITAL LETTER OU
   1.525 -0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK
   1.526 -0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE
   1.527 -0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA
   1.528 -022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
   1.529 -022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON
   1.530 -022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE
   1.531 -0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
   1.532 -0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON
   1.533 -023A; C; 2C65; # LATIN CAPITAL LETTER A WITH STROKE
   1.534 -023B; C; 023C; # LATIN CAPITAL LETTER C WITH STROKE
   1.535 -023D; C; 019A; # LATIN CAPITAL LETTER L WITH BAR
   1.536 -023E; C; 2C66; # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
   1.537 -0241; C; 0242; # LATIN CAPITAL LETTER GLOTTAL STOP
   1.538 -0243; C; 0180; # LATIN CAPITAL LETTER B WITH STROKE
   1.539 -0244; C; 0289; # LATIN CAPITAL LETTER U BAR
   1.540 -0245; C; 028C; # LATIN CAPITAL LETTER TURNED V
   1.541 -0246; C; 0247; # LATIN CAPITAL LETTER E WITH STROKE
   1.542 -0248; C; 0249; # LATIN CAPITAL LETTER J WITH STROKE
   1.543 -024A; C; 024B; # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL
   1.544 -024C; C; 024D; # LATIN CAPITAL LETTER R WITH STROKE
   1.545 -024E; C; 024F; # LATIN CAPITAL LETTER Y WITH STROKE
   1.546 -0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI
   1.547 -0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS
   1.548 -0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS
   1.549 -0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS
   1.550 -038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS
   1.551 -038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS
   1.552 -038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS
   1.553 -038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS
   1.554 -0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
   1.555 -0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA
   1.556 -0392; C; 03B2; # GREEK CAPITAL LETTER BETA
   1.557 -0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA
   1.558 -0394; C; 03B4; # GREEK CAPITAL LETTER DELTA
   1.559 -0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON
   1.560 -0396; C; 03B6; # GREEK CAPITAL LETTER ZETA
   1.561 -0397; C; 03B7; # GREEK CAPITAL LETTER ETA
   1.562 -0398; C; 03B8; # GREEK CAPITAL LETTER THETA
   1.563 -0399; C; 03B9; # GREEK CAPITAL LETTER IOTA
   1.564 -039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA
   1.565 -039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA
   1.566 -039C; C; 03BC; # GREEK CAPITAL LETTER MU
   1.567 -039D; C; 03BD; # GREEK CAPITAL LETTER NU
   1.568 -039E; C; 03BE; # GREEK CAPITAL LETTER XI
   1.569 -039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON
   1.570 -03A0; C; 03C0; # GREEK CAPITAL LETTER PI
   1.571 -03A1; C; 03C1; # GREEK CAPITAL LETTER RHO
   1.572 -03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA
   1.573 -03A4; C; 03C4; # GREEK CAPITAL LETTER TAU
   1.574 -03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON
   1.575 -03A6; C; 03C6; # GREEK CAPITAL LETTER PHI
   1.576 -03A7; C; 03C7; # GREEK CAPITAL LETTER CHI
   1.577 -03A8; C; 03C8; # GREEK CAPITAL LETTER PSI
   1.578 -03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA
   1.579 -03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
   1.580 -03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
   1.581 -03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
   1.582 -03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA
   1.583 -03D0; C; 03B2; # GREEK BETA SYMBOL
   1.584 -03D1; C; 03B8; # GREEK THETA SYMBOL
   1.585 -03D5; C; 03C6; # GREEK PHI SYMBOL
   1.586 -03D6; C; 03C0; # GREEK PI SYMBOL
   1.587 -03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA
   1.588 -03DA; C; 03DB; # GREEK LETTER STIGMA
   1.589 -03DC; C; 03DD; # GREEK LETTER DIGAMMA
   1.590 -03DE; C; 03DF; # GREEK LETTER KOPPA
   1.591 -03E0; C; 03E1; # GREEK LETTER SAMPI
   1.592 -03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI
   1.593 -03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI
   1.594 -03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI
   1.595 -03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI
   1.596 -03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA
   1.597 -03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA
   1.598 -03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI
   1.599 -03F0; C; 03BA; # GREEK KAPPA SYMBOL
   1.600 -03F1; C; 03C1; # GREEK RHO SYMBOL
   1.601 -03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL
   1.602 -03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL
   1.603 -03F7; C; 03F8; # GREEK CAPITAL LETTER SHO
   1.604 -03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL
   1.605 -03FA; C; 03FB; # GREEK CAPITAL LETTER SAN
   1.606 -03FD; C; 037B; # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL
   1.607 -03FE; C; 037C; # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL
   1.608 -03FF; C; 037D; # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
   1.609 -0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE
   1.610 -0401; C; 0451; # CYRILLIC CAPITAL LETTER IO
   1.611 -0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE
   1.612 -0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE
   1.613 -0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE
   1.614 -0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE
   1.615 -0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
   1.616 -0407; C; 0457; # CYRILLIC CAPITAL LETTER YI
   1.617 -0408; C; 0458; # CYRILLIC CAPITAL LETTER JE
   1.618 -0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE
   1.619 -040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE
   1.620 -040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE
   1.621 -040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE
   1.622 -040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE
   1.623 -040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U
   1.624 -040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE
   1.625 -0410; C; 0430; # CYRILLIC CAPITAL LETTER A
   1.626 -0411; C; 0431; # CYRILLIC CAPITAL LETTER BE
   1.627 -0412; C; 0432; # CYRILLIC CAPITAL LETTER VE
   1.628 -0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE
   1.629 -0414; C; 0434; # CYRILLIC CAPITAL LETTER DE
   1.630 -0415; C; 0435; # CYRILLIC CAPITAL LETTER IE
   1.631 -0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE
   1.632 -0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE
   1.633 -0418; C; 0438; # CYRILLIC CAPITAL LETTER I
   1.634 -0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I
   1.635 -041A; C; 043A; # CYRILLIC CAPITAL LETTER KA
   1.636 -041B; C; 043B; # CYRILLIC CAPITAL LETTER EL
   1.637 -041C; C; 043C; # CYRILLIC CAPITAL LETTER EM
   1.638 -041D; C; 043D; # CYRILLIC CAPITAL LETTER EN
   1.639 -041E; C; 043E; # CYRILLIC CAPITAL LETTER O
   1.640 -041F; C; 043F; # CYRILLIC CAPITAL LETTER PE
   1.641 -0420; C; 0440; # CYRILLIC CAPITAL LETTER ER
   1.642 -0421; C; 0441; # CYRILLIC CAPITAL LETTER ES
   1.643 -0422; C; 0442; # CYRILLIC CAPITAL LETTER TE
   1.644 -0423; C; 0443; # CYRILLIC CAPITAL LETTER U
   1.645 -0424; C; 0444; # CYRILLIC CAPITAL LETTER EF
   1.646 -0425; C; 0445; # CYRILLIC CAPITAL LETTER HA
   1.647 -0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE
   1.648 -0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE
   1.649 -0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA
   1.650 -0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA
   1.651 -042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN
   1.652 -042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU
   1.653 -042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN
   1.654 -042D; C; 044D; # CYRILLIC CAPITAL LETTER E
   1.655 -042E; C; 044E; # CYRILLIC CAPITAL LETTER YU
   1.656 -042F; C; 044F; # CYRILLIC CAPITAL LETTER YA
   1.657 -0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA
   1.658 -0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT
   1.659 -0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E
   1.660 -0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS
   1.661 -0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
   1.662 -046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS
   1.663 -046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
   1.664 -046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI
   1.665 -0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI
   1.666 -0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA
   1.667 -0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA
   1.668 -0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
   1.669 -0478; C; 0479; # CYRILLIC CAPITAL LETTER UK
   1.670 -047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA
   1.671 -047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
   1.672 -047E; C; 047F; # CYRILLIC CAPITAL LETTER OT
   1.673 -0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA
   1.674 -048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL
   1.675 -048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
   1.676 -048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK
   1.677 -0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
   1.678 -0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE
   1.679 -0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
   1.680 -0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
   1.681 -0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
   1.682 -049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
   1.683 -049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
   1.684 -049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE
   1.685 -04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA
   1.686 -04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
   1.687 -04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE
   1.688 -04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
   1.689 -04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
   1.690 -04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
   1.691 -04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
   1.692 -04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U
   1.693 -04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
   1.694 -04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
   1.695 -04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE
   1.696 -04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
   1.697 -04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
   1.698 -04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA
   1.699 -04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
   1.700 -04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
   1.701 -04C0; C; 04CF; # CYRILLIC LETTER PALOCHKA
   1.702 -04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE
   1.703 -04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK
   1.704 -04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL
   1.705 -04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK
   1.706 -04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL
   1.707 -04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
   1.708 -04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL
   1.709 -04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE
   1.710 -04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
   1.711 -04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE
   1.712 -04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE
   1.713 -04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA
   1.714 -04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
   1.715 -04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
   1.716 -04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
   1.717 -04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
   1.718 -04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON
   1.719 -04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS
   1.720 -04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS
   1.721 -04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O
   1.722 -04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
   1.723 -04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS
   1.724 -04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON
   1.725 -04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS
   1.726 -04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
   1.727 -04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
   1.728 -04F6; C; 04F7; # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER
   1.729 -04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
   1.730 -04FA; C; 04FB; # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK
   1.731 -04FC; C; 04FD; # CYRILLIC CAPITAL LETTER HA WITH HOOK
   1.732 -04FE; C; 04FF; # CYRILLIC CAPITAL LETTER HA WITH STROKE
   1.733 -0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE
   1.734 -0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE
   1.735 -0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE
   1.736 -0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE
   1.737 -0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE
   1.738 -050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE
   1.739 -050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE
   1.740 -050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE
   1.741 -0510; C; 0511; # CYRILLIC CAPITAL LETTER REVERSED ZE
   1.742 -0512; C; 0513; # CYRILLIC CAPITAL LETTER EL WITH HOOK
   1.743 -0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB
   1.744 -0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN
   1.745 -0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM
   1.746 -0534; C; 0564; # ARMENIAN CAPITAL LETTER DA
   1.747 -0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH
   1.748 -0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA
   1.749 -0537; C; 0567; # ARMENIAN CAPITAL LETTER EH
   1.750 -0538; C; 0568; # ARMENIAN CAPITAL LETTER ET
   1.751 -0539; C; 0569; # ARMENIAN CAPITAL LETTER TO
   1.752 -053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE
   1.753 -053B; C; 056B; # ARMENIAN CAPITAL LETTER INI
   1.754 -053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN
   1.755 -053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH
   1.756 -053E; C; 056E; # ARMENIAN CAPITAL LETTER CA
   1.757 -053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN
   1.758 -0540; C; 0570; # ARMENIAN CAPITAL LETTER HO
   1.759 -0541; C; 0571; # ARMENIAN CAPITAL LETTER JA
   1.760 -0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD
   1.761 -0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH
   1.762 -0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN
   1.763 -0545; C; 0575; # ARMENIAN CAPITAL LETTER YI
   1.764 -0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW
   1.765 -0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA
   1.766 -0548; C; 0578; # ARMENIAN CAPITAL LETTER VO
   1.767 -0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA
   1.768 -054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH
   1.769 -054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH
   1.770 -054C; C; 057C; # ARMENIAN CAPITAL LETTER RA
   1.771 -054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH
   1.772 -054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW
   1.773 -054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN
   1.774 -0550; C; 0580; # ARMENIAN CAPITAL LETTER REH
   1.775 -0551; C; 0581; # ARMENIAN CAPITAL LETTER CO
   1.776 -0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN
   1.777 -0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR
   1.778 -0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH
   1.779 -0555; C; 0585; # ARMENIAN CAPITAL LETTER OH
   1.780 -0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH
   1.781 -0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN
   1.782 -10A0; C; 2D00; # GEORGIAN CAPITAL LETTER AN
   1.783 -10A1; C; 2D01; # GEORGIAN CAPITAL LETTER BAN
   1.784 -10A2; C; 2D02; # GEORGIAN CAPITAL LETTER GAN
   1.785 -10A3; C; 2D03; # GEORGIAN CAPITAL LETTER DON
   1.786 -10A4; C; 2D04; # GEORGIAN CAPITAL LETTER EN
   1.787 -10A5; C; 2D05; # GEORGIAN CAPITAL LETTER VIN
   1.788 -10A6; C; 2D06; # GEORGIAN CAPITAL LETTER ZEN
   1.789 -10A7; C; 2D07; # GEORGIAN CAPITAL LETTER TAN
   1.790 -10A8; C; 2D08; # GEORGIAN CAPITAL LETTER IN
   1.791 -10A9; C; 2D09; # GEORGIAN CAPITAL LETTER KAN
   1.792 -10AA; C; 2D0A; # GEORGIAN CAPITAL LETTER LAS
   1.793 -10AB; C; 2D0B; # GEORGIAN CAPITAL LETTER MAN
   1.794 -10AC; C; 2D0C; # GEORGIAN CAPITAL LETTER NAR
   1.795 -10AD; C; 2D0D; # GEORGIAN CAPITAL LETTER ON
   1.796 -10AE; C; 2D0E; # GEORGIAN CAPITAL LETTER PAR
   1.797 -10AF; C; 2D0F; # GEORGIAN CAPITAL LETTER ZHAR
   1.798 -10B0; C; 2D10; # GEORGIAN CAPITAL LETTER RAE
   1.799 -10B1; C; 2D11; # GEORGIAN CAPITAL LETTER SAN
   1.800 -10B2; C; 2D12; # GEORGIAN CAPITAL LETTER TAR
   1.801 -10B3; C; 2D13; # GEORGIAN CAPITAL LETTER UN
   1.802 -10B4; C; 2D14; # GEORGIAN CAPITAL LETTER PHAR
   1.803 -10B5; C; 2D15; # GEORGIAN CAPITAL LETTER KHAR
   1.804 -10B6; C; 2D16; # GEORGIAN CAPITAL LETTER GHAN
   1.805 -10B7; C; 2D17; # GEORGIAN CAPITAL LETTER QAR
   1.806 -10B8; C; 2D18; # GEORGIAN CAPITAL LETTER SHIN
   1.807 -10B9; C; 2D19; # GEORGIAN CAPITAL LETTER CHIN
   1.808 -10BA; C; 2D1A; # GEORGIAN CAPITAL LETTER CAN
   1.809 -10BB; C; 2D1B; # GEORGIAN CAPITAL LETTER JIL
   1.810 -10BC; C; 2D1C; # GEORGIAN CAPITAL LETTER CIL
   1.811 -10BD; C; 2D1D; # GEORGIAN CAPITAL LETTER CHAR
   1.812 -10BE; C; 2D1E; # GEORGIAN CAPITAL LETTER XAN
   1.813 -10BF; C; 2D1F; # GEORGIAN CAPITAL LETTER JHAN
   1.814 -10C0; C; 2D20; # GEORGIAN CAPITAL LETTER HAE
   1.815 -10C1; C; 2D21; # GEORGIAN CAPITAL LETTER HE
   1.816 -10C2; C; 2D22; # GEORGIAN CAPITAL LETTER HIE
   1.817 -10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE
   1.818 -10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR
   1.819 -10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE
   1.820 -1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW
   1.821 -1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE
   1.822 -1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW
   1.823 -1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW
   1.824 -1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
   1.825 -1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE
   1.826 -1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW
   1.827 -1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW
   1.828 -1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA
   1.829 -1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
   1.830 -1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
   1.831 -1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
   1.832 -1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
   1.833 -1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW
   1.834 -1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
   1.835 -1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE
   1.836 -1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON
   1.837 -1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE
   1.838 -1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW
   1.839 -1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS
   1.840 -1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA
   1.841 -1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW
   1.842 -1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW
   1.843 -1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
   1.844 -1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE
   1.845 -1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW
   1.846 -1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW
   1.847 -1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW
   1.848 -1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
   1.849 -1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW
   1.850 -1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
   1.851 -1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE
   1.852 -1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE
   1.853 -1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW
   1.854 -1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE
   1.855 -1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW
   1.856 -1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW
   1.857 -1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
   1.858 -1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
   1.859 -1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
   1.860 -1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
   1.861 -1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
   1.862 -1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE
   1.863 -1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE
   1.864 -1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE
   1.865 -1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW
   1.866 -1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
   1.867 -1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW
   1.868 -1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE
   1.869 -1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW
   1.870 -1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
   1.871 -1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
   1.872 -1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
   1.873 -1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE
   1.874 -1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW
   1.875 -1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW
   1.876 -1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
   1.877 -1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
   1.878 -1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW
   1.879 -1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
   1.880 -1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
   1.881 -1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
   1.882 -1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE
   1.883 -1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW
   1.884 -1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE
   1.885 -1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE
   1.886 -1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS
   1.887 -1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE
   1.888 -1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW
   1.889 -1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE
   1.890 -1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS
   1.891 -1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE
   1.892 -1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
   1.893 -1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW
   1.894 -1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW
   1.895 -1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW
   1.896 -1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS
   1.897 -1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE
   1.898 -1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE
   1.899 -1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING
   1.900 -1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE
   1.901 -1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW
   1.902 -1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE
   1.903 -1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
   1.904 -1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
   1.905 -1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
   1.906 -1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
   1.907 -1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
   1.908 -1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
   1.909 -1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
   1.910 -1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
   1.911 -1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE
   1.912 -1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
   1.913 -1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW
   1.914 -1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE
   1.915 -1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE
   1.916 -1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
   1.917 -1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
   1.918 -1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
   1.919 -1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
   1.920 -1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
   1.921 -1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE
   1.922 -1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW
   1.923 -1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW
   1.924 -1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE
   1.925 -1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
   1.926 -1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
   1.927 -1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
   1.928 -1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
   1.929 -1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
   1.930 -1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE
   1.931 -1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE
   1.932 -1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
   1.933 -1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE
   1.934 -1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
   1.935 -1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW
   1.936 -1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE
   1.937 -1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE
   1.938 -1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE
   1.939 -1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
   1.940 -1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE
   1.941 -1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
   1.942 -1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE
   1.943 -1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW
   1.944 -1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE
   1.945 -1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE
   1.946 -1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI
   1.947 -1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA
   1.948 -1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA
   1.949 -1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA
   1.950 -1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA
   1.951 -1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA
   1.952 -1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI
   1.953 -1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
   1.954 -1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI
   1.955 -1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA
   1.956 -1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA
   1.957 -1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA
   1.958 -1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA
   1.959 -1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
   1.960 -1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI
   1.961 -1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA
   1.962 -1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA
   1.963 -1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA
   1.964 -1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA
   1.965 -1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA
   1.966 -1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI
   1.967 -1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
   1.968 -1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI
   1.969 -1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA
   1.970 -1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA
   1.971 -1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA
   1.972 -1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA
   1.973 -1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA
   1.974 -1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI
   1.975 -1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
   1.976 -1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI
   1.977 -1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA
   1.978 -1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA
   1.979 -1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA
   1.980 -1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA
   1.981 -1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
   1.982 -1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI
   1.983 -1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
   1.984 -1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
   1.985 -1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
   1.986 -1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA
   1.987 -1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
   1.988 -1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
   1.989 -1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
   1.990 -1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI
   1.991 -1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA
   1.992 -1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA
   1.993 -1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA
   1.994 -1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA
   1.995 -1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA
   1.996 -1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI
   1.997 -1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
   1.998 -1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
   1.999 -1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
  1.1000 -1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
  1.1001 -1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
  1.1002 -1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
  1.1003 -1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
  1.1004 -1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
  1.1005 -1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
  1.1006 -1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
  1.1007 -1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
  1.1008 -1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
  1.1009 -1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
  1.1010 -1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
  1.1011 -1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
  1.1012 -1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
  1.1013 -1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
  1.1014 -1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
  1.1015 -1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
  1.1016 -1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
  1.1017 -1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
  1.1018 -1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
  1.1019 -1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
  1.1020 -1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
  1.1021 -1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
  1.1022 -1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
  1.1023 -1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
  1.1024 -1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
  1.1025 -1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
  1.1026 -1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
  1.1027 -1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
  1.1028 -1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
  1.1029 -1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
  1.1030 -1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
  1.1031 -1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
  1.1032 -1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
  1.1033 -1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
  1.1034 -1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
  1.1035 -1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
  1.1036 -1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
  1.1037 -1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
  1.1038 -1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
  1.1039 -1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
  1.1040 -1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
  1.1041 -1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
  1.1042 -1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
  1.1043 -1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
  1.1044 -1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
  1.1045 -1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
  1.1046 -1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
  1.1047 -1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
  1.1048 -1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
  1.1049 -1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
  1.1050 -1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
  1.1051 -1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
  1.1052 -1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
  1.1053 -1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
  1.1054 -1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
  1.1055 -1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
  1.1056 -1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
  1.1057 -1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
  1.1058 -1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
  1.1059 -1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
  1.1060 -1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
  1.1061 -1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
  1.1062 -1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
  1.1063 -1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
  1.1064 -1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
  1.1065 -1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
  1.1066 -1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
  1.1067 -1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
  1.1068 -1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
  1.1069 -1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
  1.1070 -1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
  1.1071 -1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
  1.1072 -1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
  1.1073 -1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
  1.1074 -1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
  1.1075 -1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY
  1.1076 -1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON
  1.1077 -1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA
  1.1078 -1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA
  1.1079 -1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
  1.1080 -1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
  1.1081 -1FBE; C; 03B9; # GREEK PROSGEGRAMMENI
  1.1082 -1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
  1.1083 -1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
  1.1084 -1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
  1.1085 -1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI
  1.1086 -1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
  1.1087 -1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA
  1.1088 -1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA
  1.1089 -1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA
  1.1090 -1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA
  1.1091 -1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
  1.1092 -1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
  1.1093 -1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
  1.1094 -1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
  1.1095 -1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
  1.1096 -1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
  1.1097 -1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY
  1.1098 -1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON
  1.1099 -1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA
  1.1100 -1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA
  1.1101 -1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
  1.1102 -1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
  1.1103 -1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI
  1.1104 -1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
  1.1105 -1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
  1.1106 -1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY
  1.1107 -1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON
  1.1108 -1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA
  1.1109 -1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA
  1.1110 -1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA
  1.1111 -1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
  1.1112 -1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
  1.1113 -1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
  1.1114 -1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
  1.1115 -1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
  1.1116 -1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA
  1.1117 -1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA
  1.1118 -1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA
  1.1119 -1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA
  1.1120 -1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
  1.1121 -1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
  1.1122 -2126; C; 03C9; # OHM SIGN
  1.1123 -212A; C; 006B; # KELVIN SIGN
  1.1124 -212B; C; 00E5; # ANGSTROM SIGN
  1.1125 -2132; C; 214E; # TURNED CAPITAL F
  1.1126 -2160; C; 2170; # ROMAN NUMERAL ONE
  1.1127 -2161; C; 2171; # ROMAN NUMERAL TWO
  1.1128 -2162; C; 2172; # ROMAN NUMERAL THREE
  1.1129 -2163; C; 2173; # ROMAN NUMERAL FOUR
  1.1130 -2164; C; 2174; # ROMAN NUMERAL FIVE
  1.1131 -2165; C; 2175; # ROMAN NUMERAL SIX
  1.1132 -2166; C; 2176; # ROMAN NUMERAL SEVEN
  1.1133 -2167; C; 2177; # ROMAN NUMERAL EIGHT
  1.1134 -2168; C; 2178; # ROMAN NUMERAL NINE
  1.1135 -2169; C; 2179; # ROMAN NUMERAL TEN
  1.1136 -216A; C; 217A; # ROMAN NUMERAL ELEVEN
  1.1137 -216B; C; 217B; # ROMAN NUMERAL TWELVE
  1.1138 -216C; C; 217C; # ROMAN NUMERAL FIFTY
  1.1139 -216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED
  1.1140 -216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED
  1.1141 -216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND
  1.1142 -2183; C; 2184; # ROMAN NUMERAL REVERSED ONE HUNDRED
  1.1143 -24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A
  1.1144 -24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B
  1.1145 -24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C
  1.1146 -24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D
  1.1147 -24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E
  1.1148 -24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F
  1.1149 -24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G
  1.1150 -24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H
  1.1151 -24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I
  1.1152 -24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J
  1.1153 -24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K
  1.1154 -24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L
  1.1155 -24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M
  1.1156 -24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N
  1.1157 -24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O
  1.1158 -24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P
  1.1159 -24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q
  1.1160 -24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R
  1.1161 -24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S
  1.1162 -24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T
  1.1163 -24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U
  1.1164 -24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V
  1.1165 -24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W
  1.1166 -24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X
  1.1167 -24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y
  1.1168 -24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z
  1.1169 -2C00; C; 2C30; # GLAGOLITIC CAPITAL LETTER AZU
  1.1170 -2C01; C; 2C31; # GLAGOLITIC CAPITAL LETTER BUKY
  1.1171 -2C02; C; 2C32; # GLAGOLITIC CAPITAL LETTER VEDE
  1.1172 -2C03; C; 2C33; # GLAGOLITIC CAPITAL LETTER GLAGOLI
  1.1173 -2C04; C; 2C34; # GLAGOLITIC CAPITAL LETTER DOBRO
  1.1174 -2C05; C; 2C35; # GLAGOLITIC CAPITAL LETTER YESTU
  1.1175 -2C06; C; 2C36; # GLAGOLITIC CAPITAL LETTER ZHIVETE
  1.1176 -2C07; C; 2C37; # GLAGOLITIC CAPITAL LETTER DZELO
  1.1177 -2C08; C; 2C38; # GLAGOLITIC CAPITAL LETTER ZEMLJA
  1.1178 -2C09; C; 2C39; # GLAGOLITIC CAPITAL LETTER IZHE
  1.1179 -2C0A; C; 2C3A; # GLAGOLITIC CAPITAL LETTER INITIAL IZHE
  1.1180 -2C0B; C; 2C3B; # GLAGOLITIC CAPITAL LETTER I
  1.1181 -2C0C; C; 2C3C; # GLAGOLITIC CAPITAL LETTER DJERVI
  1.1182 -2C0D; C; 2C3D; # GLAGOLITIC CAPITAL LETTER KAKO
  1.1183 -2C0E; C; 2C3E; # GLAGOLITIC CAPITAL LETTER LJUDIJE
  1.1184 -2C0F; C; 2C3F; # GLAGOLITIC CAPITAL LETTER MYSLITE
  1.1185 -2C10; C; 2C40; # GLAGOLITIC CAPITAL LETTER NASHI
  1.1186 -2C11; C; 2C41; # GLAGOLITIC CAPITAL LETTER ONU
  1.1187 -2C12; C; 2C42; # GLAGOLITIC CAPITAL LETTER POKOJI
  1.1188 -2C13; C; 2C43; # GLAGOLITIC CAPITAL LETTER RITSI
  1.1189 -2C14; C; 2C44; # GLAGOLITIC CAPITAL LETTER SLOVO
  1.1190 -2C15; C; 2C45; # GLAGOLITIC CAPITAL LETTER TVRIDO
  1.1191 -2C16; C; 2C46; # GLAGOLITIC CAPITAL LETTER UKU
  1.1192 -2C17; C; 2C47; # GLAGOLITIC CAPITAL LETTER FRITU
  1.1193 -2C18; C; 2C48; # GLAGOLITIC CAPITAL LETTER HERU
  1.1194 -2C19; C; 2C49; # GLAGOLITIC CAPITAL LETTER OTU
  1.1195 -2C1A; C; 2C4A; # GLAGOLITIC CAPITAL LETTER PE
  1.1196 -2C1B; C; 2C4B; # GLAGOLITIC CAPITAL LETTER SHTA
  1.1197 -2C1C; C; 2C4C; # GLAGOLITIC CAPITAL LETTER TSI
  1.1198 -2C1D; C; 2C4D; # GLAGOLITIC CAPITAL LETTER CHRIVI
  1.1199 -2C1E; C; 2C4E; # GLAGOLITIC CAPITAL LETTER SHA
  1.1200 -2C1F; C; 2C4F; # GLAGOLITIC CAPITAL LETTER YERU
  1.1201 -2C20; C; 2C50; # GLAGOLITIC CAPITAL LETTER YERI
  1.1202 -2C21; C; 2C51; # GLAGOLITIC CAPITAL LETTER YATI
  1.1203 -2C22; C; 2C52; # GLAGOLITIC CAPITAL LETTER SPIDERY HA
  1.1204 -2C23; C; 2C53; # GLAGOLITIC CAPITAL LETTER YU
  1.1205 -2C24; C; 2C54; # GLAGOLITIC CAPITAL LETTER SMALL YUS
  1.1206 -2C25; C; 2C55; # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL
  1.1207 -2C26; C; 2C56; # GLAGOLITIC CAPITAL LETTER YO
  1.1208 -2C27; C; 2C57; # GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS
  1.1209 -2C28; C; 2C58; # GLAGOLITIC CAPITAL LETTER BIG YUS
  1.1210 -2C29; C; 2C59; # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS
  1.1211 -2C2A; C; 2C5A; # GLAGOLITIC CAPITAL LETTER FITA
  1.1212 -2C2B; C; 2C5B; # GLAGOLITIC CAPITAL LETTER IZHITSA
  1.1213 -2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC
  1.1214 -2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A
  1.1215 -2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
  1.1216 -2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR
  1.1217 -2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE
  1.1218 -2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE
  1.1219 -2C64; C; 027D; # LATIN CAPITAL LETTER R WITH TAIL
  1.1220 -2C67; C; 2C68; # LATIN CAPITAL LETTER H WITH DESCENDER
  1.1221 -2C69; C; 2C6A; # LATIN CAPITAL LETTER K WITH DESCENDER
  1.1222 -2C6B; C; 2C6C; # LATIN CAPITAL LETTER Z WITH DESCENDER
  1.1223 -2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H
  1.1224 -2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA
  1.1225 -2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA
  1.1226 -2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA
  1.1227 -2C86; C; 2C87; # COPTIC CAPITAL LETTER DALDA
  1.1228 -2C88; C; 2C89; # COPTIC CAPITAL LETTER EIE
  1.1229 -2C8A; C; 2C8B; # COPTIC CAPITAL LETTER SOU
  1.1230 -2C8C; C; 2C8D; # COPTIC CAPITAL LETTER ZATA
  1.1231 -2C8E; C; 2C8F; # COPTIC CAPITAL LETTER HATE
  1.1232 -2C90; C; 2C91; # COPTIC CAPITAL LETTER THETHE
  1.1233 -2C92; C; 2C93; # COPTIC CAPITAL LETTER IAUDA
  1.1234 -2C94; C; 2C95; # COPTIC CAPITAL LETTER KAPA
  1.1235 -2C96; C; 2C97; # COPTIC CAPITAL LETTER LAULA
  1.1236 -2C98; C; 2C99; # COPTIC CAPITAL LETTER MI
  1.1237 -2C9A; C; 2C9B; # COPTIC CAPITAL LETTER NI
  1.1238 -2C9C; C; 2C9D; # COPTIC CAPITAL LETTER KSI
  1.1239 -2C9E; C; 2C9F; # COPTIC CAPITAL LETTER O
  1.1240 -2CA0; C; 2CA1; # COPTIC CAPITAL LETTER PI
  1.1241 -2CA2; C; 2CA3; # COPTIC CAPITAL LETTER RO
  1.1242 -2CA4; C; 2CA5; # COPTIC CAPITAL LETTER SIMA
  1.1243 -2CA6; C; 2CA7; # COPTIC CAPITAL LETTER TAU
  1.1244 -2CA8; C; 2CA9; # COPTIC CAPITAL LETTER UA
  1.1245 -2CAA; C; 2CAB; # COPTIC CAPITAL LETTER FI
  1.1246 -2CAC; C; 2CAD; # COPTIC CAPITAL LETTER KHI
  1.1247 -2CAE; C; 2CAF; # COPTIC CAPITAL LETTER PSI
  1.1248 -2CB0; C; 2CB1; # COPTIC CAPITAL LETTER OOU
  1.1249 -2CB2; C; 2CB3; # COPTIC CAPITAL LETTER DIALECT-P ALEF
  1.1250 -2CB4; C; 2CB5; # COPTIC CAPITAL LETTER OLD COPTIC AIN
  1.1251 -2CB6; C; 2CB7; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE
  1.1252 -2CB8; C; 2CB9; # COPTIC CAPITAL LETTER DIALECT-P KAPA
  1.1253 -2CBA; C; 2CBB; # COPTIC CAPITAL LETTER DIALECT-P NI
  1.1254 -2CBC; C; 2CBD; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI
  1.1255 -2CBE; C; 2CBF; # COPTIC CAPITAL LETTER OLD COPTIC OOU
  1.1256 -2CC0; C; 2CC1; # COPTIC CAPITAL LETTER SAMPI
  1.1257 -2CC2; C; 2CC3; # COPTIC CAPITAL LETTER CROSSED SHEI
  1.1258 -2CC4; C; 2CC5; # COPTIC CAPITAL LETTER OLD COPTIC SHEI
  1.1259 -2CC6; C; 2CC7; # COPTIC CAPITAL LETTER OLD COPTIC ESH
  1.1260 -2CC8; C; 2CC9; # COPTIC CAPITAL LETTER AKHMIMIC KHEI
  1.1261 -2CCA; C; 2CCB; # COPTIC CAPITAL LETTER DIALECT-P HORI
  1.1262 -2CCC; C; 2CCD; # COPTIC CAPITAL LETTER OLD COPTIC HORI
  1.1263 -2CCE; C; 2CCF; # COPTIC CAPITAL LETTER OLD COPTIC HA
  1.1264 -2CD0; C; 2CD1; # COPTIC CAPITAL LETTER L-SHAPED HA
  1.1265 -2CD2; C; 2CD3; # COPTIC CAPITAL LETTER OLD COPTIC HEI
  1.1266 -2CD4; C; 2CD5; # COPTIC CAPITAL LETTER OLD COPTIC HAT
  1.1267 -2CD6; C; 2CD7; # COPTIC CAPITAL LETTER OLD COPTIC GANGIA
  1.1268 -2CD8; C; 2CD9; # COPTIC CAPITAL LETTER OLD COPTIC DJA
  1.1269 -2CDA; C; 2CDB; # COPTIC CAPITAL LETTER OLD COPTIC SHIMA
  1.1270 -2CDC; C; 2CDD; # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA
  1.1271 -2CDE; C; 2CDF; # COPTIC CAPITAL LETTER OLD NUBIAN NGI
  1.1272 -2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI
  1.1273 -2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU
  1.1274 -FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF
  1.1275 -FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI
  1.1276 -FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL
  1.1277 -FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI
  1.1278 -FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL
  1.1279 -FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T
  1.1280 -FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST
  1.1281 -FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW
  1.1282 -FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH
  1.1283 -FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI
  1.1284 -FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW
  1.1285 -FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH
  1.1286 -FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A
  1.1287 -FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B
  1.1288 -FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C
  1.1289 -FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D
  1.1290 -FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E
  1.1291 -FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F
  1.1292 -FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G
  1.1293 -FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H
  1.1294 -FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I
  1.1295 -FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J
  1.1296 -FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K
  1.1297 -FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L
  1.1298 -FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M
  1.1299 -FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N
  1.1300 -FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O
  1.1301 -FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P
  1.1302 -FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q
  1.1303 -FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R
  1.1304 -FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S
  1.1305 -FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T
  1.1306 -FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U
  1.1307 -FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V
  1.1308 -FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W
  1.1309 -FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X
  1.1310 -FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y
  1.1311 -FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
  1.1312 -10400; C; 10428; # DESERET CAPITAL LETTER LONG I
  1.1313 -10401; C; 10429; # DESERET CAPITAL LETTER LONG E
  1.1314 -10402; C; 1042A; # DESERET CAPITAL LETTER LONG A
  1.1315 -10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH
  1.1316 -10404; C; 1042C; # DESERET CAPITAL LETTER LONG O
  1.1317 -10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO
  1.1318 -10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I
  1.1319 -10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E
  1.1320 -10408; C; 10430; # DESERET CAPITAL LETTER SHORT A
  1.1321 -10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH
  1.1322 -1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O
  1.1323 -1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO
  1.1324 -1040C; C; 10434; # DESERET CAPITAL LETTER AY
  1.1325 -1040D; C; 10435; # DESERET CAPITAL LETTER OW
  1.1326 -1040E; C; 10436; # DESERET CAPITAL LETTER WU
  1.1327 -1040F; C; 10437; # DESERET CAPITAL LETTER YEE
  1.1328 -10410; C; 10438; # DESERET CAPITAL LETTER H
  1.1329 -10411; C; 10439; # DESERET CAPITAL LETTER PEE
  1.1330 -10412; C; 1043A; # DESERET CAPITAL LETTER BEE
  1.1331 -10413; C; 1043B; # DESERET CAPITAL LETTER TEE
  1.1332 -10414; C; 1043C; # DESERET CAPITAL LETTER DEE
  1.1333 -10415; C; 1043D; # DESERET CAPITAL LETTER CHEE
  1.1334 -10416; C; 1043E; # DESERET CAPITAL LETTER JEE
  1.1335 -10417; C; 1043F; # DESERET CAPITAL LETTER KAY
  1.1336 -10418; C; 10440; # DESERET CAPITAL LETTER GAY
  1.1337 -10419; C; 10441; # DESERET CAPITAL LETTER EF
  1.1338 -1041A; C; 10442; # DESERET CAPITAL LETTER VEE
  1.1339 -1041B; C; 10443; # DESERET CAPITAL LETTER ETH
  1.1340 -1041C; C; 10444; # DESERET CAPITAL LETTER THEE
  1.1341 -1041D; C; 10445; # DESERET CAPITAL LETTER ES
  1.1342 -1041E; C; 10446; # DESERET CAPITAL LETTER ZEE
  1.1343 -1041F; C; 10447; # DESERET CAPITAL LETTER ESH
  1.1344 -10420; C; 10448; # DESERET CAPITAL LETTER ZHEE
  1.1345 -10421; C; 10449; # DESERET CAPITAL LETTER ER
  1.1346 -10422; C; 1044A; # DESERET CAPITAL LETTER EL
  1.1347 -10423; C; 1044B; # DESERET CAPITAL LETTER EM
  1.1348 -10424; C; 1044C; # DESERET CAPITAL LETTER EN
  1.1349 -10425; C; 1044D; # DESERET CAPITAL LETTER ENG
  1.1350 -10426; C; 1044E; # DESERET CAPITAL LETTER OI
  1.1351 -10427; C; 1044F; # DESERET CAPITAL LETTER EW
  1.1352 -END_OF_LIST
  1.1353 +$case_folding_string = File.open("CaseFolding.txt").read
  1.1354  
  1.1355  $case_folding = {}
  1.1356  $case_folding_string.chomp.split("\n").each do |line|
  1.1357 @@ -1514,8 +207,8 @@
  1.1358  comb_array = []
  1.1359  
  1.1360  chars.each do |char|
  1.1361 -  if char.decomp_type.nil? and char.decomp_mapping and
  1.1362 -      char.decomp_mapping.length == 2 and
  1.1363 +  if !char.nil? and char.decomp_type.nil? and char.decomp_mapping and
  1.1364 +      char.decomp_mapping.length == 2 and !char_hash[char.decomp_mapping[0]].nil? and
  1.1365        char_hash[char.decomp_mapping[0]].combining_class == 0 and
  1.1366        not $exclusions.include?(char.code)
  1.1367      unless comb1st_indicies[char.decomp_mapping[0]]

Impressum / About Us