| rev | line source | 
| jbe/bsw@0 | 1 --[[-- | 
| jbe@110 | 2 text =                                          -- a string | 
| jbe/bsw@0 | 3 format.string( | 
| jbe@111 | 4   value,                                           -- any value where tostring(value) gives a reasonable result | 
| jbe/bsw@0 | 5   { | 
| jbe@111 | 6     nil_as                = nil_text,              -- text to be returned for a nil value | 
| jbe@111 | 7     truncate_mode         = "codepoints",          -- perform truncation by counting UTF-8 codepoints ("codepoints") or Unicode grapheme clusters ("graphmeclusters") | 
| jbe@111 | 8                                                    -- (currently only "codepoints" are supported and this option may be omitted) | 
| jbe@111 | 9     truncate_at           = truncate_at,           -- truncate string after the given number of UTF-8 codepoints (or Unicode grapheme clusters) | 
| jbe@111 | 10     truncate_suffix       = truncate_suffix,       -- string to append, if string was truncated (use boolean true for Unicode ellipsis) | 
| jbe@112 | 11     truncate_count_suffix = truncate_count_suffix  -- if true, then the total length (including suffix) may not exceed the given length | 
| jbe/bsw@0 | 12   } | 
| jbe/bsw@0 | 13 ) | 
| jbe/bsw@0 | 14 | 
| jbe@110 | 15 Formats a value as a text by calling tostring(...), unless the value is nil, in which case the text returned is chosen by the 'nil_as' option. Using the 'truncate_*' parameters, it is possible to show only the beginning of a string. | 
| jbe/bsw@0 | 16 | 
| jbe/bsw@0 | 17 --]]-- | 
| jbe/bsw@0 | 18 | 
-- Counts the UTF-8 codepoints in 'str'.
--
-- Every byte of 128 or above, together with up to three directly following
-- continuation bytes (128..191), is collapsed into a single 'x'. Bytes below
-- 128 are left untouched, so the byte length of the collapsed string equals
-- the number of codepoints.
local function codepoint_count(str)
  -- string.gsub also returns the number of substitutions; only the
  -- substituted string (first result) is of interest here.
  local collapsed = string.gsub(
    str,
    '[\128-\255][\128-\191]?[\128-\191]?[\128-\191]?',
    'x'
  )
  return #collapsed
end
| jbe@111 | 22 | 
-- Returns the longest prefix of 'str' that contains at most 'length' UTF-8
-- codepoints.
--
-- A codepoint is taken to be either a single byte below 128, or a byte of
-- 128 or above followed by up to three continuation bytes (128..191). This
-- is the same rule codepoint_count() applies, so both functions agree on
-- malformed input as well.
--
-- str:     string to shorten
-- length:  maximum number of codepoints to keep; values <= 0 give ""
local function codepoint_truncate(str, length)
  local total = string.len(str)
  local byte_pos = 1  -- position of the first byte not yet consumed
  local count = 0     -- codepoints consumed so far
  while count < length and byte_pos <= total do
    local lead = string.byte(str, byte_pos)
    byte_pos = byte_pos + 1
    if lead >= 128 then
      -- Consume at most three continuation bytes belonging to this lead byte.
      local last = byte_pos + 2
      while byte_pos <= last do
        local cont = string.byte(str, byte_pos)
        if not cont or cont < 128 or cont > 191 then
          break
        end
        byte_pos = byte_pos + 1
      end
    end
    count = count + 1
  end
  return string.sub(str, 1, byte_pos - 1)
end
| jbe@110 | 77 | 
function format.string(str, options)
  local opts = options or {}

  -- A nil value is rendered as the configured placeholder (default: "").
  if str == nil then
    return opts.nil_as or ""
  end

  local text = tostring(str)
  local limit = opts.truncate_at
  if not limit then
    -- No truncation requested.
    return text
  end

  -- TODO: Unicode grapheme cluster boundary detection is not implemented
  -- (Unicode codepoints are used instead)

  -- Resolve the suffix: boolean true selects the Unicode ellipsis (U+2026),
  -- a missing or false value means "no suffix".
  local suffix = opts.truncate_suffix
  if suffix == true then
    suffix = '\226\128\166'
  elseif not suffix then
    suffix = ''
  end

  -- With 'truncate_count_suffix' set, the suffix's codepoints are deducted
  -- from the number of codepoints kept from the text.
  local reserved = 0
  if opts.truncate_count_suffix then
    reserved = codepoint_count(suffix)
  end

  if codepoint_count(text) > limit then
    return codepoint_truncate(text, limit - reserved) .. suffix
  end
  return text
end