webmcp
view framework/env/format/string.lua @ 111:43986d8dacf3
Better (and bugfixed) implementation of string truncating in format.string(...)
| author | jbe | 
|---|---|
| date | Mon Jan 13 21:37:23 2014 +0100 (2014-01-13) | 
| parents | 0c4841af07a5 | 
| children | 407633fd0e84 | 
 line source
     1 --[[--
     2 text =                                          -- a string
     3 format.string(
     4   value,                                           -- any value where tostring(value) gives a reasonable result
     5   {
     6     nil_as                = nil_text,              -- text to be returned for a nil value
      7     truncate_mode         = "codepoints",          -- perform truncation by counting UTF-8 codepoints ("codepoints") or Unicode grapheme clusters ("graphemeclusters")
     8                                                    -- (currently only "codepoints" are supported and this option may be omitted)
     9     truncate_at           = truncate_at,           -- truncate string after the given number of UTF-8 codepoints (or Unicode grapheme clusters)
    10     truncate_suffix       = truncate_suffix,       -- string to append, if string was truncated (use boolean true for Unicode ellipsis)
    11     truncate_count_suffix = truncate_count_suffix  -- unless explicitly set to false, the total length (including suffix) may not exceed the given length
    12   }
    13 )
    15 Formats a value as a text by calling tostring(...), unless the value is nil, in which case the text returned is chosen by the 'nil_as' option. Using the 'truncate_*' parameters, it is possible to show only the beginning of a string.
    17 --]]--
    19 local function codepoint_count(str)
    20   return #string.gsub(str, '[\128-\255][\128-\191]?[\128-\191]?[\128-\191]?', 'x')
    21 end
    23 local function codepoint_truncate(str, length)
    24   local byte_pos = 1
    25   local count = 0
    26   while count < length do
    27     b1, b2, b3, b4 = string.byte(str, byte_pos, byte_pos+3)
    28     if not b2 then
    29       break
    30     end
    31     b3 = b3 or 0
    32     b4 = b4 or 0
    33     if b1 >= 128 and b2 >= 128 and b2 <= 191 then
    34       if b3 >= 128 and b3 <= 191 then
    35         if b4 >= 128 and b4 <= 191 then
    36           byte_pos = byte_pos + 4
    37           count = count + 1
    38         elseif count + 1 < length and b4 < 128 then
    39           byte_pos = byte_pos + 4
    40           count = count + 2
    41         else
    42           byte_pos = byte_pos + 3
    43           count = count + 1
    44         end
    45       elseif count + 1 < length and b3 < 128 then
    46         if count + 2 < length and b4 < 128 then
    47           byte_pos = byte_pos + 4
    48           count = count + 3
    49         else
    50           byte_pos = byte_pos + 3
    51           count = count + 2
    52         end
    53       else
    54         byte_pos = byte_pos + 2
    55         count = count + 1
    56       end
    57     elseif count + 1 < length and b2 < 128 then
    58       if count + 2 < length and b3 < 128 then
    59         if count + 3 < length and b4 < 128 then
    60           byte_pos = byte_pos + 4
    61           count = count + 4
    62         else
    63           byte_pos = byte_pos + 3
    64           count = count + 3
    65         end
    66       else
    67         byte_pos = byte_pos + 2
    68         count = count + 2
    69       end
    70     else
    71       byte_pos = byte_pos + 1
    72       count = count + 1
    73     end
    74   end
    75   return string.sub(str, 1, byte_pos-1)
    76 end
    78 function format.string(str, options)
    79   local options = options or {}
    80   if str == nil then
    81     return options.nil_as or ""
    82   elseif options.truncate_at then
    83     str = tostring(str)
    84     -- TODO: Unicode grapheme cluster boundary detetion is not implemented
    85     -- (Unicode codepoints are used instead)
    86     local truncate_suffix = options.truncate_suffix
    87     if truncate_suffix == true then
    88       truncate_suffix = '\226\128\166'
    89     elseif not truncate_suffix then
    90       truncate_suffix = ''
    91     end
    92     if options.truncate_count_suffix ~= false and truncate_suffix then
    93       local suffix_length = codepoint_count(truncate_suffix)
    94       if codepoint_count(str) > options.truncate_at then
    95         return (
    96           codepoint_truncate(str, options.truncate_at - suffix_length) ..
    97           truncate_suffix
    98         )
    99       else
   100         return str
   101       end
   102     else
   103       if codepoint_count(str) > options.truncate_at then
   104         return codepoint_truncate(str, options.truncate_at) .. truncate_suffix
   105       else
   106         return str
   107       end
   108     end
   109   else
   110     return tostring(str)
   111   end
   112 end
