webmcp

annotate framework/env/format/string.lua @ 110:0c4841af07a5

String truncating by counting Unicode codepoints in format.string(...)
(grapheme cluster boundary detection not implemented)
author jbe
date Sun Jan 12 03:57:47 2014 +0100 (2014-01-12)
parents 9fdfb27f8e67
children 43986d8dacf3
rev   line source
jbe/bsw@0 1 --[[--
jbe@110 2 text = -- a string
jbe/bsw@0 3 format.string(
jbe@110 4 value, -- any value where tostring(value) gives a reasonable result
jbe/bsw@0 5 {
jbe@110 6 nil_as = nil_text, -- text to be returned for a nil value
jbe@110 7 truncate_mode = "codepoints", -- perform truncation by counting UTF-8 codepoints ("codepoints") or Unicode grapheme clusters ("graphmeclusters")
jbe@110 8 -- (currently only "codepoints" are supported and this option may be omitted)
jbe@110 9 truncate_at = truncate_at, -- truncate string after the given number of UTF-8 codepoints (or Unicode grapheme clusters)
jbe@110 10 truncate_if = truncate_if, -- truncate only if length exceeds the given number of UTF-8 codepoints (or Unicode grapheme clusters)
jbe@110 11 truncate_suffix = truncate_suffix -- string to append, if string was truncated
jbe/bsw@0 12 }
jbe/bsw@0 13 )
jbe/bsw@0 14
jbe@110 15 Formats a value as a text by calling tostring(...), unless the value is nil, in which case the text returned is chosen by the 'nil_as' option. Using the 'truncate_*' parameters, it is possible to show only the beginning of a string.
jbe/bsw@0 16
jbe/bsw@0 17 --]]--
jbe/bsw@0 18
-- Returns the number of bytes occupied by the UTF-8 sequence that starts at
-- byte index 'byte_pos' (1-based) of 'str'. The caller must ensure that
-- 'byte_pos' is within the string.
--
-- Lead byte ranges: 0xC0-0xDF start a 2-byte sequence, 0xE0-0xEF a 3-byte
-- sequence, and 0xF0-0xF7 a 4-byte sequence. Malformed input never raises an
-- error: a lead byte consumes only as many continuation bytes (0x80-0xBF) as
-- actually follow it, and any other byte (ASCII, stray continuation byte,
-- invalid lead byte) counts as a sequence of length 1.
local function utf8_sequence_length(str, byte_pos)
  local b1, b2, b3, b4 = string.byte(str, byte_pos, byte_pos + 3)
  local expected = 1
  if b1 >= 240 and b1 < 248 then
    expected = 4
  elseif b1 >= 224 and b1 < 240 then
    expected = 3
  elseif b1 >= 192 and b1 < 224 then
    expected = 2
  end
  local length = 1
  if expected >= 2 and b2 and b2 >= 128 and b2 < 192 then
    length = 2
    if expected >= 3 and b3 and b3 >= 128 and b3 < 192 then
      length = 3
      if expected >= 4 and b4 and b4 >= 128 and b4 < 192 then
        length = 4
      end
    end
  end
  return length
end

-- Truncates the UTF-8 encoded string 'str' by counting Unicode codepoints.
--
-- Parameters:
--   str                    string to be truncated
--   truncate_at_codepoint  number of codepoints to keep when truncating
--                          (defaults to truncate_if_codepoint)
--   truncate_if_codepoint  truncate only if the string has more than this
--                          many codepoints (defaults to truncate_at_codepoint)
--   suffix                 optional string appended when truncation happened
--
-- Returns 'str' unchanged if no truncation is necessary (including the case
-- where both limits are nil, or the string has no more than
-- 'truncate_at_codepoint' codepoints); otherwise returns the first
-- 'truncate_at_codepoint' codepoints followed by 'suffix'.
--
-- NOTE: the function is intentionally kept global, as it was before, so that
-- existing callers outside this file keep working.
function truncate_codepoints(str, truncate_at_codepoint, truncate_if_codepoint, suffix)
  truncate_at_codepoint = truncate_at_codepoint or truncate_if_codepoint
  truncate_if_codepoint = truncate_if_codepoint or truncate_at_codepoint
  if not truncate_at_codepoint then
    -- neither limit given: nothing to compare against
    return str
  end
  local total_bytes = #str
  local byte_pos = 1        -- index of the first byte of the next codepoint
  local codepoint_count = 0 -- number of codepoints located before byte_pos
  local truncate_at_byte    -- index of the last byte to keep, once known
  while byte_pos <= total_bytes do
    -- Loop invariant: at least one more codepoint starts at byte_pos, so the
    -- string is known to contain more than 'codepoint_count' codepoints.
    if codepoint_count == truncate_at_codepoint then
      truncate_at_byte = byte_pos - 1
    end
    if truncate_at_byte and codepoint_count >= truncate_if_codepoint then
      return string.sub(str, 1, truncate_at_byte) .. (suffix or "")
    end
    byte_pos = byte_pos + utf8_sequence_length(str, byte_pos)
    codepoint_count = codepoint_count + 1
  end
  return str
end
jbe@110 60
-- Formats 'str' as text: nil yields options.nil_as (or the empty string),
-- any other value is converted with tostring(...). If options.truncate_at or
-- options.truncate_if is set, the text is additionally passed through
-- truncate_codepoints(...) together with options.truncate_suffix.
function format.string(str, options)
  local opts = options or {}
  if str == nil then
    return opts.nil_as or ""
  end
  if not (opts.truncate_at or opts.truncate_if) then
    return tostring(str)
  end
  -- TODO: Unicode grapheme cluster boundary detection is not implemented
  -- (Unicode codepoints are used instead)
  return truncate_codepoints(
    tostring(str),
    opts.truncate_at,
    opts.truncate_if,
    opts.truncate_suffix
  )
end

Impressum / About Us