--[[--
text =                                             -- a string
format.string(
  value,                                           -- any value where tostring(value) gives a reasonable result
  {
    nil_as                = nil_text,              -- text to be returned for a nil value
    truncate_mode         = "codepoints",          -- perform truncation by counting UTF-8 codepoints ("codepoints") or Unicode grapheme clusters
                                                   -- (currently only "codepoints" is supported and this option may be omitted)
    truncate_at           = truncate_at,           -- truncate string after the given number of UTF-8 codepoints (or Unicode grapheme clusters)
    truncate_suffix       = truncate_suffix,       -- string to append, if string was truncated (use boolean true for Unicode ellipsis)
    truncate_count_suffix = truncate_count_suffix  -- if true, then the total length (including suffix) may not exceed the given length
  }
)

Formats a value as a text by calling tostring(...), unless the value is nil, in which case the text returned is chosen by the 'nil_as' option (an empty string if 'nil_as' is not set). Using the 'truncate_*' parameters, it is possible to show only the beginning of a string. A string that does not exceed 'truncate_at' codepoints is returned unchanged and without the suffix.

--]]--

-- One multi-byte unit as counted by this module: a byte >= 128 followed by
-- up to three UTF-8 continuation bytes (128..191). Bytes below 128 are not
-- matched by this pattern and always count as one codepoint each.
local multibyte_pattern = '[\128-\255][\128-\191]?[\128-\191]?[\128-\191]?'

-- Same pattern, anchored, for matching exactly at a given byte position.
local multibyte_pattern_anchored = '^' .. multibyte_pattern

-- Returns the number of codepoints in str. Every multi-byte unit is
-- collapsed to a single byte, so the byte length of the result equals
-- the codepoint count.
local function codepoint_count(str)
  local collapsed = string.gsub(str, multibyte_pattern, 'x')
  return #collapsed
end

-- Returns the longest prefix of str that contains at most 'length'
-- codepoints. A 'length' of zero or less yields an empty string; a
-- 'length' beyond the end of the string yields the whole string.
local function codepoint_truncate(str, length)
  local byte_length = #str
  local byte_pos = 1
  local count = 0
  while count < length and byte_pos <= byte_length do
    -- Either a multi-byte unit starts here (skip it as a whole), or the
    -- byte at byte_pos is a single-byte character (skip just that byte).
    local _, unit_end = string.find(str, multibyte_pattern_anchored, byte_pos)
    byte_pos = (unit_end or byte_pos) + 1
    count = count + 1
  end
  return string.sub(str, 1, byte_pos - 1)
end

function format.string(str, options)
  options = options or {}
  if str == nil then
    return options.nil_as or ""
  end
  str = tostring(str)

  local truncate_at = options.truncate_at
  if not truncate_at then
    return str
  end

  -- TODO: Unicode grapheme cluster boundary detection is not implemented
  --       (Unicode codepoints are used instead)
  if codepoint_count(str) <= truncate_at then
    return str
  end

  local truncate_suffix = options.truncate_suffix
  if truncate_suffix == true then
    truncate_suffix = '\226\128\166'  -- U+2026 HORIZONTAL ELLIPSIS in UTF-8
  elseif not truncate_suffix then
    truncate_suffix = ''
  end

  local keep = truncate_at
  if options.truncate_count_suffix then
    -- Reserve room for the suffix so that text plus suffix stays within
    -- truncate_at. If the suffix alone is longer than truncate_at, no text
    -- is kept and the suffix is still returned in full.
    keep = keep - codepoint_count(truncate_suffix)
  end
  return codepoint_truncate(str, keep) .. truncate_suffix
end