webmcp

view framework/env/format/string.lua @ 110:0c4841af07a5

String truncating by counting Unicode codepoints in format.string(...)
(grapheme cluster boundary detection not implemented)
author jbe
date Sun Jan 12 03:57:47 2014 +0100 (2014-01-12)
parents 9fdfb27f8e67
children 43986d8dacf3
line source
1 --[[--
2 text = -- a string
3 format.string(
4 value, -- any value where tostring(value) gives a reasonable result
5 {
6 nil_as = nil_text, -- text to be returned for a nil value
7 truncate_mode = "codepoints", -- perform truncating by counting UTF-8 codepoints ("codepoints") or Unicode grapheme clusters ("graphmeclusters")
8 -- (currently only "codepoints" are supported and this option may be omitted)
9 truncate_at = truncate_at, -- truncate string after the given number of UTF-8 codepoints (or Unicode grapheme clusters)
10 truncate_if = truncate_if, -- truncate only if length exceeds the given number of UTF-8 codepoints (or Unicode grapheme clusters)
11 truncate_suffix = truncate_suffix -- string to append, if string was truncated
12 }
13 )
15 Formats a value as a text by calling tostring(...), unless the value is nil, in which case the text returned is chosen by the 'nil_as' option. Using the 'truncate_*' parameters, it is possible to show only the beginning of a string.
17 --]]--
-- Returns true if b is a UTF-8 continuation byte (binary 10xxxxxx).
-- A nil argument (read past the end of the string) yields false.
local function is_utf8_continuation_byte(b)
  return b ~= nil and b >= 128 and b < 192
end

-- Truncates a UTF-8 encoded string by counting Unicode codepoints.
--
-- Parameters:
--   str                    the string to be (possibly) truncated
--   truncate_at_codepoint  number of codepoints to keep when truncating
--                          (defaults to truncate_if_codepoint)
--   truncate_if_codepoint  truncate only if the string consists of more than
--                          this number of codepoints
--                          (defaults to truncate_at_codepoint)
--   suffix                 optional string appended to a truncated result
--
-- Returns str unchanged if it does not exceed the limit (or if no limit is
-- given at all), otherwise the leading part of str followed by the suffix.
--
-- Bytes that do not form a well-formed sequence (stray continuation bytes,
-- lead bytes followed by too few continuation bytes, bytes >= 248) are each
-- counted as one codepoint, so the function never fails on malformed input
-- and a cut is never placed between a lead byte and the continuation bytes
-- that were consumed together with it.
--
-- If truncate_at_codepoint is greater than truncate_if_codepoint, the cut
-- position has not been reached when the limit is exceeded; in that case the
-- string is cut right after the codepoint that exceeded the limit.
function truncate_codepoints(str, truncate_at_codepoint, truncate_if_codepoint, suffix)
  truncate_at_codepoint = truncate_at_codepoint or truncate_if_codepoint
  truncate_if_codepoint = truncate_if_codepoint or truncate_at_codepoint
  if truncate_if_codepoint == nil then
    -- neither limit was given: nothing to truncate
    return str
  end
  local length = string.len(str)
  local byte_pos = 1         -- index of the first byte of the next codepoint
  local codepoint_count = 0  -- number of codepoints consumed so far
  local truncate_at_byte     -- index of the last byte to keep when truncating
  while byte_pos <= length do
    if codepoint_count == truncate_at_codepoint then
      -- exactly truncate_at_codepoint codepoints end right before byte_pos
      truncate_at_byte = byte_pos - 1
    end
    local b1, b2, b3, b4 = string.byte(str, byte_pos, byte_pos + 3)
    -- Determine how many bytes belong to the codepoint starting at byte_pos:
    -- lead bytes 192..223 announce 2 bytes, 224..239 announce 3 bytes and
    -- 240..247 announce 4 bytes; anything else is consumed as a single byte.
    local sequence_length = 1
    if b1 >= 192 and b1 < 248 and is_utf8_continuation_byte(b2) then
      sequence_length = 2
      if b1 >= 224 and is_utf8_continuation_byte(b3) then
        sequence_length = 3
        if b1 >= 240 and is_utf8_continuation_byte(b4) then
          sequence_length = 4
        end
      end
    end
    byte_pos = byte_pos + sequence_length
    codepoint_count = codepoint_count + 1
    if codepoint_count > truncate_if_codepoint then
      return string.sub(str, 1, truncate_at_byte or (byte_pos - 1)) .. (suffix or "")
    end
  end
  return str
end
-- Formats a value as text.
--
-- A nil value yields options.nil_as (or the empty string if that option is
-- not set). Any other value is converted using tostring(...). If one of the
-- options 'truncate_at' or 'truncate_if' is set, the text is passed to
-- truncate_codepoints(...) together with the 'truncate_suffix' option.
function format.string(str, options)
  local opts = options or {}
  if str == nil then
    return opts.nil_as or ""
  end
  local cut_position, length_limit = opts.truncate_at, opts.truncate_if
  if cut_position or length_limit then
    -- TODO: Unicode grapheme cluster boundary detection is not implemented
    --       (Unicode codepoints are used instead)
    return truncate_codepoints(
      tostring(str),
      cut_position,
      length_limit,
      opts.truncate_suffix
    )
  end
  return tostring(str)
end

Impressum / About Us