rev |
line source |
jbe/bsw@0
|
1 --[[--
|
jbe@110
|
2 text = -- a string
|
jbe/bsw@0
|
3 format.string(
|
jbe@111
|
4 value, -- any value where tostring(value) gives a reasonable result
|
jbe/bsw@0
|
5 {
|
jbe@111
|
6 nil_as = nil_text, -- text to be returned for a nil value
|
jbe@111
|
7 truncate_mode = "codepoints", -- perform truncation by counting UTF-8 codepoints ("codepoints") or Unicode grapheme clusters ("graphmeclusters")
|
jbe@111
|
8 -- (currently only "codepoints" are supported and this option may be omitted)
|
jbe@111
|
9 truncate_at = truncate_at, -- truncate string after the given number of UTF-8 codepoints (or Unicode grapheme clusters)
|
jbe@111
|
10 truncate_suffix = truncate_suffix, -- string to append, if string was truncated (use boolean true for Unicode ellipsis)
|
jbe@111
|
11 truncate_count_suffix = truncate_count_suffix -- unless explicitly set to false, the total length (including suffix) may not exceed the given length
|
jbe/bsw@0
|
12 }
|
jbe/bsw@0
|
13 )
|
jbe/bsw@0
|
14
|
jbe@110
|
15 Formats a value as a text by calling tostring(...), unless the value is nil, in which case the text returned is chosen by the 'nil_as' option. Using the 'truncate_*' parameters, it is possible to show only the beginning of a string.
|
jbe/bsw@0
|
16
|
jbe/bsw@0
|
17 --]]--
|
jbe/bsw@0
|
18
|
jbe@111
|
-- Returns the number of UTF-8 codepoints in 'str'.
-- Every byte >= 128 together with up to three following continuation bytes
-- (128..191) is collapsed into a single placeholder character; the byte
-- length of the collapsed string is then the codepoint count.
-- No UTF-8 validation is performed: stray high bytes count as one codepoint.
local function codepoint_count(str)
  local collapsed = string.gsub(
    str,
    '[\128-\255][\128-\191]?[\128-\191]?[\128-\191]?',
    'x'
  )
  return #collapsed
end
|
jbe@111
|
22
|
jbe@111
|
-- Returns the leading part of 'str' that contains at most 'length' UTF-8
-- codepoints (the whole string if it has fewer codepoints than that).
--
-- A codepoint is delimited the same way codepoint_count() does it: either a
-- single byte < 128, or one byte >= 128 followed by up to three continuation
-- bytes (128..191). No UTF-8 validation is performed, so a stray high byte
-- counts as one codepoint of its own.
--
-- str:    string to truncate
-- length: maximum number of codepoints to keep; values <= 0 yield ""
local function codepoint_truncate(str, length)
  local total_bytes = string.len(str)
  local byte_pos = 1  -- index of the next byte that has not been consumed yet
  local count = 0     -- number of codepoints consumed so far
  while count < length and byte_pos <= total_bytes do
    local lead = string.byte(str, byte_pos)
    byte_pos = byte_pos + 1
    if lead >= 128 then
      -- multi-byte sequence: swallow up to three continuation bytes
      local continuation = 0
      while continuation < 3 do
        local b = string.byte(str, byte_pos)
        if not b or b < 128 or b > 191 then
          break
        end
        byte_pos = byte_pos + 1
        continuation = continuation + 1
      end
    end
    count = count + 1
  end
  return string.sub(str, 1, byte_pos - 1)
end
|
jbe@110
|
77
|
jbe/bsw@0
|
-- Converts 'str' to text via tostring(), optionally shortening it.
--   * nil input yields options.nil_as (or "" when that is not set)
--   * when options.truncate_at is set, texts with more codepoints than that
--     are cut and options.truncate_suffix is appended (boolean true selects
--     the Unicode ellipsis, a missing/false suffix means no suffix)
--   * unless options.truncate_count_suffix is explicitly false, the suffix's
--     codepoints are subtracted from the number of codepoints kept
function format.string(str, options)
  local opts = options or {}
  if str == nil then
    return opts.nil_as or ""
  end

  local limit = opts.truncate_at
  if not limit then
    return tostring(str)
  end

  local text = tostring(str)
  -- TODO: Unicode grapheme cluster boundary detection is not implemented
  --       (Unicode codepoints are used instead)
  local suffix = opts.truncate_suffix
  if suffix == true then
    suffix = '\226\128\166'  -- UTF-8 encoding of U+2026 (horizontal ellipsis)
  elseif not suffix then
    suffix = ''
  end

  -- codepoints reserved for the suffix within the limit
  local reserved = 0
  if opts.truncate_count_suffix ~= false then
    reserved = codepoint_count(suffix)
  end

  if codepoint_count(text) > limit then
    return codepoint_truncate(text, limit - reserved) .. suffix
  end
  return text
end
|