liquid_feedback_frontend

view env/util/html_is_safe.lua @ 1841:e6983d79d74f

Fix Lua error on <ol> or <ul> tag at end of input to util.html_is_safe(...)
author jbe
date Thu Feb 03 15:21:45 2022 +0100 (2022-02-03)
parents 32cc544d5a5b
children 27d2a7609cc1
line source
--- Check whether an HTML fragment uses only a small whitelist of markup.
--
-- Accepted markup (tag names are case-insensitive):
--   * <br> (optionally written as <br/>) as a void tag,
--   * <b>, <i>, <sup>, <sub> without attributes and without nesting a tag of
--     the same kind (<sup> and <sub> share one flag),
--   * <a href="..."> with exactly one href attribute (double-quoted,
--     single-quoted, or unquoted) whose value starts with http:// or
--     https://, and not nested inside another link,
--   * <p>, <h1>..<h6>, <ul>, <ol> without attributes and only while no
--     <p>, <b>, <i>, <sup>, <sub>, <a>, or heading is open,
--   * <li> without attributes, only directly inside <ul> or <ol>.
-- Every opened tag (except <br>) must be closed in strict stack order.
--
-- @param str  string to check
-- @return true if the input is acceptable; otherwise false followed by a
--         human-readable reason (string)
function util.html_is_safe(str)

  -- All (ASCII) control characters except \t\n\f\r are forbidden:
  if string.find(str, "[\0-\8\11\14-\31\127]") then
    return false, "Invalid ASCII control character"
  end

  -- Memorize expected closing tags (lowercase names, innermost last):
  local stack = {}

  -- State during parsing:
  local para    = false  -- <p> tag open
  local bold    = false  -- <b> tag open
  local italic  = false  -- <i> tag open
  local supsub  = false  -- <sup> or <sub> tag open
  local link    = false  -- <a href="..."> tag open
  local heading = false  -- <h1-6> tag open
  local list    = false  -- <ol> or <ul> (but no corresponding <li>) tag open

  -- Function looped with tail-calls. It receives the position in str at
  -- which scanning continues; working on positions instead of passing on
  -- the remaining text avoids copying the rest of the input for every tag.
  local function loop(pos)

    -- NOTE: We do not allow non-escaped "<" or ">" in attributes,
    --       even if HTML5 allows it.

    -- Find the next "<" or ">" character at or after pos and determine
    -- context, i.e. pre = text before character, tag = text until closing
    -- ">", and next_pos = position right behind the tag. The leading "^"
    -- anchors the match at pos.
    local pre, tag, next_pos = string.match(
      str, "^(.-)([<>][^<>]*>?)()", pos
    )

    -- If no more "<" or ">" characters are found,
    -- then return true if all tags have been closed:
    if not tag then
      if #stack == 0 then
        return true
      else
        return false, "Not all tags have been closed"
      end
    end

    -- Disallow text content (except inter-element white-space) in <ol> or
    -- <ul> when outside <li>:
    if list and string.find(pre, "[^\t\n\f\r ]") then
      return false, "Text content in list but outside list element"
    end

    -- Handle (expected) closing tags:
    local closed_tagname = string.match(tag, "^</(.-)[\t\n\f\r ]*>$")
    if closed_tagname then
      closed_tagname = string.lower(closed_tagname)
      -- Also covers a closing tag while nothing is open (stack top is nil):
      if closed_tagname ~= stack[#stack] then
        return false, "Wrong closing tag"
      end
      if closed_tagname == "p" then
        para = false
      elseif closed_tagname == "b" then
        bold = false
      elseif closed_tagname == "i" then
        italic = false
      elseif closed_tagname == "sup" or closed_tagname == "sub" then
        supsub = false
      elseif closed_tagname == "a" then
        link = false
      elseif string.find(closed_tagname, "^h[1-6]$") then
        heading = false
      elseif closed_tagname == "ul" or closed_tagname == "ol" then
        -- Back in the enclosing <li> (or at top level):
        list = false
      elseif closed_tagname == "li" then
        -- Back directly inside the enclosing <ol> or <ul>:
        list = true
      end
      stack[#stack] = nil
      return loop(next_pos)
    end

    -- Allow <br> tag as void tag:
    if string.find(tag, "^<[Bb][Rr][\t\n\f\r ]*/?>$") then
      return loop(next_pos)
    end

    -- Parse opening tag:
    local tagname, attrs = string.match(
      tag,
      "^<([^<>\0-\32]+)[\t\n\f\r ]*([^<>]-)[\t\n\f\r ]*>$"
    )

    -- Return false if tag could not be parsed (this includes a stray ">"
    -- and a "<" that is never terminated by ">"):
    if not tagname then
      return false, "Malformed tag"
    end

    -- Make tagname lowercase:
    tagname = string.lower(tagname)

    -- Append closing tag to list of expected closing tags:
    stack[#stack+1] = tagname

    -- Allow <li> tag in proper context:
    if tagname == "li" and attrs == "" then
      if not list then
        return false, "List element outside list"
      end
      list = false
      return loop(next_pos)
    end

    -- If there was no valid <li> tag but <ol> or <ul> is open,
    -- then return false (with a reason, like every other failure path):
    if list then
      return false, "Forbidden tag or forbidden attributes in list but outside list element"
    end

    -- Allow <b>, <i>, <sup>, <sub> unless already open:
    if tagname == "b" and attrs == "" then
      if bold then
        return false, "Bold inside bold tag"
      end
      bold = true
      return loop(next_pos)
    end
    if tagname == "i" and attrs == "" then
      if italic then
        return false, "Italic inside italic tag"
      end
      italic = true
      return loop(next_pos)
    end
    if (tagname == "sup" or tagname == "sub") and attrs == "" then
      if supsub then
        return false, "Super/subscript inside super/subscript tag"
      end
      supsub = true
      return loop(next_pos)
    end

    -- Allow <a href="..."> tag unless already open or malformed:
    if tagname == "a" then
      if link then
        return false, "Link inside link"
      end
      -- Try double-quoted, then single-quoted, then unquoted href value:
      local url = string.match(attrs, '^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*"([^"]*)"$')
      if not url then
        url = string.match(attrs, "^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*'([^']*)'$")
      end
      if not url then
        url = string.match(attrs, "^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*([^\0-\32\"'=<>`]+)$")
      end
      if not url then
        return false, "Forbidden, missing, or malformed attributes in link tag"
      end
      -- Only http:// and https:// URLs are accepted:
      if not string.find(url, "^[Hh][Tt][Tt][Pp][Ss]?://") then
        return false, "Invalid link URL"
      end
      link = true
      return loop(next_pos)
    end

    -- Remaining tags require no open <p>, <b>, <i>, <sup>, <sub>,
    -- <a href="...">, or <h1>..</h6> tag:
    if para or bold or italic or supsub or link or heading then
      return false, "Forbidden child tag within paragraph, bold, italic, super/subscript, link, or heading tag"
    end

    -- Allow <p>:
    if tagname == "p" and attrs == "" then
      para = true
      return loop(next_pos)
    end

    -- Allow <h1>..<h6>:
    if string.find(tagname, "^h[1-6]$") and attrs == "" then
      heading = true
      return loop(next_pos)
    end

    -- Allow <ul> and <ol>:
    if (tagname == "ul" or tagname == "ol") and attrs == "" then
      list = true
      return loop(next_pos)
    end

    -- Disallow all others (including unexpected closing tags):
    return false, "Forbidden tag or forbidden attributes"

  end

  -- Invoke tail-call loop at the start of the input:
  return loop(1)

end

Impressum / About Us