
view env/util/html_is_safe.lua @ 1859:02c34183b6df

Fixed wrong filename in INSTALL file
author bsw
date Tue Nov 28 18:54:51 2023 +0100 (6 months ago)
parents 27d2a7609cc1
line source
--- Checks whether a string is a well-formed fragment of a restricted,
-- attribute-free HTML subset.
--
-- Accepted elements: <p>, <b>, <i>, <sup>, <sub>, <h1>..<h6>, <ul>, <ol>,
-- <li>, <pre> (all without attributes), the void tag <br>, and
-- <a href="..."> whose URL starts with "http://" or "https://".
-- Tags must be closed in reverse order of opening (except <br>).
--
-- @param str  the string to validate
-- @return true if the string is accepted; otherwise false followed by an
--         error message describing the first problem found
function util.html_is_safe(str)

  -- All (ASCII) control characters except \t\n\f\r are forbidden:
  if string.find(str, "[\0-\8\11\14-\31\127]") then
    return false, "Invalid ASCII control character"
  end

  -- Names of the currently open tags, innermost last; a closing tag must
  -- always match the last entry:
  local stack = {}

  -- State during parsing:
  local para    = false  -- <p> tag open
  local bold    = false  -- <b> tag open
  local italic  = false  -- <i> tag open
  local supsub  = false  -- <sup> or <sub> tag open
  local link    = false  -- <a href="..."> tag open
  local heading = false  -- <h1-6> tag open
  local list    = false  -- <ol> or <ul> (but no corresponding <li>) tag open
  -- <pre> tag open; only recorded, not consulted by any check yet
  -- (see the TODO further below):
  local pre     = false

  -- Function looped with tail-calls; each call consumes one tag from the
  -- front of its argument and continues with the remainder:
  local function loop(str)

    -- NOTE: We do not allow non-escaped "<" or ">" in attributes,
    --       even if HTML5 allows it.

    -- Find any "<" or ">" character and determine context, i.e.
    -- prefix = text before character, tag = text until closing ">", and rest:
    local prefix, tag, rest = string.match(str, "^(.-)([<>][^<>]*>?)(.*)")

    -- If no more "<" or ">" characters are found,
    -- then return true if all tags have been closed:
    if not tag then
      if #stack == 0 then
        return true
      else
        return false, "Not all tags have been closed"
      end
    end

    -- Disallow text content (except inter-element white-space) in <ol> or <ul>
    -- when outside <li>:
    if list and string.find(prefix, "[^\t\n\f\r ]") then
      return false, "Text content in list but outside list element"
    end

    -- Handle (expected) closing tags:
    local closed_tagname = string.match(tag, "^</(.-)[\t\n\f\r ]*>$")
    if closed_tagname then
      closed_tagname = string.lower(closed_tagname)
      -- With an empty stack, stack[#stack] is nil and the comparison fails:
      if closed_tagname ~= stack[#stack] then
        return false, "Wrong closing tag"
      end
      if closed_tagname == "p" then
        para = false
      elseif closed_tagname == "b" then
        bold = false
      elseif closed_tagname == "i" then
        italic = false
      elseif closed_tagname == "sup" or closed_tagname == "sub" then
        supsub = false
      elseif closed_tagname == "a" then
        link = false
      elseif string.find(closed_tagname, "^h[1-6]$") then
        heading = false
      elseif closed_tagname == "ul" or closed_tagname == "ol" then
        list = false
      elseif closed_tagname == "li" then
        -- <li> is only accepted directly inside a list, so closing it
        -- puts us back between list elements:
        list = true
      elseif closed_tagname == "pre" then
        pre = false
      end
      stack[#stack] = nil
      return loop(rest)
    end

    -- Allow <br> tag as void tag (nothing is pushed on the stack).
    -- NOTE(review): this check runs before the list check below, so <br> is
    -- also accepted directly inside <ul>/<ol>; confirm whether that is wanted.
    if string.find(tag, "^<[Bb][Rr][\t\n\f\r ]*/?>$") then
      return loop(rest)
    end

    -- Parse opening tag into its name and the (whitespace-trimmed)
    -- attribute text:
    local tagname, attrs = string.match(
      tag,
      "^<([^<>\0-\32]+)[\t\n\f\r ]*([^<>]-)[\t\n\f\r ]*>$"
    )

    -- Return false if tag could not be parsed:
    if not tagname then
      return false, "Malformed tag"
    end

    -- Make tagname lowercase:
    tagname = string.lower(tagname)

    -- Append closing tag to list of expected closing tags
    -- (done before validation; on rejection the stack is simply discarded):
    stack[#stack+1] = tagname

    -- Allow <li> tag in proper context:
    if tagname == "li" and attrs == "" then
      if not list then
        return false, "List element outside list"
      end
      list = false
      return loop(rest)
    end

    -- If there was no valid <li> tag but <ol> or <ul> is open,
    -- then return false (this includes <li> tags carrying attributes):
    if list then
      return false, "Forbidden tag or forbidden attributes in list but outside list element"
    end

    -- Allow <b>, <i>, <sup>, <sub> unless already open:
    if tagname == "b" and attrs == "" then
      if bold then
        return false, "Bold inside bold tag"
      end
      bold = true
      return loop(rest)
    end
    if tagname == "i" and attrs == "" then
      if italic then
        return false, "Italic inside italic tag"
      end
      italic = true
      return loop(rest)
    end
    if (tagname == "sup" or tagname == "sub") and attrs == "" then
      if supsub then
        return false, "Super/subscript inside super/subscript tag"
      end
      supsub = true
      return loop(rest)
    end

    -- Allow <a href="..."> tag unless already open or malformed.
    -- The href attribute must be the only attribute; its value may be
    -- double-quoted, single-quoted, or unquoted:
    if tagname == "a" then
      if link then
        return false, "Link inside link"
      end
      local url = string.match(attrs, '^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*"([^"]*)"$')
      if not url then
        url = string.match(attrs, "^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*'([^']*)'$")
      end
      if not url then
        url = string.match(attrs, "^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*([^\0-\32\"'=<>`]+)$")
      end
      if not url then
        return false, "Forbidden, missing, or malformed attributes in link tag"
      end
      -- Only absolute http/https URLs are accepted (rejects e.g. javascript:):
      if not string.find(url, "^[Hh][Tt][Tt][Pp][Ss]?://") then
        return false, "Invalid link URL"
      end
      link = true
      return loop(rest)
    end

    -- Allow <pre> in any context (except directly inside a list, see above),
    -- but, like all other tags except <a>, only without attributes; otherwise
    -- event-handler attributes such as onclick="..." would be accepted:
    if tagname == "pre" then
      if attrs ~= "" then
        return false, "Forbidden attributes in pre tag"
      end
      pre = true
      return loop(rest)
    end

    -- Remaining tags require no open <p>, <b>, <i>, <sup>, <sub>,
    -- <a href="...">, or <h1>..</h6> tag:
    -- TODO: HTML also requires that no <pre> tag is open, but check not done
    --       here due to used WYSIWYG editor
    if para or bold or italic or supsub or link or heading then
      return false, "Forbidden child tag within paragraph, bold, italic, super/subscript, link, or heading tag"
    end

    -- Allow <p>:
    if tagname == "p" and attrs == "" then
      para = true
      return loop(rest)
    end

    -- Allow <h1>..<h6>:
    if string.find(tagname, "^h[1-6]$") and attrs == "" then
      heading = true
      return loop(rest)
    end

    -- Allow <ul> and <ol>:
    if (tagname == "ul" or tagname == "ol") and attrs == "" then
      list = true
      return loop(rest)
    end

    -- Disallow all others (including unexpected closing tags):
    return false, "Forbidden tag or forbidden attributes"

  end

  -- Invoke tail-call loop:
  return loop(str)

end

Impressum / About Us