rev |
line source |
bsw/jbe@1309
|
-- Checks whether a string consists only of a restricted, well-nested subset
-- of HTML.
--
-- Accepted tags (tag names are matched case-insensitively):
--   <p>, <b>, <i>, <sup>, <sub>, <h1>..<h6>, <ul>, <ol>, <li>, <pre>
--     (all without any attributes),
--   <a href="..."> with exactly one href attribute whose value starts with
--     http:// or https://,
--   <br> / <br/> as void tag.
--
-- Parameters:
--   str  - the HTML text to check
--
-- Returns:
--   true                    if the text passed all checks
--   false, error_message    if a check failed
--
-- NOTE(review): several patterns below embed "\0" inside a character class;
-- this presumably requires a Lua version whose pattern matching accepts
-- embedded zeros (5.2 or later) -- confirm against the deployed interpreter.
function util.html_is_safe(str)

  -- All (ASCII) control characters except \t\n\f\r are forbidden:
  if string.find(str, "[\0-\8\11\14-\31\127]") then
    return false, "Invalid ASCII control character"
  end

  -- Memorize expected closing tags (innermost open tag is the last entry):
  local stack = {}

  -- State during parsing:
  local para    = false  -- <p> tag open
  local bold    = false  -- <b> tag open
  local italic  = false  -- <i> tag open
  local supsub  = false  -- <sup> or <sub> tag open
  local link    = false  -- <a href="..."> tag open
  local heading = false  -- <h1-6> tag open
  local list    = false  -- <ol> or <ul> (but no corresponding <li>) tag open
  -- The following two flags are maintained but not consulted by any check
  -- in this function (see the TODO regarding <pre> further below):
  local listelm = false  -- <li> tag (but no further <ol> or <ul> tag) open
  local pre     = false  -- <pre> tag open

  -- Function looped with tail-calls; each invocation consumes the text up to
  -- and including the next tag and then continues with the remainder:
  local function loop(str)

    -- NOTE: We do not allow non-escaped "<" or ">" in attributes,
    --       even if HTML5 allows it.

    -- Find any "<" or ">" character and determine context, i.e.
    -- prefix = text before character, tag = text until closing ">", and rest:
    local prefix, tag, rest = string.match(str, "^(.-)([<>][^<>]*>?)(.*)")

    -- If no more "<" or ">" characters are found,
    -- then return true if all tags have been closed:
    if not tag then
      if #stack == 0 then
        return true
      else
        return false, "Not all tags have been closed"
      end
    end

    -- Disallow text content (except inter-element white-space) in <ol> or <ul>
    -- when outside <li>:
    if list and string.find(prefix, "[^\t\n\f\r ]") then
      return false, "Text content in list but outside list element"
    end

    -- Handle (expected) closing tags:
    local closed_tagname = string.match(tag, "^</(.-)[\t\n\f\r ]*>$")
    if closed_tagname then
      closed_tagname = string.lower(closed_tagname)
      -- Only the innermost open tag may be closed (with an empty stack,
      -- stack[#stack] is nil and any closing tag is rejected):
      if closed_tagname ~= stack[#stack] then
        return false, "Wrong closing tag"
      end
      if closed_tagname == "p" then
        para = false
      elseif closed_tagname == "b" then
        bold = false
      elseif closed_tagname == "i" then
        italic = false
      elseif closed_tagname == "sup" or closed_tagname == "sub" then
        supsub = false
      elseif closed_tagname == "a" then
        link = false
      elseif string.find(closed_tagname, "^h[1-6]$") then
        heading = false
      elseif closed_tagname == "ul" or closed_tagname == "ol" then
        list = false
      elseif closed_tagname == "li" then
        -- After </li> we are directly inside the enclosing <ol>/<ul> again:
        listelm = false
        list = true
      elseif closed_tagname == "pre" then
        pre = false
      end
      stack[#stack] = nil
      return loop(rest)
    end

    -- Allow <br> tag as void tag:
    -- NOTE(review): this check runs before the list-context check below, so
    -- <br> is also accepted directly inside <ol>/<ul> outside of <li> --
    -- confirm whether that is intended.
    if string.find(tag, "^<[Bb][Rr][\t\n\f\r ]*/?>$") then
      return loop(rest)
    end

    -- Parse opening tag into tag name and (whitespace-trimmed) attributes:
    local tagname, attrs = string.match(
      tag,
      "^<([^<>\0-\32]+)[\t\n\f\r ]*([^<>]-)[\t\n\f\r ]*>$"
    )

    -- Return false if tag could not be parsed:
    if not tagname then
      return false, "Malformed tag"
    end

    -- Make tagname lowercase:
    tagname = string.lower(tagname)

    -- Append closing tag to list of expected closing tags:
    stack[#stack+1] = tagname

    -- Allow <li> tag in proper context:
    if tagname == "li" and attrs == "" then
      if not list then
        return false, "List element outside list"
      end
      list = false
      listelm = true
      return loop(rest)
    end

    -- If there was no valid <li> tag but <ol> or <ul> is open,
    -- then return false (with an error message, like every other failure):
    if list then
      return false, "Forbidden tag in list but outside list element"
    end

    -- Allow <b>, <i>, <sup>, <sub> unless already open:
    if tagname == "b" and attrs == "" then
      if bold then
        return false, "Bold inside bold tag"
      end
      bold = true
      return loop(rest)
    end
    if tagname == "i" and attrs == "" then
      if italic then
        return false, "Italic inside italic tag"
      end
      italic = true
      return loop(rest)
    end
    if (tagname == "sup" or tagname == "sub") and attrs == "" then
      if supsub then
        return false, "Super/subscript inside super/subscript tag"
      end
      supsub = true
      return loop(rest)
    end

    -- Allow <a href="..."> tag unless already open or malformed:
    if tagname == "a" then
      if link then
        return false, "Link inside link"
      end
      -- The attribute string must consist of exactly one href attribute,
      -- either double-quoted, single-quoted, or unquoted:
      local url = string.match(attrs, '^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*"([^"]*)"$')
      if not url then
        url = string.match(attrs, "^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*'([^']*)'$")
      end
      if not url then
        url = string.match(attrs, "^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*([^\0-\32\"'=<>`]+)$")
      end
      if not url then
        return false, "Forbidden, missing, or malformed attributes in link tag"
      end
      -- Only http:// and https:// URLs are accepted:
      if not string.find(url, "^[Hh][Tt][Tt][Pp][Ss]?://") then
        return false, "Invalid link URL"
      end
      link = true
      return loop(rest)
    end

    -- Always allow <pre> (in any context), but only without attributes;
    -- otherwise arbitrary attributes such as event handlers would pass
    -- this check unverified:
    if tagname == "pre" and attrs == "" then
      pre = true
      return loop(rest)
    end

    -- Remaining tags require no open <p>, <b>, <i>, <sup>, <sub>,
    -- <a href="...">, or <h1>..</h6> tag:
    -- TODO: HTML also requires that no <pre> tag is open, but check not done
    --       here due to used WYSIWYG editor
    if para or bold or italic or supsub or link or heading then
      return false, "Forbidden child tag within paragraph, bold, italic, super/subscript, link, or heading tag"
    end

    -- Allow <p>:
    if tagname == "p" and attrs == "" then
      para = true
      return loop(rest)
    end

    -- Allow <h1>..<h6>:
    if string.find(tagname, "^h[1-6]$") and attrs == "" then
      heading = true
      return loop(rest)
    end

    -- Allow <ul> and <ol>:
    if (tagname == "ul" or tagname == "ol") and attrs == "" then
      list = true
      return loop(rest)
    end

    -- Disallow all others (including unexpected closing tags):
    return false, "Forbidden tag or forbidden attributes"

  end

  -- Invoke tail-call loop:
  return loop(str)

end
|