rev |
line source |
bsw/jbe@1309
|
-- Checks whether an HTML fragment consists only of a small whitelist of
-- properly nested tags:
--   <p>, <b>, <i>, <sup>, <sub>, <h1>..<h6>, <ul>, <ol>, <li> (no attributes),
--   <br> (void, optional trailing "/"),
--   <a href="..."> (href only, URL must start with http:// or https://).
--
-- Parameters:
--   str  the HTML fragment to validate (string)
--
-- Returns:
--   true                 if the fragment is accepted
--   false, message       if it is rejected; message is a short English reason
function util.html_is_safe(str)

  -- All (ASCII) control characters except \t\n\f\r are forbidden:
  if string.find(str, "[\0-\8\11\14-\31\127]") then
    return false, "Invalid ASCII control character"
  end

  -- Memorize expected closing tags (innermost open tag is last):
  local stack = {}

  -- State during parsing:
  local para    = false  -- <p> tag open
  local bold    = false  -- <b> tag open
  local italic  = false  -- <i> tag open
  local supsub  = false  -- <sup> or <sub> tag open
  local link    = false  -- <a href="..."> tag open
  local heading = false  -- <h1-6> tag open
  local list    = false  -- <ol> or <ul> (but no corresponding <li>) tag open

  -- Function looped with tail-calls; "input" is the not yet processed
  -- remainder of the fragment:
  local function loop(input)

    -- NOTE: We do not allow non-escaped "<" or ">" in attributes,
    -- even if HTML5 allows it.

    -- Find any "<" or ">" character and determine context, i.e.
    -- pre = text before character, tag = text until closing ">", and rest:
    local pre, tag, rest = string.match(input, "^(.-)([<>][^<>]*>?)(.*)")

    -- If the input contains no further "<" or ">", the match fails and all
    -- captures are nil. In that case the whole remaining input is text
    -- content, which must still be subject to the list check below
    -- (passing nil to string.find would raise an error).
    local text = pre or input

    -- Disallow text content (except inter-element white-space) in <ol> or <ul>
    -- when outside <li>:
    if list and string.find(text, "[^\t\n\f\r ]") then
      return false, "Text content in list but outside list element"
    end

    -- If no more "<" or ">" characters are found,
    -- then return true if all tags have been closed:
    if not tag then
      if #stack == 0 then
        return true
      else
        return false, "Not all tags have been closed"
      end
    end

    -- Handle (expected) closing tags:
    local closed_tagname = string.match(tag, "^</(.-)[\t\n\f\r ]*>$")
    if closed_tagname then
      closed_tagname = string.lower(closed_tagname)
      -- Only the innermost open tag may be closed (stack[0] is nil when
      -- nothing is open, so any closing tag is rejected then):
      if closed_tagname ~= stack[#stack] then
        return false, "Wrong closing tag"
      end
      if closed_tagname == "p" then
        para = false
      elseif closed_tagname == "b" then
        bold = false
      elseif closed_tagname == "i" then
        italic = false
      elseif closed_tagname == "sup" or closed_tagname == "sub" then
        supsub = false
      elseif closed_tagname == "a" then
        link = false
      elseif string.find(closed_tagname, "^h[1-6]$") then
        heading = false
      elseif closed_tagname == "ul" or closed_tagname == "ol" then
        list = false
      elseif closed_tagname == "li" then
        -- Leaving <li> means we are directly inside <ol>/<ul> again:
        list = true
      end
      stack[#stack] = nil
      return loop(rest)
    end

    -- Allow <br> tag as void tag.
    -- NOTE: this check runs before the list-context check below, so <br>
    -- is also accepted directly inside <ol>/<ul>.
    if string.find(tag, "^<[Bb][Rr][\t\n\f\r ]*/?>$") then
      return loop(rest)
    end

    -- Parse opening tag:
    local tagname, attrs = string.match(
      tag,
      "^<([^<>\0-\32]+)[\t\n\f\r ]*([^<>]-)[\t\n\f\r ]*>$"
    )

    -- Return false if tag could not be parsed:
    if not tagname then
      return false, "Malformed tag"
    end

    -- Make tagname lowercase:
    tagname = string.lower(tagname)

    -- Append closing tag to list of expected closing tags
    -- (done before validation; every rejection below aborts parsing anyway):
    stack[#stack+1] = tagname

    -- Allow <li> tag in proper context:
    if tagname == "li" and attrs == "" then
      if not list then
        return false, "List element outside list"
      end
      list = false
      return loop(rest)
    end

    -- If there was no valid <li> tag but <ol> or <ul> is open,
    -- then return false:
    if list then
      return false, "Forbidden tag in list but outside list element"
    end

    -- Allow <b>, <i>, <sup>, <sub> unless already open:
    if tagname == "b" and attrs == "" then
      if bold then
        return false, "Bold inside bold tag"
      end
      bold = true
      return loop(rest)
    end
    if tagname == "i" and attrs == "" then
      if italic then
        return false, "Italic inside italic tag"
      end
      italic = true
      return loop(rest)
    end
    if (tagname == "sup" or tagname == "sub") and attrs == "" then
      if supsub then
        return false, "Super/subscript inside super/subscript tag"
      end
      supsub = true
      return loop(rest)
    end

    -- Allow <a href="..."> tag unless already open or malformed.
    -- The href value may be double-quoted, single-quoted, or unquoted:
    if tagname == "a" then
      if link then
        return false, "Link inside link"
      end
      local url = string.match(attrs, '^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*"([^"]*)"$')
      if not url then
        url = string.match(attrs, "^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*'([^']*)'$")
      end
      if not url then
        url = string.match(attrs, "^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*([^\0-\32\"'=<>`]+)$")
      end
      if not url then
        return false, "Forbidden, missing, or malformed attributes in link tag"
      end
      -- Only http:// and https:// URLs are accepted:
      if not string.find(url, "^[Hh][Tt][Tt][Pp][Ss]?://") then
        return false, "Invalid link URL"
      end
      link = true
      return loop(rest)
    end

    -- Remaining tags require no open <p>, <b>, <i>, <sup>, <sub>,
    -- <a href="...">, or <h1>..</h6> tag:
    if para or bold or italic or supsub or link or heading then
      return false, "Forbidden child tag within paragraph, bold, italic, super/subscript, link, or heading tag"
    end

    -- Allow <p>:
    if tagname == "p" and attrs == "" then
      para = true
      return loop(rest)
    end

    -- Allow <h1>..<h6>:
    if string.find(tagname, "^h[1-6]$") and attrs == "" then
      heading = true
      return loop(rest)
    end

    -- Allow <ul> and <ol>:
    if (tagname == "ul" or tagname == "ol") and attrs == "" then
      list = true
      return loop(rest)
    end

    -- Disallow all others (including unexpected closing tags):
    return false, "Forbidden tag or forbidden attributes"

  end

  -- Invoke tail-call loop:
  return loop(str)

end
|