liquid_feedback_frontend
diff env/util/html_is_safe.lua @ 1309:32cc544d5a5b
Cumulative patch for upcoming frontend version 4
author | bsw/jbe |
---|---|
date | Sun Jul 15 14:07:29 2018 +0200 (2018-07-15) |
parents | |
children | e6983d79d74f |
line diff
-- Checks whether a string is an HTML fragment built only from a small
-- whitelist of tags: <p>, <b>, <i>, <sup>, <sub>, <a href="http(s)://...">,
-- <h1>..<h6>, <ol>, <ul>, <li>, and the void tag <br>.
--
-- Parameters:
--   str  the HTML fragment to check (string)
--
-- Returns:
--   true                  if the fragment passes all checks
--   false, error_message  otherwise (error_message is a string)
function util.html_is_safe(str)

  -- All (ASCII) control characters except \t\n\f\r are forbidden:
  if string.find(str, "[\0-\8\11\14-\31\127]") then
    return false, "Invalid ASCII control character"
  end

  -- Memorize expected closing tags (lowercase tag names, innermost last):
  local stack = {}

  -- State during parsing:
  local para    = false  -- <p> tag open
  local bold    = false  -- <b> tag open
  local italic  = false  -- <i> tag open
  local supsub  = false  -- <sup> or <sub> tag open
  local link    = false  -- <a href="..."> tag open
  local heading = false  -- <h1-6> tag open
  local list    = false  -- <ol> or <ul> (but no corresponding <li>) tag open

  -- Function looped with tail-calls; each call consumes the text up to and
  -- including the next tag and continues with the remainder:
  local function loop(str)

    -- NOTE: We do not allow non-escaped "<" or ">" in attributes,
    --       even if HTML5 allows it.

    -- Find any "<" or ">" character and determine context, i.e.
    -- pre = text before character, tag = text until closing ">", and rest:
    local pre, tag, rest = string.match(str, "^(.-)([<>][^<>]*>?)(.*)")

    -- If no "<" or ">" is left, the match fails and all captures are nil.
    -- The whole remainder is plain text then; use it as "pre" so that the
    -- list check below operates on a string instead of raising an error:
    if not tag then
      pre = str
    end

    -- Disallow text content (except inter-element white-space) in <ol> or <ul>
    -- when outside <li>:
    if list and string.find(pre, "[^\t\n\f\r ]") then
      return false, "Text content in list but outside list element"
    end

    -- If no more "<" or ">" characters are found,
    -- then return true if all tags have been closed:
    if not tag then
      if #stack == 0 then
        return true
      else
        return false, "Not all tags have been closed"
      end
    end

    -- Handle (expected) closing tags:
    local closed_tagname = string.match(tag, "^</(.-)[\t\n\f\r ]*>$")
    if closed_tagname then
      closed_tagname = string.lower(closed_tagname)
      -- Only the innermost open tag may be closed:
      if closed_tagname ~= stack[#stack] then
        return false, "Wrong closing tag"
      end
      if closed_tagname == "p" then
        para = false
      elseif closed_tagname == "b" then
        bold = false
      elseif closed_tagname == "i" then
        italic = false
      elseif closed_tagname == "sup" or closed_tagname == "sub" then
        supsub = false
      elseif closed_tagname == "a" then
        link = false
      elseif string.find(closed_tagname, "^h[1-6]$") then
        heading = false
      elseif closed_tagname == "ul" or closed_tagname == "ol" then
        list = false
      elseif closed_tagname == "li" then
        -- Leaving a list element means being directly inside the list again:
        list = true
      end
      stack[#stack] = nil
      return loop(rest)
    end

    -- Allow <br> tag as void tag:
    if string.find(tag, "^<[Bb][Rr][\t\n\f\r ]*/?>$") then
      return loop(rest)
    end

    -- Parse opening tag:
    local tagname, attrs = string.match(
      tag,
      "^<([^<>\0-\32]+)[\t\n\f\r ]*([^<>]-)[\t\n\f\r ]*>$"
    )

    -- Return false if tag could not be parsed:
    if not tagname then
      return false, "Malformed tag"
    end

    -- Make tagname lowercase:
    tagname = string.lower(tagname)

    -- Append closing tag to list of expected closing tags:
    stack[#stack+1] = tagname

    -- Allow <li> tag in proper context:
    if tagname == "li" and attrs == "" then
      if not list then
        return false, "List element outside list"
      end
      list = false
      return loop(rest)
    end

    -- If there was no valid <li> tag but <ol> or <ul> is open,
    -- then return false:
    if list then
      return false, "Forbidden tag in list but outside list element"
    end

    -- Allow <b>, <i>, <sup>, <sub> unless already open:
    if tagname == "b" and attrs == "" then
      if bold then
        return false, "Bold inside bold tag"
      end
      bold = true
      return loop(rest)
    end
    if tagname == "i" and attrs == "" then
      if italic then
        return false, "Italic inside italic tag"
      end
      italic = true
      return loop(rest)
    end
    if (tagname == "sup" or tagname == "sub") and attrs == "" then
      if supsub then
        return false, "Super/subscript inside super/subscript tag"
      end
      supsub = true
      return loop(rest)
    end

    -- Allow <a href="..."> tag unless already open or malformed.
    -- The href value may be double-quoted, single-quoted, or unquoted:
    if tagname == "a" then
      if link then
        return false, "Link inside link"
      end
      local url = string.match(attrs, '^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*"([^"]*)"$')
      if not url then
        url = string.match(attrs, "^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*'([^']*)'$")
      end
      if not url then
        url = string.match(attrs, "^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*([^\0-\32\"'=<>`]+)$")
      end
      if not url then
        return false, "Forbidden, missing, or malformed attributes in link tag"
      end
      -- Only http:// and https:// URLs are accepted:
      if not string.find(url, "^[Hh][Tt][Tt][Pp][Ss]?://") then
        return false, "Invalid link URL"
      end
      link = true
      return loop(rest)
    end

    -- Remaining tags require no open <p>, <b>, <i>, <sup>, <sub>,
    -- <a href="...">, or <h1>..</h6> tag:
    if para or bold or italic or supsub or link or heading then
      return false, "Forbidden child tag within paragraph, bold, italic, super/subscript, link, or heading tag"
    end

    -- Allow <p>:
    if tagname == "p" and attrs == "" then
      para = true
      return loop(rest)
    end

    -- Allow <h1>..<h6>:
    if string.find(tagname, "^h[1-6]$") and attrs == "" then
      heading = true
      return loop(rest)
    end

    -- Allow <ul> and <ol>:
    if (tagname == "ul" or tagname == "ol") and attrs == "" then
      list = true
      return loop(rest)
    end

    -- Disallow all others (including unexpected closing tags):
    return false, "Forbidden tag or forbidden attributes"

  end

  -- Invoke tail-call loop:
  return loop(str)

end