liquid_feedback_frontend

annotate env/util/html_is_safe.lua @ 1842:27d2a7609cc1

Allow <pre> tag in util.html_is_safe(...)
author jbe
date Thu Feb 03 15:54:23 2022 +0100 (2022-02-03)
parents e6983d79d74f
children
rev   line source
-- Checks whether an HTML fragment consists only of a small whitelist of tags
-- used in a well-formed way.
--
-- Accepted tags: <p>, <b>, <i>, <sup>, <sub>, <h1>..<h6>, <ul>, <ol>, <li>,
-- <pre> (all without any attributes), <br> (void, optional trailing slash),
-- and <a> with exactly one href attribute whose value starts with http:// or
-- https://.
--
-- Parameters:
--   str  the HTML fragment to check (string)
--
-- Returns:
--   true                   if the fragment is accepted
--   false, error_message   if the fragment is rejected
function util.html_is_safe(str)

  -- All (ASCII) control characters except \t\n\f\r are forbidden:
  if string.find(str, "[\0-\8\11\14-\31\127]") then
    return false, "Invalid ASCII control character"
  end

  -- Memorize expected closing tags (innermost open tag is last):
  local stack = {}

  -- State during parsing:
  local para    = false  -- <p> tag open
  local bold    = false  -- <b> tag open
  local italic  = false  -- <i> tag open
  local supsub  = false  -- <sup> or <sub> tag open
  local link    = false  -- <a href="..."> tag open
  local heading = false  -- <h1-6> tag open
  local list    = false  -- <ol> or <ul> (but no corresponding <li>) tag open
  local listelm = false  -- <li> tag (but no further <ol> or <ul> tag) open
                         -- (tracked only; no check reads it)
  local pre     = false  -- <pre> tag open (tracked only; no check reads it)

  -- Function looped with tail-calls; each invocation consumes one tag
  -- (or one stray "<" / ">") from the front of the remaining input:
  local function loop(str)

    -- NOTE: We do not allow non-escaped "<" or ">" in attributes,
    -- even if HTML5 allows it.

    -- Find any "<" or ">" character and determine context, i.e.
    -- prefix = text before character, tag = text until closing ">", and rest:
    local prefix, tag, rest = string.match(str, "^(.-)([<>][^<>]*>?)(.*)")

    -- If no more "<" or ">" characters are found,
    -- then return true if all tags have been closed:
    if not tag then
      if #stack == 0 then
        return true
      else
        return false, "Not all tags have been closed"
      end
    end

    -- Disallow text content (except inter-element white-space) in <ol> or <ul>
    -- when outside <li>:
    if list and string.find(prefix, "[^\t\n\f\r ]") then
      return false, "Text content in list but outside list element"
    end

    -- Handle (expected) closing tags:
    local closed_tagname = string.match(tag, "^</(.-)[\t\n\f\r ]*>$")
    if closed_tagname then
      closed_tagname = string.lower(closed_tagname)
      -- Only the innermost open tag may be closed:
      if closed_tagname ~= stack[#stack] then
        return false, "Wrong closing tag"
      end
      if closed_tagname == "p" then
        para = false
      elseif closed_tagname == "b" then
        bold = false
      elseif closed_tagname == "i" then
        italic = false
      elseif closed_tagname == "sup" or closed_tagname == "sub" then
        supsub = false
      elseif closed_tagname == "a" then
        link = false
      elseif string.find(closed_tagname, "^h[1-6]$") then
        heading = false
      elseif closed_tagname == "ul" or closed_tagname == "ol" then
        list = false
      elseif closed_tagname == "li" then
        -- After </li> we are directly inside the enclosing <ol>/<ul> again:
        listelm = false
        list = true
      elseif closed_tagname == "pre" then
        pre = false
      end
      stack[#stack] = nil
      return loop(rest)
    end

    -- Allow <br> tag as void tag:
    if string.find(tag, "^<[Bb][Rr][\t\n\f\r ]*/?>$") then
      return loop(rest)
    end

    -- Parse opening tag:
    local tagname, attrs = string.match(
      tag,
      "^<([^<>\0-\32]+)[\t\n\f\r ]*([^<>]-)[\t\n\f\r ]*>$"
    )

    -- Return false if tag could not be parsed:
    if not tagname then
      return false, "Malformed tag"
    end

    -- Make tagname lowercase:
    tagname = string.lower(tagname)

    -- Append closing tag to list of expected closing tags
    -- (done before validation; every rejection below returns immediately):
    stack[#stack+1] = tagname

    -- Allow <li> tag in proper context:
    if tagname == "li" and attrs == "" then
      if not list then
        return false, "List element outside list"
      end
      list = false
      listelm = true
      return loop(rest)
    end

    -- If there was no valid <li> tag but <ol> or <ul> is open,
    -- then return false:
    if list then
      return false, "Forbidden tag or forbidden attributes in list but outside list element"
    end

    -- Allow <b>, <i>, <sup>, <sub> unless already open:
    if tagname == "b" and attrs == "" then
      if bold then
        return false, "Bold inside bold tag"
      end
      bold = true
      return loop(rest)
    end
    if tagname == "i" and attrs == "" then
      if italic then
        return false, "Italic inside italic tag"
      end
      italic = true
      return loop(rest)
    end
    if (tagname == "sup" or tagname == "sub") and attrs == "" then
      if supsub then
        return false, "Super/subscript inside super/subscript tag"
      end
      supsub = true
      return loop(rest)
    end

    -- Allow <a href="..."> tag unless already open or malformed:
    if tagname == "a" then
      if link then
        return false, "Link inside link"
      end
      -- Accept double-quoted, single-quoted, or unquoted href value;
      -- any other or additional attribute makes all three matches fail:
      local url = string.match(attrs, '^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*"([^"]*)"$')
      if not url then
        url = string.match(attrs, "^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*'([^']*)'$")
      end
      if not url then
        url = string.match(attrs, "^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*([^\0-\32\"'=<>`]+)$")
      end
      if not url then
        return false, "Forbidden, missing, or malformed attributes in link tag"
      end
      -- Only http:// and https:// URLs are accepted:
      if not string.find(url, "^[Hh][Tt][Tt][Pp][Ss]?://") then
        return false, "Invalid link URL"
      end
      link = true
      return loop(rest)
    end

    -- Allow <pre> in any context, but (like all other whitelisted tags)
    -- only without attributes; a <pre> carrying attributes falls through to
    -- the rejections below:
    if tagname == "pre" and attrs == "" then
      pre = true
      return loop(rest)
    end

    -- Remaining tags require no open <p>, <b>, <i>, <sup>, <sub>,
    -- <a href="...">, or <h1>..</h6> tag:
    -- TODO: HTML also requires that no <pre> tag is open, but check not done
    --       here due to used WYSIWYG editor
    if para or bold or italic or supsub or link or heading then
      return false, "Forbidden child tag within paragraph, bold, italic, super/subscript, link, or heading tag"
    end

    -- Allow <p>:
    if tagname == "p" and attrs == "" then
      para = true
      return loop(rest)
    end

    -- Allow <h1>..<h6>:
    if string.find(tagname, "^h[1-6]$") and attrs == "" then
      heading = true
      return loop(rest)
    end

    -- Allow <ul> and <ol>:
    if (tagname == "ul" or tagname == "ol") and attrs == "" then
      list = true
      return loop(rest)
    end

    -- Disallow all others (including unexpected closing tags):
    return false, "Forbidden tag or forbidden attributes"

  end

  -- Invoke tail-call loop:
  return loop(str)

end

Impressum / About Us