liquid_feedback_frontend

annotate env/util/html_is_safe.lua @ 1489:3ab878ba277e

Accessibility improvements part 1
author bsw
date Mon Feb 18 19:46:36 2019 +0100 (2019-02-18)
parents 32cc544d5a5b
children e6983d79d74f
rev   line source
-- Checks whether an HTML fragment uses only a small whitelist of tags
-- in a properly nested way.
--
-- Allowed: <p>, <b>, <i>, <sup>, <sub>, <a href="http(s)://...">,
-- <h1>..<h6>, <ul>, <ol>, <li>, and the void tag <br>. Only <a> may carry
-- an attribute (exactly one "href" with an http:// or https:// URL).
--
-- Parameters:
--   str  the HTML fragment to check (string)
--
-- Returns:
--   true                    if the fragment is accepted
--   false, error_message    if the fragment is rejected
--
-- NOTE(review): the control-character pattern embeds "\0", which Lua 5.1
-- patterns do not support; this presumably runs on Lua 5.2+ -- confirm.
function util.html_is_safe(str)

  -- All (ASCII) control characters except \t\n\f\r are forbidden:
  if string.find(str, "[\0-\8\11\14-\31\127]") then
    return false, "Invalid ASCII control character"
  end

  -- Memorize expected closing tags (innermost open tag is last):
  local stack = {}

  -- State during parsing:
  local para    = false  -- <p> tag open
  local bold    = false  -- <b> tag open
  local italic  = false  -- <i> tag open
  local supsub  = false  -- <sup> or <sub> tag open
  local link    = false  -- <a href="..."> tag open
  local heading = false  -- <h1-6> tag open
  local list    = false  -- <ol> or <ul> (but no corresponding <li>) tag open

  -- Function looped with tail-calls; each call consumes the text up to and
  -- including the next tag and continues with the remainder:
  local function loop(str)

    -- NOTE: We do not allow non-escaped "<" or ">" in attributes,
    -- even if HTML5 allows it.

    -- Find any "<" or ">" character and determine context, i.e.
    -- pre = text before character, tag = text until closing ">", and rest:
    local pre, tag, rest = string.match(str, "^(.-)([<>][^<>]*>?)(.*)")

    -- If nothing matched, all three captures are nil. The whole remainder
    -- is then plain text; treat it as "pre" so that the list check below
    -- still sees it (and never receives nil):
    if not tag then
      pre = str
    end

    -- Disallow text content (except inter-element white-space) in <ol> or
    -- <ul> when outside <li>:
    if list and string.find(pre, "[^\t\n\f\r ]") then
      return false, "Text content in list but outside list element"
    end

    -- If no more "<" or ">" characters are found,
    -- then return true if all tags have been closed:
    if not tag then
      if #stack == 0 then
        return true
      else
        return false, "Not all tags have been closed"
      end
    end

    -- Handle (expected) closing tags:
    local closed_tagname = string.match(tag, "^</(.-)[\t\n\f\r ]*>$")
    if closed_tagname then
      closed_tagname = string.lower(closed_tagname)
      -- With an empty stack, stack[#stack] is nil and this rejects as well:
      if closed_tagname ~= stack[#stack] then
        return false, "Wrong closing tag"
      end
      if closed_tagname == "p" then
        para = false
      elseif closed_tagname == "b" then
        bold = false
      elseif closed_tagname == "i" then
        italic = false
      elseif closed_tagname == "sup" or closed_tagname == "sub" then
        supsub = false
      elseif closed_tagname == "a" then
        link = false
      elseif string.find(closed_tagname, "^h[1-6]$") then
        heading = false
      elseif closed_tagname == "ul" or closed_tagname == "ol" then
        list = false
      elseif closed_tagname == "li" then
        -- Leaving <li> puts us back directly inside the enclosing list:
        list = true
      end
      stack[#stack] = nil
      return loop(rest)
    end

    -- Allow <br> tag as void tag (nothing is pushed on the stack):
    if string.find(tag, "^<[Bb][Rr][\t\n\f\r ]*/?>$") then
      return loop(rest)
    end

    -- Parse opening tag:
    local tagname, attrs = string.match(
      tag,
      "^<([^<>\0-\32]+)[\t\n\f\r ]*([^<>]-)[\t\n\f\r ]*>$"
    )

    -- Return false if tag could not be parsed:
    if not tagname then
      return false, "Malformed tag"
    end

    -- Make tagname lowercase:
    tagname = string.lower(tagname)

    -- Append closing tag to list of expected closing tags:
    stack[#stack+1] = tagname

    -- Allow <li> tag in proper context:
    if tagname == "li" and attrs == "" then
      if not list then
        return false, "List element outside list"
      end
      list = false
      return loop(rest)
    end

    -- If there was no valid <li> tag but <ol> or <ul> is open,
    -- then return false:
    if list then
      return false, "Forbidden tag or forbidden attributes in list but outside list element"
    end

    -- Allow <b>, <i>, <sup>, <sub> unless already open:
    if tagname == "b" and attrs == "" then
      if bold then
        return false, "Bold inside bold tag"
      end
      bold = true
      return loop(rest)
    end
    if tagname == "i" and attrs == "" then
      if italic then
        return false, "Italic inside italic tag"
      end
      italic = true
      return loop(rest)
    end
    if (tagname == "sup" or tagname == "sub") and attrs == "" then
      if supsub then
        return false, "Super/subscript inside super/subscript tag"
      end
      supsub = true
      return loop(rest)
    end

    -- Allow <a href="..."> tag unless already open or malformed.
    -- The href value may be double-quoted, single-quoted, or unquoted:
    if tagname == "a" then
      if link then
        return false, "Link inside link"
      end
      local url = string.match(attrs, '^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*"([^"]*)"$')
      if not url then
        url = string.match(attrs, "^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*'([^']*)'$")
      end
      if not url then
        url = string.match(attrs, "^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*([^\0-\32\"'=<>`]+)$")
      end
      if not url then
        return false, "Forbidden, missing, or malformed attributes in link tag"
      end
      -- Only http:// and https:// URLs are accepted:
      if not string.find(url, "^[Hh][Tt][Tt][Pp][Ss]?://") then
        return false, "Invalid link URL"
      end
      link = true
      return loop(rest)
    end

    -- Remaining tags require no open <p>, <b>, <i>, <sup>, <sub>,
    -- <a href="...">, or <h1>..</h6> tag:
    if para or bold or italic or supsub or link or heading then
      return false, "Forbidden child tag within paragraph, bold, italic, super/subscript, link, or heading tag"
    end

    -- Allow <p>:
    if tagname == "p" and attrs == "" then
      para = true
      return loop(rest)
    end

    -- Allow <h1>..<h6>:
    if string.find(tagname, "^h[1-6]$") and attrs == "" then
      heading = true
      return loop(rest)
    end

    -- Allow <ul> and <ol>:
    if (tagname == "ul" or tagname == "ol") and attrs == "" then
      list = true
      return loop(rest)
    end

    -- Disallow all others (including unexpected closing tags):
    return false, "Forbidden tag or forbidden attributes"

  end

  -- Invoke tail-call loop:
  return loop(str)

end

Impressum / About Us