function util.html_is_safe(str) -- All (ASCII) control characters except \t\n\f\r are forbidden: if string.find(str, "[\0-\8\11\14-\31\127]") then return false, "Invalid ASCII control character" end -- Memorize expected closing tags: local stack = {} -- State during parsing: local para = false --
tag open
local bold = false -- tag open
local italic = false -- tag open
local supsub = false -- or tag open
local link = false -- tag open
local heading = false -- or
(but no corresponding
or
tag) open
local pre = false --
-- <pre> tag open
-- Function looped with tail-calls:
local function loop(str)
-- NOTE: We do not allow non-escaped "<" or ">" in attributes,
-- even if HTML5 allows it.
-- Find any "<" or ">" character and determine context, i.e.
-- prefix = text before character, tag = text until closing ">", and rest:
local prefix, tag, rest = string.match(str, "^(.-)([<>][^<>]*>?)(.*)")
-- If no more "<" or ">" characters are found,
-- then return true if all tags have been closed:
if not tag then
if #stack == 0 then
return true
else
return false, "Not all tags have been closed"
end
end
-- Disallow text content (except inter-element white-space) in or
-- when outside
tag as void tag:
if string.find(tag, "^<[Bb][Rr][\t\n\f\r ]*/?>$") then
return loop(rest)
end
-- Parse opening tag:
local tagname, attrs = string.match(
tag,
"^<([^<>\0-\32]+)[\t\n\f\r ]*([^<>]-)[\t\n\f\r ]*>$"
)
-- Return false if tag could not be parsed:
if not tagname then
return false, "Malformed tag"
end
-- Make tagname lowercase:
tagname = string.lower(tagname)
-- Append closing tag to list of expected closing tags:
stack[#stack+1] = tagname
-- Allow or
is open,
-- then return false:
if list then
return false
end
-- Allow <b>, <i>, <sup>, <sub> (without attributes) unless already open:
if tagname == "b" and attrs == "" then
if bold then
return false, "Bold inside bold tag"
end
bold = true
return loop(rest)
end
if tagname == "i" and attrs == "" then
if italic then
return false, "Italic inside italic tag"
end
italic = true
return loop(rest)
end
if (tagname == "sup" or tagname == "sub") and attrs == "" then
if supsub then
return false, "Super/subscript inside super/subscript tag"
end
supsub = true
return loop(rest)
end
-- Allow <a> tag unless already open or malformed:
if tagname == "a" then
if link then
return false, "Link inside link"
end
local url = string.match(attrs, '^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*"([^"]*)"$')
if not url then
url = string.match(attrs, "^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*'([^']*)'$")
end
if not url then
url = string.match(attrs, "^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*([^\0-\32\"'=<>`]+)$")
end
if not url then
return false, "Forbidden, missing, or malformed attributes in link tag"
end
if not string.find(url, "^[Hh][Tt][Tt][Pp][Ss]?://") then
return false, "Invalid link URL"
end
link = true
return loop(rest)
end
-- Always allow <pre> tag:
if tagname == "pre" then
pre = true
return loop(rest)
end
-- Remaining tags require no open