bsw/jbe@1309: function util.html_is_safe(str) bsw/jbe@1309: bsw/jbe@1309: -- All (ASCII) control characters except \t\n\f\r are forbidden: bsw/jbe@1309: if string.find(str, "[\0-\8\11\14-\31\127]") then bsw/jbe@1309: return false, "Invalid ASCII control character" bsw/jbe@1309: end bsw/jbe@1309: bsw/jbe@1309: -- Memorize expected closing tags: bsw/jbe@1309: local stack = {} bsw/jbe@1309: bsw/jbe@1309: -- State during parsing: bsw/jbe@1309: local para = false --
tag open
bsw/jbe@1309: local bold = false -- <b> tag open
bsw/jbe@1309: local italic = false -- <i> tag open
bsw/jbe@1309: local supsub = false -- <sup> or <sub> tag open
bsw/jbe@1309: local link = false -- <a> tag open
bsw/jbe@1309: local heading = false -- or
(but no corresponding
or
tag) open
bsw/jbe@1309:
bsw/jbe@1309: -- Function looped with tail-calls:
bsw/jbe@1309: local function loop(str)
bsw/jbe@1309:
bsw/jbe@1309: -- NOTE: We do not allow non-escaped "<" or ">" in attributes,
bsw/jbe@1309: -- even if HTML5 allows it.
bsw/jbe@1309:
bsw/jbe@1309: -- Find any "<" or ">" character and determine context, i.e.
bsw/jbe@1309: -- pre = text before character, tag = text until closing ">", and rest:
bsw/jbe@1309: local pre, tag, rest = string.match(str, "^(.-)([<>][^<>]*>?)(.*)")
bsw/jbe@1309:
bsw/jbe@1309: -- Disallow text content (except inter-element white-space) in
or
bsw/jbe@1309: -- when outside
tag as void tag:
bsw/jbe@1309: if string.find(tag, "^<[Bb][Rr][\t\n\f\r ]*/?>$") then
bsw/jbe@1309: return loop(rest)
bsw/jbe@1309: end
bsw/jbe@1309:
bsw/jbe@1309: -- Parse opening tag:
bsw/jbe@1309: local tagname, attrs = string.match(
bsw/jbe@1309: tag,
bsw/jbe@1309: "^<([^<>\0-\32]+)[\t\n\f\r ]*([^<>]-)[\t\n\f\r ]*>$"
bsw/jbe@1309: )
bsw/jbe@1309:
bsw/jbe@1309: -- Return false if tag could not be parsed:
bsw/jbe@1309: if not tagname then
bsw/jbe@1309: return false, "Malformed tag"
bsw/jbe@1309: end
bsw/jbe@1309:
bsw/jbe@1309: -- Make tagname lowercase:
bsw/jbe@1309: tagname = string.lower(tagname)
bsw/jbe@1309:
bsw/jbe@1309: -- Append closing tag to list of expected closing tags:
bsw/jbe@1309: stack[#stack+1] = tagname
bsw/jbe@1309:
bsw/jbe@1309: -- Allow or
is open,
bsw/jbe@1309: -- then return false:
bsw/jbe@1309: if list then
bsw/jbe@1309: return false
bsw/jbe@1309: end
bsw/jbe@1309:
bsw/jbe@1309: -- Allow <b>, <i>, <sup>, <sub> unless already open:
bsw/jbe@1309: if tagname == "b" and attrs == "" then
bsw/jbe@1309: if bold then
bsw/jbe@1309: return false, "Bold inside bold tag"
bsw/jbe@1309: end
bsw/jbe@1309: bold = true
bsw/jbe@1309: return loop(rest)
bsw/jbe@1309: end
bsw/jbe@1309: if tagname == "i" and attrs == "" then
bsw/jbe@1309: if italic then
bsw/jbe@1309: return false, "Italic inside italic tag"
bsw/jbe@1309: end
bsw/jbe@1309: italic = true
bsw/jbe@1309: return loop(rest)
bsw/jbe@1309: end
bsw/jbe@1309: if (tagname == "sup" or tagname == "sub") and attrs == "" then
bsw/jbe@1309: if supsub then
bsw/jbe@1309: return false, "Super/subscript inside super/subscript tag"
bsw/jbe@1309: end
bsw/jbe@1309: supsub = true
bsw/jbe@1309: return loop(rest)
bsw/jbe@1309: end
bsw/jbe@1309:
bsw/jbe@1309: -- Allow <a> tag unless already open or malformed:
bsw/jbe@1309: if tagname == "a" then
bsw/jbe@1309: if link then
bsw/jbe@1309: return false, "Link inside link"
bsw/jbe@1309: end
bsw/jbe@1309: local url = string.match(attrs, '^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*"([^"]*)"$')
bsw/jbe@1309: if not url then
bsw/jbe@1309: url = string.match(attrs, "^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*'([^']*)'$")
bsw/jbe@1309: end
bsw/jbe@1309: if not url then
bsw/jbe@1309: url = string.match(attrs, "^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*([^\0-\32\"'=<>`]+)$")
bsw/jbe@1309: end
bsw/jbe@1309: if not url then
bsw/jbe@1309: return false, "Forbidden, missing, or malformed attributes in link tag"
bsw/jbe@1309: end
bsw/jbe@1309: if not string.find(url, "^[Hh][Tt][Tt][Pp][Ss]?://") then
bsw/jbe@1309: return false, "Invalid link URL"
bsw/jbe@1309: end
bsw/jbe@1309: link = true
bsw/jbe@1309: return loop(rest)
bsw/jbe@1309: end
bsw/jbe@1309:
bsw/jbe@1309: -- Remaining tags require no open