liquid_feedback_frontend
view env/util/html_is_safe.lua @ 1662:59cbb870253c
Show draft edit again on error
| author | bsw | 
|---|---|
| date | Mon Feb 15 14:17:51 2021 +0100 (2021-02-15) | 
| parents | 32cc544d5a5b | 
| children | e6983d79d74f | 
 line source
     --- Check whether a string consists only of a restricted, safe HTML subset.
     --
     -- Accepted input (tag names are matched case-insensitively and every
     -- non-void tag must be closed in strict nesting order):
     --   * <br> (void, optionally written as <br/>)
     --   * <b>, <i>, <sup>, <sub> without attributes, not nested in themselves
     --     (<sup> and <sub> share one flag, so they may not nest in each other)
     --   * <a> with exactly one href attribute whose value starts with
     --     http:// or https://, not nested in another <a>
     --   * <p>, <h1>..<h6>, <ul>, <ol> without attributes, only while no
     --     <p>, <b>, <i>, <sup>, <sub>, <a>, or heading is open
     --   * <li> without attributes, only directly inside <ul> or <ol>
     -- Directly inside <ul>/<ol> (outside <li>) only white-space text is allowed.
     -- A raw "<" or ">" that is not part of one of these tags is rejected.
     --
     -- @param str  string to check (a non-string raises an error in string.find)
     -- @return true if the input is accepted; otherwise false followed by a
     --         human-readable error message
     function util.html_is_safe(str)

       -- All (ASCII) control characters except \t\n\f\r are forbidden:
       if string.find(str, "[\0-\8\11\14-\31\127]") then
         return false, "Invalid ASCII control character"
       end

       -- Memorize expected closing tags:
       local stack = {}

       -- State during parsing:
       local para    = false  -- <p> tag open
       local bold    = false  -- <b> tag open
       local italic  = false  -- <i> tag open
       local supsub  = false  -- <sup> or <sub> tag open
       local link    = false  -- <a href="..."> tag open
       local heading = false  -- <h1-6> tag open
       local list    = false  -- <ol> or <ul> (but no corresponding <li>) tag open

       -- Function looped with tail-calls (tail calls do not grow the stack):
       local function loop(str)

         -- NOTE: We do not allow non-escaped "<" or ">" in attributes,
         --       even if HTML5 allows it.

         -- Find any "<" or ">" character and determine context, i.e.
         -- pre = text before character, tag = text until closing ">", and rest:
         local pre, tag, rest = string.match(str, "^(.-)([<>][^<>]*>?)(.*)")

         -- Without any "<" or ">" the match fails and all captures are nil.
         -- The whole remaining input is then plain trailing text; treat it as
         -- "pre" so the list check below never receives nil.
         if not tag then
           pre = str
         end

         -- Disallow text content (except inter-element white-space) in <ol> or <ul>
         -- when outside <li>:
         if list and string.find(pre, "[^\t\n\f\r ]") then
           return false, "Text content in list but outside list element"
         end

         -- If no more "<" or ">" characters are found,
         -- then return true if all tags have been closed:
         if not tag then
           if #stack == 0 then
             return true
           else
             return false, "Not all tags have been closed"
           end
         end

         -- Handle (expected) closing tags:
         local closed_tagname = string.match(tag, "^</(.-)[\t\n\f\r ]*>$")
         if closed_tagname then
           closed_tagname = string.lower(closed_tagname)
           -- Only the innermost open tag may be closed (also fails on empty stack):
           if closed_tagname ~= stack[#stack] then
             return false, "Wrong closing tag"
           end
           if closed_tagname == "p" then
             para = false
           elseif closed_tagname == "b" then
             bold = false
           elseif closed_tagname == "i" then
             italic = false
           elseif closed_tagname == "sup" or closed_tagname == "sub" then
             supsub = false
           elseif closed_tagname == "a" then
             link = false
           elseif string.find(closed_tagname, "^h[1-6]$") then
             heading = false
           elseif closed_tagname == "ul" or closed_tagname == "ol" then
             list = false
           elseif closed_tagname == "li" then
             -- Back directly inside the enclosing <ul>/<ol>:
             list = true
           end
           stack[#stack] = nil
           return loop(rest)
         end

         -- Allow <br> tag as void tag (nothing is pushed on the stack):
         if string.find(tag, "^<[Bb][Rr][\t\n\f\r ]*/?>$") then
           return loop(rest)
         end

         -- Parse opening tag into its name and the raw attribute text:
         local tagname, attrs = string.match(
           tag,
           "^<([^<>\0-\32]+)[\t\n\f\r ]*([^<>]-)[\t\n\f\r ]*>$"
         )

         -- Return false if tag could not be parsed:
         if not tagname then
           return false, "Malformed tag"
         end

         -- Make tagname lowercase:
         tagname = string.lower(tagname)

         -- Append closing tag to list of expected closing tags:
         stack[#stack+1] = tagname

         -- Allow <li> tag in proper context:
         if tagname == "li" and attrs == "" then
           if not list then
             return false, "List element outside list"
           end
           -- Inside <li> arbitrary content is allowed again:
           list = false
           return loop(rest)
         end

         -- If there was no valid <li> tag but <ol> or <ul> is open,
         -- then return false:
         if list then
           return false, "Forbidden tag in list but outside list element"
         end

         -- Allow <b>, <i>, <sup>, <sub> unless already open:
         if tagname == "b" and attrs == "" then
           if bold then
             return false, "Bold inside bold tag"
           end
           bold = true
           return loop(rest)
         end
         if tagname == "i" and attrs == "" then
           if italic then
             return false, "Italic inside italic tag"
           end
           italic = true
           return loop(rest)
         end
         if (tagname == "sup" or tagname == "sub") and attrs == "" then
           if supsub then
             return false, "Super/subscript inside super/subscript tag"
           end
           supsub = true
           return loop(rest)
         end

         -- Allow <a href="..."> tag unless already open or malformed:
         if tagname == "a" then
           if link then
             return false, "Link inside link"
           end
           -- The attribute text must be a single href, either double-quoted,
           -- single-quoted, or unquoted:
           local url = string.match(attrs, '^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*"([^"]*)"$')
           if not url then
             url = string.match(attrs, "^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*'([^']*)'$")
           end
           if not url then
             url = string.match(attrs, "^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*([^\0-\32\"'=<>`]+)$")
           end
           if not url then
             return false, "Forbidden, missing, or malformed attributes in link tag"
           end
           -- Only http:// and https:// URLs are accepted:
           if not string.find(url, "^[Hh][Tt][Tt][Pp][Ss]?://") then
             return false, "Invalid link URL"
           end
           link = true
           return loop(rest)
         end

         -- Remaining tags require no open <p>, <b>, <i>, <sup>, <sub>,
         -- <a href="...">, or <h1>..</h6> tag:
         if para or bold or italic or supsub or link or heading then
           return false, "Forbidden child tag within paragraph, bold, italic, super/subscript, link, or heading tag"
         end

         -- Allow <p>:
         if tagname == "p" and attrs == "" then
           para = true
           return loop(rest)
         end

         -- Allow <h1>..<h6>:
         if string.find(tagname, "^h[1-6]$") and attrs == "" then
           heading = true
           return loop(rest)
         end

         -- Allow <ul> and <ol>:
         if (tagname == "ul" or tagname == "ol") and attrs == "" then
           list = true
           return loop(rest)
         end

         -- Disallow all others (including unexpected closing tags):
         return false, "Forbidden tag or forbidden attributes"

       end

       -- Invoke tail-call loop:
       return loop(str)

     end
