liquid_feedback_frontend

diff env/util/html_is_safe.lua @ 1309:32cc544d5a5b

Cumulative patch for upcoming frontend version 4
author bsw/jbe
date Sun Jul 15 14:07:29 2018 +0200 (2018-07-15)
parents
children e6983d79d74f
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/env/util/html_is_safe.lua	Sun Jul 15 14:07:29 2018 +0200
     1.3 @@ -0,0 +1,191 @@
     1.4 +function util.html_is_safe(str)
     1.5 +
     1.6 +  -- All (ASCII) control characters except \t\n\f\r are forbidden:
     1.7 +  if string.find(str, "[\0-\8\11\14-\31\127]") then
     1.8 +    return false, "Invalid ASCII control character"
     1.9 +  end
    1.10 +
    1.11 +  -- Memorize expected closing tags:
    1.12 +  local stack = {}
    1.13 +
    1.14 +  -- State during parsing:
    1.15 +  local para    = false  -- <p> tag open
    1.16 +  local bold    = false  -- <b> tag open
    1.17 +  local italic  = false  -- <i> tag open
    1.18 +  local supsub  = false  -- <sup> or <sub> tag open
    1.19 +  local link    = false  -- <a href="..."> tag open
    1.20 +  local heading = false  -- <h1-6> tag open
    1.21 +  local list    = false  -- <ol> or <ul> (but no corresponding <li>) tag open
    1.22 +  local listelm = false  -- <li> tag (but no further <ol> or <ul> tag) open
    1.23 +
    1.24 +  -- Function looped with tail-calls:
    1.25 +  local function loop(str)
    1.26 +
    1.27 +    -- NOTE: We do not allow non-escaped "<" or ">" in attributes,
    1.28 +    --       even if HTML5 allows it.
    1.29 +
    1.30 +    -- Find any "<" or ">" character and determine context, i.e.
    1.31 +    -- pre = text before character, tag = text until closing ">", and rest:
    1.32 +    local pre, tag, rest = string.match(str, "^(.-)([<>][^<>]*>?)(.*)")
    1.33 +
    1.34 +    -- Disallow text content (except inter-element white-space) in <ol> or <ul>
    1.35 +    -- when outside <li>:
    1.36 +    if list and string.find(pre, "[^\t\n\f\r ]") then
    1.37 +      return false, "Text content in list but outside list element"
    1.38 +    end
    1.39 +
    1.40 +    -- If no more "<" or ">" characters are found,
    1.41 +    -- then return true if all tags have been closed:
    1.42 +    if not tag then
    1.43 +      if #stack == 0 then
    1.44 +        return true
    1.45 +      else
    1.46 +        return false, "Not all tags have been closed"
    1.47 +      end
    1.48 +    end
    1.49 +
    1.50 +    -- Handle (expected) closing tags:
    1.51 +    local closed_tagname = string.match(tag, "^</(.-)[\t\n\f\r ]*>$")
    1.52 +    if closed_tagname then
    1.53 +      closed_tagname = string.lower(closed_tagname)
    1.54 +      if closed_tagname ~= stack[#stack] then
    1.55 +        return false, "Wrong closing tag"
    1.56 +      end
    1.57 +      if closed_tagname == "p" then
    1.58 +        para = false
    1.59 +      elseif closed_tagname == "b" then
    1.60 +        bold = false
    1.61 +      elseif closed_tagname == "i" then
    1.62 +        italic = false
    1.63 +      elseif closed_tagname == "sup" or closed_tagname == "sub" then
    1.64 +        supsub = false
    1.65 +      elseif closed_tagname == "a" then
    1.66 +        link = false
    1.67 +      elseif string.find(closed_tagname, "^h[1-6]$") then
    1.68 +        heading = false
    1.69 +      elseif closed_tagname == "ul" or closed_tagname == "ol" then
    1.70 +        list = false
    1.71 +      elseif closed_tagname == "li" then
    1.72 +        listelm = false
    1.73 +        list = true
    1.74 +      end
    1.75 +      stack[#stack] = nil
    1.76 +      return loop(rest)
    1.77 +    end
    1.78 +
    1.79 +    -- Allow <br> tag as void tag:
    1.80 +    if string.find(tag, "^<[Bb][Rr][\t\n\f\r ]*/?>$") then
    1.81 +      return loop(rest)
    1.82 +    end
    1.83 +
    1.84 +    -- Parse opening tag:
    1.85 +    local tagname, attrs = string.match(
    1.86 +      tag,
    1.87 +      "^<([^<>\0-\32]+)[\t\n\f\r ]*([^<>]-)[\t\n\f\r ]*>$"
    1.88 +    )
    1.89 +
    1.90 +    -- Return false if tag could not be parsed:
    1.91 +    if not tagname then
    1.92 +      return false, "Malformed tag"
    1.93 +    end
    1.94 +
    1.95 +    -- Make tagname lowercase:
    1.96 +    tagname = string.lower(tagname)
    1.97 +
    1.98 +    -- Append closing tag to list of expected closing tags:
    1.99 +    stack[#stack+1] = tagname
   1.100 +
   1.101 +    -- Allow <li> tag in proper context:
   1.102 +    if tagname == "li" and attrs == "" then
   1.103 +      if not list then
   1.104 +        return false, "List element outside list"
   1.105 +      end
   1.106 +      list = false
   1.107 +      listelm = true
   1.108 +      return loop(rest)
   1.109 +    end
   1.110 +
   1.111 +    -- If there was no valid <li> tag but <ol> or <ul> is open,
   1.112 +    -- then return false:
   1.113 +    if list then
   1.114 +      return false
   1.115 +    end
   1.116 +
   1.117 +    -- Allow <b>, <i>, <sup>, <sub> unless already open:
   1.118 +    if tagname == "b" and attrs == "" then
   1.119 +      if bold then
   1.120 +        return false, "Bold inside bold tag"
   1.121 +      end
   1.122 +      bold = true
   1.123 +      return loop(rest)
   1.124 +    end
   1.125 +    if tagname == "i" and attrs == "" then
   1.126 +      if italic then
   1.127 +        return false, "Italic inside italic tag"
   1.128 +      end
   1.129 +      italic = true
   1.130 +      return loop(rest)
   1.131 +    end
   1.132 +    if (tagname == "sup" or tagname == "sub") and attrs == "" then
   1.133 +      if supsub then
   1.134 +        return false, "Super/subscript inside super/subscript tag"
   1.135 +      end
   1.136 +      supsub = true
   1.137 +      return loop(rest)
   1.138 +    end
   1.139 +
   1.140 +    -- Allow <a href="..."> tag unless already open or malformed:
   1.141 +    if tagname == "a" then
   1.142 +      if link then
   1.143 +        return false, "Link inside link"
   1.144 +      end
   1.145 +      local url = string.match(attrs, '^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*"([^"]*)"$')
   1.146 +      if not url then
   1.147 +        url = string.match(attrs, "^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*'([^']*)'$")
   1.148 +      end
   1.149 +      if not url then
   1.150 +        url = string.match(attrs, "^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*([^\0-\32\"'=<>`]+)$")
   1.151 +      end
   1.152 +      if not url then
   1.153 +       return false, "Forbidden, missing, or malformed attributes in link tag"
   1.154 +      end
   1.155 +      if not string.find(url, "^[Hh][Tt][Tt][Pp][Ss]?://") then
   1.156 +        return false, "Invalid link URL"
   1.157 +      end
   1.158 +      link = true
   1.159 +      return loop(rest)
   1.160 +    end
   1.161 +
   1.162 +    -- Remaining tags require no open <p>, <b>, <i>, <sup>, <sub>,
   1.163 +    -- <a href="...">, or <h1>..</h6> tag:
   1.164 +    if para or bold or italic or supsub or link or heading then
   1.165 +      return false, "Forbidden child tag within paragraph, bold, italic, super/subscript, link, or heading tag"
   1.166 +    end
   1.167 +
   1.168 +    -- Allow <p>:
   1.169 +    if tagname == "p" and attrs == "" then
   1.170 +      para = true
   1.171 +      return loop(rest)
   1.172 +    end
   1.173 +
   1.174 +    -- Allow <h1>..<h6>:
   1.175 +    if string.find(tagname, "^h[1-6]$") and attrs == "" then
   1.176 +      heading = true
   1.177 +      return loop(rest)
   1.178 +    end
   1.179 +
   1.180 +    -- Allow <ul> and <ol>:
   1.181 +    if (tagname == "ul" or tagname == "ol") and attrs == "" then
   1.182 +      list = true
   1.183 +      return loop(rest)
   1.184 +    end
   1.185 +
   1.186 +    -- Disallow all others (including unexpected closing tags):
   1.187 +    return false, "Forbidden tag or forbidden attributes"
   1.188 +
   1.189 +  end
   1.190 +
   1.191 +  -- Invoke tail-call loop:
   1.192 +  return loop(str)
   1.193 +
   1.194 +end

Impressum / About Us