| rev | line source | 
-- Checks whether a string is a well-nested document made only of a small
-- whitelist of HTML tags, so that it may be embedded without escaping.
--
-- Accepted tags (tag names are matched case-insensitively):
--   <br> (void), <p>, <b>, <i>, <sup>, <sub>, <h1>..<h6>, <ul>, <ol>, <li>,
--   <pre>, and <a href="..."> where the URL starts with http:// or https://.
-- No tag other than <a> may carry attributes, and <a> may carry only "href".
--
-- Parameters:
--   str  (string) HTML fragment to validate
--
-- Returns:
--   true                   if the fragment passes all checks
--   false, message         otherwise; message is a short English description
--                          of the first violation that was found
function util.html_is_safe(str)

  -- All (ASCII) control characters except \t\n\f\r are forbidden:
  if string.find(str, "[\0-\8\11\14-\31\127]") then
    return false, "Invalid ASCII control character"
  end

  -- Memorize expected closing tags (innermost open tag is the last entry):
  local stack = {}

  -- State during parsing:
  local para    = false  -- <p> tag open
  local bold    = false  -- <b> tag open
  local italic  = false  -- <i> tag open
  local supsub  = false  -- <sup> or <sub> tag open
  local link    = false  -- <a href="..."> tag open
  local heading = false  -- <h1-6> tag open
  local list    = false  -- <ol> or <ul> (but no corresponding <li>) tag open
  -- The following two flags are tracked but not consulted by any check yet:
  local listelm = false  -- <li> tag (but no further <ol> or <ul> tag) open
  local pre     = false  -- <pre> tag open

  -- Function looped with tail-calls; each call consumes text up to and
  -- including the next tag and then continues with the remaining input:
  local function loop(str)

    -- NOTE: We do not allow non-escaped "<" or ">" in attributes,
    --       even if HTML5 allows it.

    -- Find any "<" or ">" character and determine context, i.e.
    -- prefix = text before character, tag = text until closing ">", and rest:
    local prefix, tag, rest = string.match(str, "^(.-)([<>][^<>]*>?)(.*)")

    -- If no more "<" or ">" characters are found,
    -- then return true if all tags have been closed:
    if not tag then
      if #stack == 0 then
        return true
      else
        return false, "Not all tags have been closed"
      end
    end

    -- Disallow text content (except inter-element white-space) in <ol> or <ul>
    -- when outside <li>:
    if list and string.find(prefix, "[^\t\n\f\r ]") then
      return false, "Text content in list but outside list element"
    end

    -- Handle (expected) closing tags:
    local closed_tagname = string.match(tag, "^</(.-)[\t\n\f\r ]*>$")
    if closed_tagname then
      closed_tagname = string.lower(closed_tagname)
      -- Only the innermost open tag may be closed:
      if closed_tagname ~= stack[#stack] then
        return false, "Wrong closing tag"
      end
      if closed_tagname == "p" then
        para = false
      elseif closed_tagname == "b" then
        bold = false
      elseif closed_tagname == "i" then
        italic = false
      elseif closed_tagname == "sup" or closed_tagname == "sub" then
        supsub = false
      elseif closed_tagname == "a" then
        link = false
      elseif string.find(closed_tagname, "^h[1-6]$") then
        heading = false
      elseif closed_tagname == "ul" or closed_tagname == "ol" then
        list = false
      elseif closed_tagname == "li" then
        -- An <li> can only be opened directly inside a list, so closing it
        -- puts us back into the "list open, no list element" state:
        listelm = false
        list = true
      elseif closed_tagname == "pre" then
        pre = false
      end
      stack[#stack] = nil
      return loop(rest)
    end

    -- Allow <br> tag as void tag:
    if string.find(tag, "^<[Bb][Rr][\t\n\f\r ]*/?>$") then
      return loop(rest)
    end

    -- Parse opening tag:
    local tagname, attrs = string.match(
      tag,
      "^<([^<>\0-\32]+)[\t\n\f\r ]*([^<>]-)[\t\n\f\r ]*>$"
    )

    -- Return false if tag could not be parsed:
    if not tagname then
      return false, "Malformed tag"
    end

    -- Make tagname lowercase:
    tagname = string.lower(tagname)

    -- Append closing tag to list of expected closing tags:
    stack[#stack+1] = tagname

    -- Allow <li> tag in proper context:
    if tagname == "li" and attrs == "" then
      if not list then
        return false, "List element outside list"
      end
      list = false
      listelm = true
      return loop(rest)
    end

    -- If there was no valid <li> tag but <ol> or <ul> is open,
    -- then return false:
    if list then
      return false, "Forbidden tag or forbidden attributes in list but outside list element"
    end

    -- Allow <b>, <i>, <sup>, <sub> unless already open:
    if tagname == "b" and attrs == "" then
      if bold then
        return false, "Bold inside bold tag"
      end
      bold = true
      return loop(rest)
    end
    if tagname == "i" and attrs == "" then
      if italic then
        return false, "Italic inside italic tag"
      end
      italic = true
      return loop(rest)
    end
    if (tagname == "sup" or tagname == "sub") and attrs == "" then
      if supsub then
        return false, "Super/subscript inside super/subscript tag"
      end
      supsub = true
      return loop(rest)
    end

    -- Allow <a href="..."> tag unless already open or malformed:
    if tagname == "a" then
      if link then
        return false, "Link inside link"
      end
      -- The href value may be double-quoted, single-quoted, or unquoted;
      -- it must be the only attribute:
      local url = string.match(attrs, '^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*"([^"]*)"$')
      if not url then
        url = string.match(attrs, "^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*'([^']*)'$")
      end
      if not url then
        url = string.match(attrs, "^[Hh][Rr][Ee][Ff][\t\n\f\r ]*=[\t\n\f\r ]*([^\0-\32\"'=<>`]+)$")
      end
      if not url then
        return false, "Forbidden, missing, or malformed attributes in link tag"
      end
      -- Only http:// and https:// URLs are accepted (rejects e.g. javascript:):
      if not string.find(url, "^[Hh][Tt][Tt][Pp][Ss]?://") then
        return false, "Invalid link URL"
      end
      link = true
      return loop(rest)
    end

    -- Allow <pre> in any context (even within <p>, <b>, etc.), but never with
    -- attributes: attributes such as event handlers would otherwise pass
    -- through this check unvalidated.
    if tagname == "pre" then
      if attrs ~= "" then
        return false, "Forbidden attributes in pre tag"
      end
      pre = true
      return loop(rest)
    end

    -- Remaining tags require no open <p>, <b>, <i>, <sup>, <sub>,
    -- <a href="...">, or <h1>..</h6> tag:
    -- TODO: HTML also requires that no <pre> tag is open, but check not done
    -- here due to used WYSIWYG editor
    if para or bold or italic or supsub or link or heading then
      return false, "Forbidden child tag within paragraph, bold, italic, super/subscript, link, or heading tag"
    end

    -- Allow <p>:
    if tagname == "p" and attrs == "" then
      para = true
      return loop(rest)
    end

    -- Allow <h1>..<h6>:
    if string.find(tagname, "^h[1-6]$") and attrs == "" then
      heading = true
      return loop(rest)
    end

    -- Allow <ul> and <ol>:
    if (tagname == "ul" or tagname == "ol") and attrs == "" then
      list = true
      return loop(rest)
    end

    -- Disallow all others (including unexpected closing tags):
    return false, "Forbidden tag or forbidden attributes"

  end

  -- Invoke tail-call loop:
  return loop(str)

end