| rev | line source | 
-- Converts an HTML fragment into plain text carrying lightweight, wiki-style
-- markup.
--
-- Recognised tags (matched case-insensitively, without attributes except for
-- the href of <a>):
--   <br>            -> newline
--   <p>, </p>       -> blank line
--   <b>, <i>        -> ** and //
--   <sub>, <sup>    -> __ and ^^
--   <a href=X>T</a> -> [[X T]]
--   <h1> .. <h6>    -> text framed by six (h1) down to one (h6) '#' characters
--   <ol>, <ul>, <li>-> one line per item, prefixed with "N. " or "* " for
--                      every enclosing list level
-- All remaining tags are removed, the entities &lt; &gt; &quot; &amp; are
-- decoded, and whitespace is normalised.
--
-- Parameters:
--   str  string containing the HTML fragment
-- Returns:
--   the converted text; it never starts with a newline and always ends with
--   exactly one newline
--
-- NOTE(review): several patterns embed "\0", which presumably requires the
-- pattern handling of Lua 5.2 or later -- confirm against the target runtime.
function util.html_to_text(str)
  -- Turn every byte in the range 0..32 (control characters, tabs, newlines,
  -- spaces) into a plain space. Any newline present afterwards was therefore
  -- produced by one of the tag replacements below.
  str = string.gsub(str, "[\0-\32]", " ")

  -- Line breaks, paragraphs and inline formatting.
  str = string.gsub(str, "<[Bb][Rr] */?>", "\n")
  str = string.gsub(str, "</?[Pp] *>", "\n\n")
  str = string.gsub(str, "</?[Bb] *>", "**")
  str = string.gsub(str, "</?[Ii] *>", "//")
  str = string.gsub(str, "</?[Ss][Uu][Bb] *>", "__")
  str = string.gsub(str, "</?[Ss][Uu][Pp] *>", "^^")

  -- Hyperlinks: href may be double-quoted, single-quoted or unquoted.
  str = string.gsub(str, '<[Aa] *[Hh][Rr][Ee][Ff] *= *"([^"]*)" *>', "[[%1 ")
  str = string.gsub(str, "<[Aa] *[Hh][Rr][Ee][Ff] *= *'([^']*)' *>", "[[%1 ")
  str = string.gsub(str, "<[Aa] *[Hh][Rr][Ee][Ff] *= *([^ <>\"']*) *>", "[[%1 ")
  str = string.gsub(str, "</[Aa] *>", "]]")

  -- Headings: <h1> is framed by six '#', <h6> by a single one.
  for level = 1, 6 do
    local marks = string.rep("#", 7 - level)
    str = string.gsub(str, "<[Hh]" .. level .. " *>", "\n\n" .. marks .. " ")
    str = string.gsub(str, "</[Hh]" .. level .. " *>", " " .. marks .. "\n\n")
  end

  -- First list pass: walk over all list and item tags in document order and
  -- compute the prefix of every <li>. `counters` is a stack with one entry
  -- per open list: a number (last item number used) for <ol>, false for <ul>.
  local li_info = {}
  local counters = {}
  local pos = 1
  while true do
    -- The list pattern is case-insensitive in both letters so that <UL>/<OL>
    -- are handled exactly like <ul>/<ol>.
    local list_start, list_stop, list_tagname =
      string.find(str, "<(/?[OoUu][Ll]) *>", pos)
    local elem_start, elem_stop = string.find(str, "<[Ll][Ii] *>", pos)

    if not list_start and not elem_start then
      break
    end

    -- Process whichever tag comes first; a nil list_tagname below means
    -- "the current tag is an <li>".
    if list_start and (not elem_start or list_start < elem_start) then
      pos = list_stop
      list_tagname = string.lower(list_tagname)
    else
      pos = elem_stop
      list_tagname = nil
    end

    if list_tagname == "ol" then
      counters[#counters+1] = 0
    elseif list_tagname == "ul" then
      counters[#counters+1] = false
    elseif list_tagname then
      -- Closing </ol> or </ul>: leave the innermost list.
      counters[#counters] = nil
    else
      -- <li>: advance the innermost counter if that list is ordered
      -- (0 is truthy in Lua, false marks an unordered list).
      if counters[#counters] then
        counters[#counters] = counters[#counters] + 1
      end
      -- The prefix contains one marker per enclosing list level.
      local string_parts = {}
      for idx, counter in ipairs(counters) do
        if counter then
          string_parts[idx] = tostring(counter) .. ". "
        else
          string_parts[idx] = "* "
        end
      end
      li_info[#li_info+1] = table.concat(string_parts)
    end
  end

  -- Second list pass: replace the tags. The <li> pattern is the same one used
  -- for scanning above, so the n-th replacement corresponds to li_info[n].
  str = string.gsub(str, "</?[OoUu][Ll] *>", "\n\n")
  local li_index = 0
  str = string.gsub(str, "<[Ll][Ii] *>", function()
    li_index = li_index + 1
    return li_info[li_index]
  end)
  str = string.gsub(str, "</[Ll][Ii] *>", "\n")

  -- Drop every remaining tag.
  str = string.gsub(str, "<[^<>]*>", "")

  -- Decode entities only after tag stripping, so that decoded angle brackets
  -- are never mistaken for markup. "&amp;" is decoded last so that a doubly
  -- escaped sequence such as "&amp;lt;" yields the literal text "&lt;".
  str = string.gsub(str, "&lt;", "<")
  str = string.gsub(str, "&gt;", ">")
  str = string.gsub(str, "&quot;", '"')
  str = string.gsub(str, "&amp;", "&")

  -- Whitespace normalisation: collapse runs of spaces, trim spaces at the
  -- beginning and end of every line, and limit vertical space to one blank
  -- line.
  str = string.gsub(str, "  +", " ")
  str = string.gsub(str, "%f[^\0\n] ", "")
  str = string.gsub(str, " %f[\0\n]", "")
  str = string.gsub(str, "\n\n\n+", "\n\n")
  str = string.gsub(str, "^\n+", "")

  -- Strip all trailing newlines and append exactly one. (Replacing "\n*$"
  -- by "\n" in a single gsub relies on how empty matches at the end of the
  -- subject are handled, which differs between Lua versions.)
  str = string.gsub(str, "\n+$", "") .. "\n"
  return str
end