--- Convert a restricted subset of HTML into wiki-style plain text.
--
-- Recognised markup (tag names are matched case-insensitively, and only
-- in their attribute-less form unless noted otherwise):
--
--   * `<br>` / `<br/>`        -> newline
--   * `<p>` / `</p>`          -> blank line
--   * `<b>`, `<i>`            -> `**`, `//`
--   * `<sub>`, `<sup>`        -> `__`, `^^`
--   * `<a href=...>text</a>`  -> `[[url text]]` (double-quoted,
--                                single-quoted or unquoted href)
--   * `<h1>` .. `<h6>`        -> text wrapped in six .. one `#` characters
--   * `<ol>`, `<ul>`, `<li>`  -> items prefixed with `N. ` or `* `, one
--                                prefix per enclosing list level
--
-- Every other tag is removed, `&lt;` / `&gt;` are decoded, whitespace is
-- normalised and the result always ends with exactly one newline.
--
-- @param str HTML source string
-- @return the plain-text rendering of `str`
function util.html_to_text(str)
  -- All control characters and whitespace (bytes 0..32, which includes
  -- newlines) become plain spaces; the only newlines in the result are
  -- the ones generated from markup below.
  str = string.gsub(str, "[\0-\32]", " ")

  -- Line breaks, paragraphs and inline formatting.
  str = string.gsub(str, "<[Bb][Rr] */?>", "\n")
  str = string.gsub(str, "</?[Pp] *>", "\n\n")
  str = string.gsub(str, "</?[Bb] *>", "**")
  str = string.gsub(str, "</?[Ii] *>", "//")
  str = string.gsub(str, "</?[Ss][Uu][Bb] *>", "__")
  str = string.gsub(str, "</?[Ss][Uu][Pp] *>", "^^")

  -- Links: the href value may be double-quoted, single-quoted or bare.
  str = string.gsub(str, '<[Aa] *[Hh][Rr][Ee][Ff] *= *"([^"]*)" *>', "[[%1 ")
  str = string.gsub(str, "<[Aa] *[Hh][Rr][Ee][Ff] *= *'([^']*)' *>", "[[%1 ")
  str = string.gsub(str, "<[Aa] *[Hh][Rr][Ee][Ff] *= *([^ <>\"']*) *>", "[[%1 ")
  str = string.gsub(str, "</[Aa] *>", "]]")

  -- Headings: <h1> is wrapped in six '#', <h6> in a single '#'.
  for level = 1, 6 do
    local marks = string.rep("#", 7 - level)
    str = string.gsub(str, "<[Hh]" .. level .. " *>", "\n\n" .. marks .. " ")
    str = string.gsub(str, "</[Hh]" .. level .. " *>", " " .. marks .. "\n\n")
  end

  -- Lists. `counters` is a stack with one entry per currently open list:
  -- a number (the index of the last item emitted) for <ol>, `false` for
  -- <ul>. gsub visits the tags in document order, so the stack always
  -- reflects the nesting at the tag being replaced.
  local counters = {}
  str = string.gsub(str, "<(/?%a+) *>", function(tagname)
    tagname = string.lower(tagname)
    if tagname == "ol" then
      counters[#counters + 1] = 0
      return "\n\n"
    elseif tagname == "ul" then
      counters[#counters + 1] = false
      return "\n\n"
    elseif tagname == "/ol" or tagname == "/ul" then
      -- Pop the innermost list; harmless when no list is open.
      counters[#counters] = nil
      return "\n\n"
    elseif tagname == "li" then
      local depth = #counters
      if counters[depth] then
        counters[depth] = counters[depth] + 1
      end
      -- The item prefix repeats the marker of every enclosing list,
      -- outermost first (e.g. "2. * " for a bullet inside item 2).
      local parts = {}
      for idx = 1, depth do
        local counter = counters[idx]
        if counter then
          parts[idx] = tostring(counter) .. ". "
        else
          parts[idx] = "* "
        end
      end
      return table.concat(parts)
    elseif tagname == "/li" then
      return "\n"
    end
    -- Any other tag: returning nothing keeps the match untouched so the
    -- generic tag removal below deals with it.
  end)

  -- Drop all remaining tags, then decode the escaped angle brackets.
  -- Decoding happens after tag removal so decoded text is never mistaken
  -- for markup.
  -- NOTE(review): other entities (e.g. &amp;) are left as they are --
  -- confirm whether callers expect them to be decoded as well.
  str = string.gsub(str, "<[^<>]*>", "")
  str = string.gsub(str, "&lt;", "<")
  str = string.gsub(str, "&gt;", ">")

  -- Whitespace normalisation: collapse runs of spaces, trim a space at
  -- the start and end of every line (the frontier patterns treat the
  -- string boundaries like a newline), and limit consecutive newlines
  -- to one blank line.
  str = string.gsub(str, "  +", " ")
  str = string.gsub(str, "%f[^\0\n] ", "")
  str = string.gsub(str, " %f[\0\n]", "")
  str = string.gsub(str, "\n\n\n+", "\n\n")
  str = string.gsub(str, "^\n+", "")

  -- Finish with exactly one newline. Stripping and re-appending avoids a
  -- pattern that can match the empty string at the end of the subject,
  -- which gsub replaces a second time on Lua 5.3 and earlier.
  str = string.gsub(str, "\n+$", "") .. "\n"
  return str
end