-- Converts an HTML fragment into a plain-text, wiki-like representation.
--
-- Parameters:
--   str  HTML source as a string.
--
-- Returns:
--   The converted text. Line breaks and paragraphs become newlines, inline
--   markup and headings become textual markers, links become "[[url text]]",
--   list items get a "* " or "N. " prefix per nesting level, all remaining
--   tags are dropped, and the result always ends in exactly one newline.
--
-- NOTE(review): the patterns marked "(reconstructed)" below were empty
-- strings in the reviewed copy of this file (an empty pattern makes
-- string.gsub insert the replacement between every character). They have
-- been rebuilt from the replacement markers; confirm the exact tag names
-- against the canonical source.
function util.html_to_text(str)
  -- Raw whitespace/control characters carry no meaning in HTML; all line
  -- structure in the result comes from the tags handled below.
  str = string.gsub(str, "[\0-\32]", " ")

  -- Line and paragraph breaks.
  str = string.gsub(str, "<[Bb][Rr] */?>", "\n")
  str = string.gsub(str, "</[Pp] *>", "\n\n")               -- (reconstructed)

  -- Inline markup: the same marker is used for opening and closing tag.
  str = string.gsub(str, "</?[Bb] *>", "**")                -- (reconstructed)
  str = string.gsub(str, "</?[Ii] *>", "//")                -- (reconstructed)
  str = string.gsub(str, "</?[Uu] *>", "__")                -- (reconstructed)
  str = string.gsub(str, "</?[Ss][Uu][Pp] *>", "^^")        -- (reconstructed)

  -- Links: href may be double-quoted, single-quoted or unquoted.
  str = string.gsub(str, '<[Aa] *[Hh][Rr][Ee][Ff] *= *"([^"]*)" *>', "[[%1 ")
  str = string.gsub(str, "<[Aa] *[Hh][Rr][Ee][Ff] *= *'([^']*)' *>", "[[%1 ")
  str = string.gsub(str, "<[Aa] *[Hh][Rr][Ee][Ff] *= *([^ <>\"']*) *>", "[[%1 ")
  str = string.gsub(str, "</[Aa] *>", "]]")                 -- (reconstructed)

  -- Headings: <h1> is rendered with six '#', <h6> with one.
  for level = 1, 6 do
    local marks = string.rep("#", 7 - level)
    str = string.gsub(str, "<[Hh]" .. level .. " *>", "\n\n" .. marks .. " ")
    str = string.gsub(str, "</[Hh]" .. level .. " *>", " " .. marks .. "\n\n")  -- (reconstructed)
  end

  -- First pass over the list markup: walk list tags and <li> tags in document
  -- order and precompute the prefix of every list item. `counters` is a stack
  -- with one entry per open list: a number for <ol> (last index handed out)
  -- or false for <ul>.
  local li_info = {}
  local counters = {}
  local pos = 1
  while true do
    -- The list pattern accepts both letter cases, like every other pattern in
    -- this function (it previously required a lowercase "l").
    local list_start, list_stop, list_tagname =
      string.find(str, "<(/?[OoUu][Ll]) *>", pos)
    local elem_start, elem_stop = string.find(str, "<[Ll][Ii] *>", pos)
    if not list_start and not elem_start then
      break
    end
    if list_start and (not elem_start or list_start < elem_start) then
      -- A list tag comes next: open or close a nesting level.
      pos = list_stop
      list_tagname = string.lower(list_tagname)
      if list_tagname == "ol" then
        counters[#counters+1] = 0
      elseif list_tagname == "ul" then
        counters[#counters+1] = false
      else
        counters[#counters] = nil
      end
    else
      -- A list item comes next: advance the innermost counter (if numbered)
      -- and record the prefix built from all enclosing lists.
      pos = elem_stop
      if counters[#counters] then
        counters[#counters] = counters[#counters] + 1
      end
      local string_parts = {}
      for idx, counter in ipairs(counters) do
        if counter then
          string_parts[idx] = tostring(counter) .. ". "
        else
          string_parts[idx] = "* "
        end
      end
      li_info[#li_info+1] = table.concat(string_parts)
    end
  end

  -- Second pass: separate lists from surrounding text and substitute the
  -- precomputed prefixes. Both passes use the same <li> pattern, so the n-th
  -- match here corresponds to li_info[n].
  str = string.gsub(str, "</?[OoUu][Ll] *>", "\n\n")        -- (reconstructed)
  local li_index = 0
  str = string.gsub(str, "<[Ll][Ii] *>", function()
    li_index = li_index + 1
    return li_info[li_index]
  end)
  str = string.gsub(str, "</[Ll][Ii] *>", "\n")             -- (reconstructed)

  -- Drop every remaining tag, then decode entities. Decoding happens after
  -- tag removal so that an escaped "&lt;b&gt;" is not mistaken for markup,
  -- and "&amp;" is decoded last so that "&amp;lt;" yields "&lt;", not "<".
  str = string.gsub(str, "<[^<>]*>", "")
  str = string.gsub(str, "&lt;", "<")                       -- (reconstructed)
  str = string.gsub(str, "&gt;", ">")                       -- (reconstructed)
  str = string.gsub(str, "&quot;", '"')
  str = string.gsub(str, "&amp;", "&")

  -- Whitespace normalisation: collapse runs of spaces, trim spaces at the
  -- start and end of every line (the frontier patterns treat the string
  -- boundaries like "\0"), and limit blank lines to one.
  str = string.gsub(str, " +", " ")
  str = string.gsub(str, "%f[^\0\n] ", "")
  str = string.gsub(str, " %f[\0\n]", "")
  str = string.gsub(str, "\n\n\n+", "\n\n")
  str = string.gsub(str, "^\n+", "")

  -- End with exactly one newline. Stripping and re-appending avoids the
  -- pattern "\n*$", which on some Lua versions matches twice (the newline
  -- run, then the empty string at the end) and yields two trailing newlines.
  str = string.gsub(str, "\n+$", "") .. "\n"
  return str
end