liquid_feedback_frontend
diff env/util/html_to_text.lua @ 1309:32cc544d5a5b
Cumulative patch for upcoming frontend version 4
author | bsw/jbe |
---|---|
date | Sun Jul 15 14:07:29 2018 +0200 (2018-07-15) |
parents | |
children |
line diff
-- Computes the textual prefix for every <li> tag in `str`, in document order.
--
-- The string is scanned for opening/closing <ol>/<ul> tags and <li> tags.
-- A stack (`counters`) holds one entry per currently open list: a running
-- number for <ol>, `false` for <ul>. For each <li> the prefix is the
-- concatenation of one marker per open list ("N. " for ordered lists,
-- "* " for unordered ones), so nested items carry the markers of all
-- enclosing lists. An <li> outside of any list gets an empty prefix.
--
-- Returns an array of prefix strings, one per <li> tag.
local function list_item_prefixes(str)
  local prefixes = {}
  local counters = {}
  local pos = 1
  while true do
    -- Tag names are matched case-insensitively, consistent with <li>.
    local list_start, list_stop, list_tagname =
      string.find(str, "<(/?[OoUu][Ll]) *>", pos)
    local elem_start, elem_stop = string.find(str, "<[Ll][Ii] *>", pos)
    if not list_start and not elem_start then
      break
    end
    if list_start and (not elem_start or list_start < elem_start) then
      -- A list tag comes first: push or pop the list stack.
      pos = list_stop
      list_tagname = string.lower(list_tagname)
      if list_tagname == "ol" then
        counters[#counters+1] = 0
      elseif list_tagname == "ul" then
        counters[#counters+1] = false
      else
        -- "/ol" or "/ul": close the innermost list.
        counters[#counters] = nil
      end
    else
      -- A list item comes first: advance the innermost ordered counter
      -- (if the innermost list is ordered) and record the prefix.
      pos = elem_stop
      if counters[#counters] then
        counters[#counters] = counters[#counters] + 1
      end
      local string_parts = {}
      for idx, counter in ipairs(counters) do
        if counter then
          string_parts[idx] = tostring(counter) .. ". "
        else
          string_parts[idx] = "* "
        end
      end
      prefixes[#prefixes+1] = table.concat(string_parts)
    end
  end
  return prefixes
end

-- Converts a fragment of simple HTML into a plain-text representation.
--
-- Supported markup and its text form:
--   <br>                line break
--   <p>, </p>           blank line
--   <b>, <i>            "**" and "//" around the text
--   <sub>, <sup>        "__" and "^^" around the text
--   <a href=URL>text</a>   "[[URL text]]"
--   <h1> .. <h6>        text wrapped in "######" (h1) down to "#" (h6)
--   <ol>/<ul>/<li>      items prefixed with "N. " or "* " per nesting level
-- All remaining tags are removed, "&lt;" and "&gt;" are decoded, and
-- whitespace is normalized.
--
-- Parameters:
--   str  the HTML source string
-- Returns:
--   the converted text, always terminated by exactly one newline
function util.html_to_text(str)
  -- Control characters (including newlines in the HTML source) and spaces
  -- are all treated as a plain space; line breaks come from tags only.
  str = string.gsub(str, "[\0-\32]", " ")

  str = string.gsub(str, "<[Bb][Rr] */?>", "\n")
  str = string.gsub(str, "</?[Pp] *>", "\n\n")
  str = string.gsub(str, "</?[Bb] *>", "**")
  str = string.gsub(str, "</?[Ii] *>", "//")
  str = string.gsub(str, "</?[Ss][Uu][Bb] *>", "__")
  str = string.gsub(str, "</?[Ss][Uu][Pp] *>", "^^")

  -- Links: double-quoted, single-quoted and unquoted href values.
  str = string.gsub(str, '<[Aa] *[Hh][Rr][Ee][Ff] *= *"([^"]*)" *>', "[[%1 ")
  str = string.gsub(str, "<[Aa] *[Hh][Rr][Ee][Ff] *= *'([^']*)' *>", "[[%1 ")
  str = string.gsub(str, "<[Aa] *[Hh][Rr][Ee][Ff] *= *([^ <>\"']*) *>", "[[%1 ")
  str = string.gsub(str, "</[Aa] *>", "]]")

  -- Headings: <h1> is rendered with six "#", <h6> with one.
  for level = 1, 6 do
    local hashes = string.rep("#", 7 - level)
    str = string.gsub(str, "<[Hh]" .. level .. " *>", "\n\n" .. hashes .. " ")
    str = string.gsub(str, "</[Hh]" .. level .. " *>", " " .. hashes .. "\n\n")
  end

  -- Lists: prefixes must be computed while the list tags are still present,
  -- then list tags become blank lines and each <li> receives its prefix.
  local li_info = list_item_prefixes(str)
  str = string.gsub(str, "</?[OoUu][Ll] *>", "\n\n")
  local li_index = 0
  str = string.gsub(str, "<[Ll][Ii] *>", function()
    li_index = li_index + 1
    return li_info[li_index]
  end)
  str = string.gsub(str, "</[Ll][Ii] *>", "\n")

  -- Drop every remaining tag, then decode escaped angle brackets. Decoding
  -- happens after tag removal so decoded "<"/">" are never taken for tags.
  str = string.gsub(str, "<[^<>]*>", "")
  str = string.gsub(str, "&lt;", "<")
  str = string.gsub(str, "&gt;", ">")

  -- Whitespace normalization: collapse runs of spaces, trim spaces at the
  -- beginning and end of every line, limit consecutive newlines to two,
  -- and remove leading newlines.
  str = string.gsub(str, " +", " ")
  str = string.gsub(str, "%f[^\0\n] ", "")
  str = string.gsub(str, " %f[\0\n]", "")
  str = string.gsub(str, "\n\n\n+", "\n\n")
  str = string.gsub(str, "^\n+", "")

  -- Terminate with exactly one newline. Stripping with "\n+$" and appending
  -- avoids a pattern that can match the empty string at the end of input,
  -- which on some Lua releases would yield a doubled trailing newline.
  str = string.gsub(str, "\n+$", "") .. "\n"
  return str
end