rev |
line source |
bsw/jbe@1309
|
--- Converts a fragment of simple HTML into plain text with lightweight markup.
--
-- Recognized tags are rewritten as follows; every other tag is dropped:
--   <br>            -> newline
--   <p>, </p>       -> blank line
--   <b>, <i>        -> ** and //
--   <sub>, <sup>    -> __ and ^^
--   <a href=URL>..  -> [[URL ..]]
--   <h1> .. <h6>    -> text wrapped in 6 .. 1 '#' characters
--   <ol>/<ul>/<li>  -> one line per item, prefixed with "N. " or "* " for
--                      every enclosing list level
-- Afterwards the entities &lt;, &gt; and &amp; are decoded, runs of spaces
-- and blank lines are collapsed, and the text is trimmed so that it ends
-- with exactly one newline.
--
-- @param str  HTML source string
-- @return     the converted plain-text string
function util.html_to_text(str)
  -- Bytes 0..32 (control characters, newlines and the space itself) all count
  -- as plain spaces; only the tags below introduce line breaks.
  str = string.gsub(str, "[\0-\32]", " ")

  -- Simple inline and paragraph tags.
  str = string.gsub(str, "<[Bb][Rr] */?>", "\n")
  str = string.gsub(str, "</?[Pp] *>", "\n\n")
  str = string.gsub(str, "</?[Bb] *>", "**")
  str = string.gsub(str, "</?[Ii] *>", "//")
  str = string.gsub(str, "</?[Ss][Uu][Bb] *>", "__")
  str = string.gsub(str, "</?[Ss][Uu][Pp] *>", "^^")

  -- Links: double-quoted, single-quoted and unquoted href values.
  str = string.gsub(str, '<[Aa] *[Hh][Rr][Ee][Ff] *= *"([^"]*)" *>', "[[%1 ")
  str = string.gsub(str, "<[Aa] *[Hh][Rr][Ee][Ff] *= *'([^']*)' *>", "[[%1 ")
  str = string.gsub(str, "<[Aa] *[Hh][Rr][Ee][Ff] *= *([^ <>\"']*) *>", "[[%1 ")
  str = string.gsub(str, "</[Aa] *>", "]]")

  -- Headings: <h1> is wrapped in six '#', <h6> in one.
  for level = 1, 6 do
    local hashes = string.rep("#", 7 - level)
    str = string.gsub(str, "<[Hh]" .. level .. " *>", "\n\n" .. hashes .. " ")
    str = string.gsub(str, "</[Hh]" .. level .. " *>", " " .. hashes .. "\n\n")
  end

  -- First list pass: walk all list and item tags in document order and
  -- precompute the prefix of every <li>. 'counters' is a stack with one entry
  -- per currently open list: a number (items seen so far) for <ol>, or false
  -- for <ul>.
  local li_info = {}
  local counters = {}
  local pos = 1
  while true do
    local list_start, list_stop, list_tagname =
      string.find(str, "<(/?[OoUu][Ll]) *>", pos)
    local elem_start, elem_stop = string.find(str, "<[Ll][Ii] *>", pos)
    if not list_start and not elem_start then
      break
    end
    -- Handle whichever tag comes first; list_tagname stays nil for <li>.
    if list_start and (not elem_start or list_start < elem_start) then
      pos = list_stop
      list_tagname = string.lower(list_tagname)
    else
      pos = elem_stop
      list_tagname = nil
    end
    if list_tagname == "ol" then
      counters[#counters+1] = 0
    elseif list_tagname == "ul" then
      counters[#counters+1] = false
    elseif list_tagname then
      -- Closing tag ("/ol" or "/ul"): leave the innermost list.
      counters[#counters] = nil
    else
      -- <li>: advance the innermost counter if that list is ordered
      -- (0 is truthy in Lua, false marks an unordered list).
      if counters[#counters] then
        counters[#counters] = counters[#counters] + 1
      end
      local string_parts = {}
      for idx, counter in ipairs(counters) do
        if counter then
          string_parts[idx] = tostring(counter) .. ". "
        else
          string_parts[idx] = "* "
        end
      end
      li_info[#li_info+1] = table.concat(string_parts)
    end
  end

  -- Second list pass: replace the tags, consuming the prefixes in the same
  -- order in which they were computed above.
  str = string.gsub(str, "</?[OoUu][Ll] *>", "\n\n")
  local li_index = 0
  str = string.gsub(str, "<[Ll][Ii] *>", function()
    li_index = li_index + 1
    return li_info[li_index]
  end)
  str = string.gsub(str, "</[Ll][Ii] *>", "\n")

  -- Drop every remaining tag, then decode entities. &amp; is decoded last so
  -- that an escaped entity such as "&amp;lt;" yields "&lt;" and not "<".
  str = string.gsub(str, "<[^<>]*>", "")
  str = string.gsub(str, "&lt;", "<")
  str = string.gsub(str, "&gt;", ">")
  str = string.gsub(str, "&amp;", "&")

  -- Whitespace cleanup: collapse runs of spaces, strip spaces at the start
  -- and end of every line, and limit consecutive newlines to two.
  str = string.gsub(str, " +", " ")
  str = string.gsub(str, "%f[^\0\n] ", "")
  str = string.gsub(str, " %f[\0\n]", "")
  str = string.gsub(str, "\n\n\n+", "\n\n")
  str = string.gsub(str, "^\n+", "")
  -- Strip trailing newlines and append exactly one. A single
  -- gsub("\n*$", "\n") would match twice on Lua versions before 5.4 (once for
  -- the newline run, once for the empty string at the end).
  str = string.gsub(str, "\n+$", "") .. "\n"
  return str
end
|