-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtext.go
69 lines (61 loc) · 1.38 KB
/
text.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
package htmlx
import (
"io"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// WriteText walks the tree rooted at n and writes the content of every text
// node it reaches to w, in document order.
//
// Comment nodes are skipped. An element whose DataAtom is mapped to true in
// ignore is skipped together with its whole subtree; a nil ignore map skips
// nothing. A nil n is a no-op. The first error returned by w.WriteString stops
// the walk and is returned unchanged.
//
// Text is written exactly as stored in html.Node.Data. That field is already
// unescaped by the parser, so it must not be unescaped a second time: doing
// so would turn the literal text "&lt;" (written "&amp;lt;" in the markup)
// into "<".
//
// BUG: <noscript> parsed as text.
func WriteText(w io.StringWriter, n *html.Node, ignore map[atom.Atom]bool) error {
	if n == nil {
		return nil
	}

	var walk func(*html.Node) error
	walk = func(node *html.Node) error {
		switch node.Type {
		case html.TextNode:
			// Data is already unescaped; write it verbatim.
			_, err := w.WriteString(node.Data)
			return err
		case html.CommentNode:
			return nil
		case html.ElementNode:
			// Indexing a nil map is safe and yields false.
			if ignore[node.DataAtom] {
				return nil
			}
		}

		// Non-ignored elements and every other node type: descend.
		for child := node.FirstChild; child != nil; child = child.NextSibling {
			if err := walk(child); err != nil {
				return err
			}
		}
		return nil
	}

	return walk(n)
}
// TextIgnoreAtom is a set of element atoms whose contents are ignored when
// collecting text. Its type matches the ignore parameter of WriteText.
var TextIgnoreAtom = map[atom.Atom]bool{
	// Elements whose inner text is ignored.
	atom.Head:   true,
	atom.Script: true,
	atom.Style:  true,

	// Self-closing elements; listing them is only an optimization.
	atom.Area:   true,
	atom.Base:   true,
	atom.Br:     true,
	atom.Col:    true,
	atom.Embed:  true,
	atom.Hr:     true,
	atom.Img:    true,
	atom.Input:  true,
	atom.Keygen: true,
	atom.Link:   true,
	atom.Meta:   true,
	atom.Param:  true,
	atom.Source: true,
	atom.Track:  true,
	atom.Wbr:    true,
}