@@ -6,7 +6,7 @@ package soup
6
6
7
7
import (
8
8
"bytes"
9
- "errors "
9
+ "fmt "
10
10
"io/ioutil"
11
11
"net/http"
12
12
"regexp"
@@ -15,20 +15,67 @@ import (
15
15
"golang.org/x/net/html"
16
16
)
17
17
18
- // Root is a structure containing a pointer to an html node, the node value, and an error variable to return an error if occurred
18
+ // ErrorType enumerates the categories of failure reported by soup; it is the type of the Error.Type field.
19
+ type ErrorType int
20
+
21
+ const (
22
+ // ErrUnableToParse is returned by HTMLParse when the HTML could not be parsed. As the first iota value it is also the zero value of ErrorType.
23
+ ErrUnableToParse ErrorType = iota
24
+ // ErrElementNotFound is returned by Find and FindStrict when no matching element was found.
25
+ ErrElementNotFound
26
+ // ErrNoNextSibling is returned by FindNextSibling when the node has no next sibling.
27
+ ErrNoNextSibling
28
+ // ErrNoPreviousSibling is returned by FindPrevSibling when the node has no previous sibling.
29
+ ErrNoPreviousSibling
30
+ // ErrNoNextElementSibling is returned by FindNextElementSibling when no next element sibling can be found.
31
+ ErrNoNextElementSibling
32
+ // ErrNoPreviousElementSibling is returned by FindPrevElementSibling when no previous element sibling can be found.
33
+ ErrNoPreviousElementSibling
34
+ // ErrCreatingGetRequest is returned by GetWithClient when the GET request could not be created.
35
+ ErrCreatingGetRequest
36
+ // ErrInGetRequest is returned by GetWithClient when performing the GET request failed.
37
+ ErrInGetRequest
38
+ // ErrReadingResponse is returned by GetWithClient when the response body of the GET request could not be read.
39
+ ErrReadingResponse
40
+ )
41
+
42
+ // Error is the error type returned by this package; it allows introspection on the kind of failure.
43
+ // If you know you have an Error, you can compare its Type to one of the exported ErrorType
44
+ // constants from this package to see what kind of error it is, then call the Error() method
45
+ // to see if it has more specific details for you, as in the case of an ErrElementNotFound
46
+ // error, whose message names the element and attributes that were searched for.
47
+ type Error struct {
48
+ Type ErrorType // category of the failure; one of the Err* constants
49
+ msg string // human-readable description, exposed only through Error()
50
+ }
51
+
52
+ func (se Error ) Error () string { // Error implements the built-in error interface for Error values.
53
+ return se .msg // the stored message is returned verbatim; Type is not included in the text
54
+ }
55
+
56
+ func newError (t ErrorType , msg string ) Error { // newError builds an Error of category t carrying the message msg.
57
+ return Error {Type : t , msg : msg } // returned by value, not as a pointer
58
+ }
59
+
60
+ // Root wraps a pointer to an html.Node together with that node's Data string and an error value that is set when the operation producing the Root failed.
19
61
type Root struct {
20
62
Pointer * html.Node // the wrapped DOM node; left nil in the error results built in this file
21
63
NodeValue string // copy of the node's Data field, filled in alongside Pointer
22
64
Error error // non-nil when the lookup or parse that produced this Root failed
23
65
}
24
66
25
- var debug = false
67
+ // Package-level state. defaultClient is the shared HTTP client that Get passes to GetWithClient, for callers who do not supply a client of their own.
68
+ var (
69
+ defaultClient = & http.Client {} // NOTE(review): a zero-value http.Client has no Timeout; consider setting one
26
70
27
- // Headers contains all HTTP headers to send
28
- var Headers = make (map [string ]string )
71
+ debug = false // when true, failure paths panic before they can return an error
29
72
30
- // Cookies contains all HTTP cookies to send
31
- var Cookies = make (map [string ]string )
73
+ // Headers contains all HTTP headers to send, keyed by header name; entries are added with Header.
74
+ Headers = make (map [string ]string )
75
+
76
+ // Cookies contains all HTTP cookies to send, keyed by cookie name; entries are added with Cookie.
77
+ Cookies = make (map [string ]string )
78
+ )
32
79
33
80
// SetDebug sets the debug status
34
81
// Setting this to true causes failures to panic (and so be logged to the console) instead of only being returned as errors.
@@ -42,6 +89,7 @@ func Header(n string, v string) {
42
89
Headers [n ] = v
43
90
}
44
91
92
+ // Cookie stores the cookie named n with value v in the package-level Cookies map, replacing any value previously stored under n.
45
93
func Cookie (n string , v string ) {
46
94
Cookies [n ] = v
47
95
}
@@ -53,7 +101,7 @@ func GetWithClient(url string, client *http.Client) (string, error) {
53
101
if debug {
54
102
panic ("Couldn't perform GET request to " + url )
55
103
}
56
- return "" , errors . New ( "couldn't perform GET request to " + url )
104
+ return "" , newError ( ErrCreatingGetRequest , "error creating get request to "+ url )
57
105
}
58
106
// Set headers
59
107
for hName , hValue := range Headers {
@@ -72,24 +120,22 @@ func GetWithClient(url string, client *http.Client) (string, error) {
72
120
if debug {
73
121
panic ("Couldn't perform GET request to " + url )
74
122
}
75
- return "" , errors . New ( "couldn't perform GET request to " + url )
123
+ return "" , newError ( ErrInGetRequest , "couldn't perform GET request to " + url )
76
124
}
77
125
defer resp .Body .Close ()
78
126
bytes , err := ioutil .ReadAll (resp .Body )
79
127
if err != nil {
80
128
if debug {
81
129
panic ("Unable to read the response body" )
82
130
}
83
- return "" , errors . New ( "unable to read the response body" )
131
+ return "" , newError ( ErrReadingResponse , "unable to read the response body" )
84
132
}
85
133
return string (bytes ), nil
86
134
}
87
135
88
136
// Get fetches url through GetWithClient using the package's shared default HTTP client and returns the response body as a string, or an error if the request failed.
89
137
func Get (url string ) (string , error ) {
90
- // Init a new HTTP client
91
- client := & http.Client {}
92
- return GetWithClient (url , client )
138
+ return GetWithClient (url , defaultClient )
93
139
}
94
140
95
141
// HTMLParse parses the HTML returning a start pointer to the DOM
@@ -99,7 +145,7 @@ func HTMLParse(s string) Root {
99
145
if debug {
100
146
panic ("Unable to parse the HTML" )
101
147
}
102
- return Root {nil , "" , errors . New ( "unable to parse the HTML" )}
148
+ return Root {Error : newError ( ErrUnableToParse , "unable to parse the HTML" )}
103
149
}
104
150
for r .Type != html .ElementNode {
105
151
switch r .Type {
@@ -111,7 +157,7 @@ func HTMLParse(s string) Root {
111
157
r = r .NextSibling
112
158
}
113
159
}
114
- return Root {r , r .Data , nil }
160
+ return Root {Pointer : r , NodeValue : r .Data }
115
161
}
116
162
117
163
// Find finds the first occurrence of the given tag name,
@@ -123,9 +169,9 @@ func (r Root) Find(args ...string) Root {
123
169
if debug {
124
170
panic ("Element `" + args [0 ] + "` with attributes `" + strings .Join (args [1 :], " " ) + "` not found" )
125
171
}
126
- return Root {nil , "" , errors . New ("element `" + args [ 0 ] + " ` with attributes `" + strings .Join (args [1 :], " " ) + "` not found" )}
172
+ return Root {Error : newError ( ErrElementNotFound , fmt . Sprintf ("element `%s ` with attributes `%s` not found" , args [ 0 ], strings .Join (args [1 :], " " )) )}
127
173
}
128
- return Root {temp , temp .Data , nil }
174
+ return Root {Pointer : temp , NodeValue : temp .Data }
129
175
}
130
176
131
177
// FindAll finds all occurrences of the given tag name,
@@ -142,7 +188,7 @@ func (r Root) FindAll(args ...string) []Root {
142
188
}
143
189
pointers := make ([]Root , 0 , len (temp ))
144
190
for i := 0 ; i < len (temp ); i ++ {
145
- pointers = append (pointers , Root {temp [i ], temp [i ].Data , nil })
191
+ pointers = append (pointers , Root {Pointer : temp [i ], NodeValue : temp [i ].Data })
146
192
}
147
193
return pointers
148
194
}
@@ -155,9 +201,9 @@ func (r Root) FindStrict(args ...string) Root {
155
201
if debug {
156
202
panic ("Element `" + args [0 ] + "` with attributes `" + strings .Join (args [1 :], " " ) + "` not found" )
157
203
}
158
- return Root {nil , "" , errors . New ("element `" + args [ 0 ] + " ` with attributes `" + strings .Join (args [1 :], " " ) + "` not found" )}
204
+ return Root {nil , "" , newError ( ErrElementNotFound , fmt . Sprintf ("element `%s ` with attributes `%s` not found" , args [ 0 ], strings .Join (args [1 :], " " )) )}
159
205
}
160
- return Root {temp , temp .Data , nil }
206
+ return Root {Pointer : temp , NodeValue : temp .Data }
161
207
}
162
208
163
209
// FindAllStrict finds all occurrences of the given tag name
@@ -172,7 +218,7 @@ func (r Root) FindAllStrict(args ...string) []Root {
172
218
}
173
219
pointers := make ([]Root , 0 , len (temp ))
174
220
for i := 0 ; i < len (temp ); i ++ {
175
- pointers = append (pointers , Root {temp [i ], temp [i ].Data , nil })
221
+ pointers = append (pointers , Root {Pointer : temp [i ], NodeValue : temp [i ].Data })
176
222
}
177
223
return pointers
178
224
}
@@ -185,9 +231,9 @@ func (r Root) FindNextSibling() Root {
185
231
if debug {
186
232
panic ("No next sibling found" )
187
233
}
188
- return Root {nil , "" , errors . New ( "no next sibling found" )}
234
+ return Root {Error : newError ( ErrNoNextSibling , "no next sibling found" )}
189
235
}
190
- return Root {nextSibling , nextSibling .Data , nil }
236
+ return Root {Pointer : nextSibling , NodeValue : nextSibling .Data }
191
237
}
192
238
193
239
// FindPrevSibling finds the previous sibling of the pointer in the DOM
@@ -198,9 +244,10 @@ func (r Root) FindPrevSibling() Root {
198
244
if debug {
199
245
panic ("No previous sibling found" )
200
246
}
201
- return Root {nil , "" , errors .New ("no previous sibling found" )}
247
+
248
+ return Root {Error : newError (ErrNoPreviousSibling , "no previous sibling found" )}
202
249
}
203
- return Root {prevSibling , prevSibling .Data , nil }
250
+ return Root {Pointer : prevSibling , NodeValue : prevSibling .Data }
204
251
}
205
252
206
253
// FindNextElementSibling finds the next element sibling of the pointer in the DOM
@@ -211,12 +258,12 @@ func (r Root) FindNextElementSibling() Root {
211
258
if debug {
212
259
panic ("No next element sibling found" )
213
260
}
214
- return Root {nil , "" , errors . New ( "no next element sibling found" )}
261
+ return Root {Error : newError ( ErrNoNextElementSibling , "no next element sibling found" )}
215
262
}
216
263
if nextSibling .Type == html .ElementNode {
217
- return Root {nextSibling , nextSibling .Data , nil }
264
+ return Root {Pointer : nextSibling , NodeValue : nextSibling .Data }
218
265
}
219
- p := Root {nextSibling , nextSibling .Data , nil }
266
+ p := Root {Pointer : nextSibling , NodeValue : nextSibling .Data }
220
267
return p .FindNextElementSibling ()
221
268
}
222
269
@@ -228,12 +275,12 @@ func (r Root) FindPrevElementSibling() Root {
228
275
if debug {
229
276
panic ("No previous element sibling found" )
230
277
}
231
- return Root {nil , "" , errors . New ( "no previous element sibling found" )}
278
+ return Root {Error : newError ( ErrNoPreviousElementSibling , "no previous element sibling found" )}
232
279
}
233
280
if prevSibling .Type == html .ElementNode {
234
- return Root {prevSibling , prevSibling .Data , nil }
281
+ return Root {Pointer : prevSibling , NodeValue : prevSibling .Data }
235
282
}
236
- p := Root {prevSibling , prevSibling .Data , nil }
283
+ p := Root {Pointer : prevSibling , NodeValue : prevSibling .Data }
237
284
return p .FindPrevElementSibling ()
238
285
}
239
286
@@ -242,7 +289,7 @@ func (r Root) Children() []Root {
242
289
child := r .Pointer .FirstChild
243
290
var children []Root
244
291
for child != nil {
245
- children = append (children , Root {child , child .Data , nil })
292
+ children = append (children , Root {Pointer : child , NodeValue : child .Data })
246
293
child = child .NextSibling
247
294
}
248
295
return children
0 commit comments