@@ -7,11 +7,11 @@ package soup
7
7
import (
8
8
"errors"
9
9
"io/ioutil"
10
+ "log"
10
11
"net/http"
11
12
"regexp"
12
13
"strings"
13
14
14
- "github.com/anaskhan96/soup/fetch"
15
15
"golang.org/x/net/html"
16
16
)
17
17
@@ -41,7 +41,7 @@ func Header(n string, v string) {
41
41
42
42
// Get returns the HTML returned by the url as a string
43
43
func Get (url string ) (string , error ) {
44
- defer fetch . CatchPanic ("Get()" )
44
+ defer catchPanic ("Get()" )
45
45
// Init a new HTTP client
46
46
client := & http.Client {}
47
47
req , err := http .NewRequest ("GET" , url , nil )
@@ -76,7 +76,7 @@ func Get(url string) (string, error) {
76
76
77
77
// HTMLParse parses the HTML returning a start pointer to the DOM
78
78
func HTMLParse (s string ) Root {
79
- defer fetch . CatchPanic ("HTMLParse()" )
79
+ defer catchPanic ("HTMLParse()" )
80
80
r , err := html .Parse (strings .NewReader (s ))
81
81
if err != nil {
82
82
if debug {
@@ -101,8 +101,8 @@ func HTMLParse(s string) Root {
101
101
// with or without attribute key and value specified,
102
102
// and returns a struct with a pointer to it
103
103
func (r Root ) Find (args ... string ) Root {
104
- defer fetch . CatchPanic ("Find()" )
105
- temp , ok := fetch . FindOnce (r .Pointer , args , false )
104
+ defer catchPanic ("Find()" )
105
+ temp , ok := findOnce (r .Pointer , args , false )
106
106
if ok == false {
107
107
if debug {
108
108
panic ("Element `" + args [0 ] + "` with attributes `" + strings .Join (args [1 :], " " ) + "` not found" )
@@ -117,8 +117,8 @@ func (r Root) Find(args ...string) Root {
117
117
// and returns an array of structs, each having
118
118
// the respective pointers
119
119
func (r Root ) FindAll (args ... string ) []Root {
120
- defer fetch . CatchPanic ("FindAll()" )
121
- temp := fetch . FindAllofem (r .Pointer , args )
120
+ defer catchPanic ("FindAll()" )
121
+ temp := findAllofem (r .Pointer , args )
122
122
if len (temp ) == 0 {
123
123
if debug {
124
124
panic ("Element `" + args [0 ] + "` with attributes `" + strings .Join (args [1 :], " " ) + "` not found" )
@@ -135,7 +135,7 @@ func (r Root) FindAll(args ...string) []Root {
135
135
// FindNextSibling finds the next sibling of the pointer in the DOM
136
136
// returning a struct with a pointer to it
137
137
func (r Root ) FindNextSibling () Root {
138
- defer fetch . CatchPanic ("FindNextSibling()" )
138
+ defer catchPanic ("FindNextSibling()" )
139
139
nextSibling := r .Pointer .NextSibling
140
140
if nextSibling == nil {
141
141
if debug {
@@ -149,7 +149,7 @@ func (r Root) FindNextSibling() Root {
149
149
// FindPrevSibling finds the previous sibling of the pointer in the DOM
150
150
// returning a struct with a pointer to it
151
151
func (r Root ) FindPrevSibling () Root {
152
- defer fetch . CatchPanic ("FindPrevSibling()" )
152
+ defer catchPanic ("FindPrevSibling()" )
153
153
prevSibling := r .Pointer .PrevSibling
154
154
if prevSibling == nil {
155
155
if debug {
@@ -163,7 +163,7 @@ func (r Root) FindPrevSibling() Root {
163
163
// FindNextElementSibling finds the next element sibling of the pointer in the DOM
164
164
// returning a struct with a pointer to it
165
165
func (r Root ) FindNextElementSibling () Root {
166
- defer fetch . CatchPanic ("FindNextElementSibling()" )
166
+ defer catchPanic ("FindNextElementSibling()" )
167
167
nextSibling := r .Pointer .NextSibling
168
168
if nextSibling == nil {
169
169
if debug {
@@ -181,7 +181,7 @@ func (r Root) FindNextElementSibling() Root {
181
181
// FindPrevElementSibling finds the previous element sibling of the pointer in the DOM
182
182
// returning a struct with a pointer to it
183
183
func (r Root ) FindPrevElementSibling () Root {
184
- defer fetch . CatchPanic ("FindPrevElementSibling()" )
184
+ defer catchPanic ("FindPrevElementSibling()" )
185
185
prevSibling := r .Pointer .PrevSibling
186
186
if prevSibling == nil {
187
187
if debug {
@@ -198,7 +198,7 @@ func (r Root) FindPrevElementSibling() Root {
198
198
199
199
// Attrs returns a map containing all attributes
200
200
func (r Root ) Attrs () map [string ]string {
201
- defer fetch . CatchPanic ("Attrs()" )
201
+ defer catchPanic ("Attrs()" )
202
202
if r .Pointer .Type != html .ElementNode {
203
203
if debug {
204
204
panic ("Not an ElementNode" )
@@ -208,12 +208,12 @@ func (r Root) Attrs() map[string]string {
208
208
if len (r .Pointer .Attr ) == 0 {
209
209
return nil
210
210
}
211
- return fetch . GetKeyValue (r .Pointer .Attr )
211
+ return getKeyValue (r .Pointer .Attr )
212
212
}
213
213
214
214
// Text returns the string inside a non-nested element
215
215
func (r Root ) Text () string {
216
- defer fetch . CatchPanic ("Text()" )
216
+ defer catchPanic ("Text()" )
217
217
k := r .Pointer .FirstChild
218
218
checkNode:
219
219
if k .Type != html .TextNode {
@@ -242,3 +242,74 @@ checkNode:
242
242
}
243
243
return ""
244
244
}
245
+
246
+ // Using depth first search to find the first occurrence and return
247
+ func findOnce (n * html.Node , args []string , uni bool ) (* html.Node , bool ) {
248
+ if uni == true {
249
+ if n .Type == html .ElementNode && n .Data == args [0 ] {
250
+ if len (args ) > 1 && len (args ) < 4 {
251
+ for i := 0 ; i < len (n .Attr ); i ++ {
252
+ if n .Attr [i ].Key == args [1 ] && n .Attr [i ].Val == args [2 ] {
253
+ return n , true
254
+ }
255
+ }
256
+ } else if len (args ) == 1 {
257
+ return n , true
258
+ }
259
+ }
260
+ }
261
+ uni = true
262
+ for c := n .FirstChild ; c != nil ; c = c .NextSibling {
263
+ p , q := findOnce (c , args , true )
264
+ if q != false {
265
+ return p , q
266
+ }
267
+ }
268
+ return nil , false
269
+ }
270
+
271
+ // Using depth first search to find all occurrences and return
272
+ func findAllofem (n * html.Node , args []string ) []* html.Node {
273
+ var nodeLinks = make ([]* html.Node , 0 , 10 )
274
+ var f func (* html.Node , []string , bool )
275
+ f = func (n * html.Node , args []string , uni bool ) {
276
+ if uni == true {
277
+ if n .Data == args [0 ] {
278
+ if len (args ) > 1 && len (args ) < 4 {
279
+ for i := 0 ; i < len (n .Attr ); i ++ {
280
+ if n .Attr [i ].Key == args [1 ] && n .Attr [i ].Val == args [2 ] {
281
+ nodeLinks = append (nodeLinks , n )
282
+ }
283
+ }
284
+ } else if len (args ) == 1 {
285
+ nodeLinks = append (nodeLinks , n )
286
+ }
287
+ }
288
+ }
289
+ uni = true
290
+ for c := n .FirstChild ; c != nil ; c = c .NextSibling {
291
+ f (c , args , true )
292
+ }
293
+ }
294
+ f (n , args , false )
295
+ return nodeLinks
296
+ }
297
+
298
+ // Returns a key pair value (like a dictionary) for each attribute
299
+ func getKeyValue (attributes []html.Attribute ) map [string ]string {
300
+ var keyvalues = make (map [string ]string )
301
+ for i := 0 ; i < len (attributes ); i ++ {
302
+ _ , exists := keyvalues [attributes [i ].Key ]
303
+ if exists == false {
304
+ keyvalues [attributes [i ].Key ] = attributes [i ].Val
305
+ }
306
+ }
307
+ return keyvalues
308
+ }
309
+
310
// catchPanic recovers from a panic in the calling function and logs it along
// with fnName, the name of the function being guarded. It must be invoked
// directly through defer for recover to take effect. It does nothing when no
// panic is in flight.
func catchPanic(fnName string) {
	recovered := recover()
	if recovered == nil {
		return
	}
	log.Println("Error occurred in", fnName, ":", recovered)
}
0 commit comments