Skip to content

Commit e995031

Browse files
authored
Use slightly customised exec allocator for the Headless Chrome execution (#3)
* Use slightly customised exec allocator for the Headless Chrome execution * Minor code tweaks * Headless chrome task tweaks * Bumped chromedp, added ability to provide a custom user agent * CHANGELOG * Version bump * Pass ua option in the CLI mode
1 parent 12aa57d commit e995031

File tree

8 files changed

+67
-33
lines changed

8 files changed

+67
-33
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
# Changelog
22
All notable changes to this project will be documented in this file.
33

4+
## 0.14.0
5+
- bumped Go to 1.22;
6+
- bumped `github.com/chromedp/chromedp` to v0.9.2;
7+
- new argument `-ua` to provide a custom User Agent for the HTTP headless calls.
8+
49
## 0.13.0
510
- bumped Go to 1.20;
611
- bumped `github.com/chromedp/chromedp` to v0.9.1;

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
.PHONY: deps clean build
22

3-
TAG=0.13.0
3+
TAG=0.14.0
44
BINARY=inout
55
DIST_DIR=_dist
66
OS=darwin

cmd/cli/cli.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ var (
1717
ready = flag.String("r", "", "DOM CSS query, waits until element available, returns the whole HTML document")
1818
until = flag.Duration("u", 0, "duration to wait before getting HTML contents, handy for SPAs, because they keep loading in browsers for some time")
1919
img = flag.String("i", "", "enables capturing screenshot in the provided path")
20+
ua = flag.String("ua", "", "provide a custom user agent for headless HTTP calls")
2021
timeout = flag.Duration("t", 5*time.Second, "timeout for the whole fetch, e.g. \"-d 1s\" sets timeout to 1 second")
2122
verbose = flag.Bool("v", false, "\"-v\" enables verbose mode")
2223
)
@@ -37,7 +38,9 @@ func main() {
3738
inout.WaitUntil(*until),
3839
inout.Screenshot(len(*img) > 0),
3940
inout.Timeout(ttl),
40-
inout.Verbose(*verbose))
41+
inout.Verbose(*verbose),
42+
inout.UserAgent(*ua),
43+
)
4144
if err != nil {
4245
fmt.Fprintf(os.Stderr, "failed to create reader: %v\n", err)
4346
os.Exit(1)

go.mod

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
module github.com/zoomio/inout
22

3-
go 1.20
3+
go 1.22
44

55
require (
6-
github.com/chromedp/cdproto v0.0.0-20230220211738-2b1ec77315c9
7-
github.com/chromedp/chromedp v0.9.1
6+
github.com/chromedp/cdproto v0.0.0-20230802225258-3cf4e6d46a89
7+
github.com/chromedp/chromedp v0.9.2
88
github.com/stretchr/testify v1.8.4
99
)
1010

@@ -13,7 +13,7 @@ require (
1313
github.com/davecgh/go-spew v1.1.1 // indirect
1414
github.com/gobwas/httphead v0.1.0 // indirect
1515
github.com/gobwas/pool v0.2.1 // indirect
16-
github.com/gobwas/ws v1.1.0 // indirect
16+
github.com/gobwas/ws v1.2.1 // indirect
1717
github.com/josharian/intern v1.0.0 // indirect
1818
github.com/kr/pretty v0.2.0 // indirect
1919
github.com/mailru/easyjson v0.7.7 // indirect

go.sum

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
github.com/chromedp/cdproto v0.0.0-20230220211738-2b1ec77315c9 h1:wMSvdj3BswqfQOXp2R1bJOAE7xIQLt2dlMQDMf836VY=
2-
github.com/chromedp/cdproto v0.0.0-20230220211738-2b1ec77315c9/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs=
3-
github.com/chromedp/chromedp v0.9.1 h1:CC7cC5p1BeLiiS2gfNNPwp3OaUxtRMBjfiw3E3k6dFA=
4-
github.com/chromedp/chromedp v0.9.1/go.mod h1:DUgZWRvYoEfgi66CgZ/9Yv+psgi+Sksy5DTScENWjaQ=
1+
github.com/chromedp/cdproto v0.0.0-20230802225258-3cf4e6d46a89 h1:aPflPkRFkVwbW6dmcVqfgwp1i+UWGFH6VgR1Jim5Ygc=
2+
github.com/chromedp/cdproto v0.0.0-20230802225258-3cf4e6d46a89/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs=
3+
github.com/chromedp/chromedp v0.9.2 h1:dKtNz4kApb06KuSXoTQIyUC2TrA0fhGDwNZf3bcgfKw=
4+
github.com/chromedp/chromedp v0.9.2/go.mod h1:LkSXJKONWTCHAfQasKFUZI+mxqS4tZqhmtGzzhLsnLs=
55
github.com/chromedp/sysutil v1.0.0 h1:+ZxhTpfpZlmchB58ih/LBHX52ky7w2VhQVKQMucy3Ic=
66
github.com/chromedp/sysutil v1.0.0/go.mod h1:kgWmDdq8fTzXYcKIBqIYvRRTnYb9aNS9moAV0xufSww=
77
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
@@ -10,8 +10,8 @@ github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU
1010
github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM=
1111
github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og=
1212
github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw=
13-
github.com/gobwas/ws v1.1.0 h1:7RFti/xnNkMJnrK7D1yQ/iCIB5OrrY/54/H930kIbHA=
14-
github.com/gobwas/ws v1.1.0/go.mod h1:nzvNcVha5eUziGrbxFCo6qFIojQHjJV5cLYIbezhfL0=
13+
github.com/gobwas/ws v1.2.1 h1:F2aeBZrm2NDsc7vbovKrWSogd4wvfAxg0FQ89/iqOTk=
14+
github.com/gobwas/ws v1.2.1/go.mod h1:hRKAFb8wOxFROYNsT1bqfWnhX+b5MFeJM9r2ZSwg/KY=
1515
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
1616
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
1717
github.com/kr/pretty v0.2.0 h1:s5hAObm+yFO5uHYt5dYjxi2rXrsnmRpJx4OYvIWUaQs=
@@ -29,7 +29,6 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
2929
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
3030
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
3131
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
32-
golang.org/x/sys v0.0.0-20201207223542-d4d67f95c62d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
3332
golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
3433
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
3534
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=

headless.go

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77

88
"github.com/chromedp/cdproto/cdp"
99
"github.com/chromedp/cdproto/dom"
10+
"github.com/chromedp/cdproto/target"
1011
"github.com/chromedp/chromedp"
1112
)
1213

@@ -16,13 +17,26 @@ type headlesResult struct {
1617
}
1718

1819
func headless(ctx context.Context, c *config) (*headlesResult, error) {
19-
// create context
20-
childCtx, cancel := chromedp.NewContext(ctx)
20+
opts := append(chromedp.DefaultExecAllocatorOptions[:],
21+
chromedp.DisableGPU,
22+
)
23+
if c.userAgent != "" {
24+
opts = append(opts, chromedp.UserAgent(c.userAgent))
25+
}
26+
// Create an allocator
27+
allocatorCtx, allocatorCancel := chromedp.NewExecAllocator(ctx, opts...)
28+
defer allocatorCancel()
29+
30+
// Create a new context with the allocator
31+
childCtx, cancel := chromedp.NewContext(allocatorCtx)
2132
defer cancel()
2233

2334
var res strings.Builder
2435
var img []byte
25-
if err := chromedp.Run(childCtx, chromeTasks(c, &res, 90, &img)); err != nil {
36+
if err := chromedp.Run(
37+
childCtx,
38+
// chromedp.Emulate(device.IPhone7), uncomment to emulate devices
39+
chromeTasks(c, &res, 90, &img)); err != nil {
2640
err = fmt.Errorf("error in running headless to %s: %w", c.source, err)
2741
return nil, err
2842
}
@@ -32,7 +46,12 @@ func headless(ctx context.Context, c *config) (*headlesResult, error) {
3246

3347
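// chromeTasks assembles the chromedp actions to run for the given config, writing the page HTML into res and, when a screenshot is requested, the image bytes into buf.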
3448
func chromeTasks(c *config, res *strings.Builder, quality int, buf *[]byte) chromedp.Tasks {
35-
tasks := []chromedp.Action{}
49+
tasks := []chromedp.Action{chromedp.ActionFunc(func(ctx context.Context) error {
50+
c := chromedp.FromContext(ctx)
51+
_, err := target.CreateBrowserContext().Do(cdp.WithExecutor(ctx, c.Browser))
52+
return err
53+
})}
54+
3655
if c.screenshot {
3756
tasks = append(tasks, chromedp.EmulateViewport(1920, 2000))
3857
}
@@ -60,12 +79,12 @@ func chromeTasks(c *config, res *strings.Builder, quality int, buf *[]byte) chro
6079
}),
6180
)
6281
} else {
63-
tasks = append(tasks, chromedp.ActionFunc(func(c context.Context) error {
64-
node, err := dom.GetDocument().Do(c)
82+
tasks = append(tasks, chromedp.ActionFunc(func(ctx context.Context) error {
83+
node, err := dom.GetDocument().Do(ctx)
6584
if err != nil {
6685
return err
6786
}
68-
str, err := dom.GetOuterHTML().WithNodeID(node.NodeID).Do(c)
87+
str, err := dom.GetOuterHTML().WithNodeID(node.NodeID).Do(ctx)
6988
if err != nil {
7089
return err
7190
}

options.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,11 @@ var (
5454
c.verbose = verbose
5555
}
5656
}
57+
58+
// UserAgent sets a custom user agent for headless Chrome operations.
59+
UserAgent = func(ua string) Option {
60+
return func(c *config) {
61+
c.userAgent = ua
62+
}
63+
}
5764
)

reader.go

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -34,27 +34,27 @@ func New(ctx context.Context, source string) (Reader, error) {
3434
// source - the filename or web page name, reads from STDIN if name is empty.
3535
// Panics on errors.
3636
func NewInOut(ctx context.Context, options ...Option) (Reader, error) {
37-
c := &config{}
37+
cfg := &config{}
3838

3939
// apply custom configuration
4040
for _, option := range options {
41-
option(c)
41+
option(cfg)
4242
}
4343

4444
childCtx, cancel := context.WithCancel(ctx)
4545
defer cancel()
4646
start := time.Now()
47-
end := start.Add(c.timeout)
47+
end := start.Add(cfg.timeout)
4848
go func() {
4949
ticker := time.NewTicker(30 * time.Millisecond)
5050
for {
5151
select {
5252
case <-ctx.Done():
5353
return
5454
case <-ticker.C:
55-
if c.timeout > 0 && end.Before(time.Now()) {
56-
if c.verbose {
57-
fmt.Printf("timeout of %v passed, stopping...\n", c.timeout)
55+
if cfg.timeout > 0 && end.Before(time.Now()) {
56+
if cfg.verbose {
57+
fmt.Printf("timeout of %v passed, stopping...\n", cfg.timeout)
5858
}
5959
cancel()
6060
return
@@ -67,32 +67,32 @@ func NewInOut(ctx context.Context, options ...Option) (Reader, error) {
6767
var err error
6868

6969
// STDIN
70-
if c.source == "" {
70+
if cfg.source == "" {
7171
r.reader, err = handleSTDIN()
7272
if err != nil {
7373
return *r, err
7474
}
7575

7676
// HTTP
77-
} else if c.isHTTP() {
78-
if c.verbose {
77+
} else if cfg.isHTTP() {
78+
if cfg.verbose {
7979
fmt.Println("source is HTTP/HTTPS")
8080
}
81-
r.reader, r.ImgBytes, err = handleHTTP(childCtx, c)
81+
r.reader, r.ImgBytes, err = handleHTTP(childCtx, cfg)
8282
if err != nil {
8383
return *r, err
8484
}
8585

8686
// File system
87-
} else if c.isFS() {
88-
r.reader, err = handleFS(c.source)
87+
} else if cfg.isFS() {
88+
r.reader, err = handleFS(cfg.source)
8989
if err != nil {
9090
return *r, err
9191
}
9292

9393
// Unresolvable "source"
9494
} else {
95-
return *r, fmt.Errorf("unknown type of provided input source: %s", c.source)
95+
return *r, fmt.Errorf("unknown type of provided input source: %s", cfg.source)
9696
}
9797

9898
r.buffer = make([]byte, 64*1024)
@@ -174,6 +174,7 @@ type config struct {
174174
screenshot bool
175175
timeout time.Duration
176176
verbose bool
177+
userAgent string
177178
}
178179

179180
func (c *config) isHTTP() bool {

0 commit comments

Comments
 (0)