main.go
package main

import (
	"flag"
	"fmt"
	"io"
	"os"
	"regexp"

	"github.com/gosuri/uiprogress"
	"github.com/motemen/go-pocket/api"
)

/*
TODO:
- [ ] Set creation time to the added time
*/

// Flags
var (
	state       string
	outputDir   string
	domain      string
	tag         string
	search      string
	force       bool
	parallelism int
	consumerKey string
)

func init() {
	flag.StringVar(&state, "state", string(api.StateUnread), "Type of article to download")
	flag.StringVar(&outputDir, "outputDir", "./articles", "Directory to download the articles to")
	flag.StringVar(&domain, "domain", "", "Domain to limit the archiving to")
	flag.StringVar(&search, "search", "", "Search to limit the archiving to")
	flag.StringVar(&tag, "tag", "", "Tag to limit the archiving to")
	flag.BoolVar(&force, "force", false, "Redownload already downloaded articles")
	flag.IntVar(&parallelism, "parallelism", 8, "Number of threads to download the articles")
	flag.Parse()

	if state != string(api.StateUnread) && state != string(api.StateAll) && state != string(api.StateArchive) {
		fmt.Printf("State should be: %s, %s or %s\n", string(api.StateUnread), string(api.StateAll), string(api.StateArchive))
		os.Exit(1)
	}

	consumerKey = os.Getenv("POCKET_CONSUMER_KEY")
}
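
// Example invocation (illustrative values only; the consumer key must be
// exported before running):
//
//	POCKET_CONSUMER_KEY=your-key go run . -tag golang -parallelism 4 -outputDir ./articles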

func main() {
	client, err := GetClient()
	if err != nil {
		panic(err)
	}

	options := &api.RetrieveOption{
		State:  api.State(state),
		Domain: domain,
		Search: search,
		Tag:    tag,
	}

	res, err := client.Retrieve(options)
	if err != nil {
		fmt.Printf("%s\n", err)
		os.Exit(1)
	}
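
	// Note: in go-pocket, res.List appears to be a map keyed by item ID, so
	// the items below are processed in no particular order.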

	// Create download directory if it doesn't exist.
	if _, err := os.Stat(outputDir); os.IsNotExist(err) {
		if err = os.MkdirAll(outputDir, 0744); err != nil {
			fmt.Printf("%s\n", err)
			os.Exit(1)
		}
	}

	uiprogress.Start()
	bar := uiprogress.AddBar(len(res.List))
	bar.AppendCompleted()

	// Append the currently processed title.
	var currentTitle string
	bar.AppendFunc(func(b *uiprogress.Bar) string {
		renderedTitle := currentTitle
		// Do not render longer lines than the width of the terminal.
		w, _ := getWidth()
		maxTitleLength := int(w) - b.Width - 32
		if maxTitleLength > 0 && len(renderedTitle) > maxTitleLength {
			renderedTitle = renderedTitle[:maxTitleLength] + "..."
		}
		res := fmt.Sprintf("[%d/%d] %s", bar.Current(), len(res.List), renderedTitle)
		// Pad with spaces so leftover characters from a previously rendered,
		// longer title are overwritten.
		for i := 0; i < maxTitleLength-len(currentTitle)+len("..."); i++ {
			res += " "
		}
		return res
	})
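
	// The download pipeline fans out over `parallelism` worker goroutines:
	// both channels are buffered to len(res.List), so enqueueing every item
	// up front never blocks, and the loop further down drains exactly one
	// result per item.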

	// Prepare the job queue.
	jobs := make(chan work, len(res.List))
	results := make(chan work, len(res.List))

	// Worker threads.
	for w := 0; w < parallelism; w++ {
		go worker(jobs, results)
	}

	// Enqueue jobs.
	for _, item := range res.List {
		jobs <- work{input: item}
	}
	close(jobs)

	// Write results to file.
	for range res.List {
		res := <-results
		bar.Incr()
		if res.alreadyDownloaded {
			fmt.Printf("Skipping %s\n\n", res.input.Title())
			continue
		}
		currentTitle = res.input.Title()
		if res.err != nil {
			fmt.Printf("Error downloading article: %s\n\n", res.err.Error())
			continue
		}
		f, err := os.Create(res.outputFileName)
		if err != nil {
			fmt.Printf("Error creating file: %s\n\n", err.Error())
			continue
		}
		if _, err := io.Copy(f, res.output); err != nil {
			f.Close()
			fmt.Printf("Error writing to file: %s\n\n", err.Error())
			continue
		}
		f.Close()
	}
}
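
// GetClient is defined in another file of this package. A rough sketch of
// what it might look like, assuming it uses github.com/motemen/go-pocket/auth
// the way motemen's own pocket CLI does (the names, signatures and the
// token-caching strategy below are assumptions, not this repository's code):
//
//	func GetClient() (*api.Client, error) {
//		accessToken := os.Getenv("POCKET_ACCESS_TOKEN") // hypothetical cache location
//		if accessToken == "" {
//			requestToken, err := auth.ObtainRequestToken(consumerKey, "http://localhost/")
//			if err != nil {
//				return nil, err
//			}
//			fmt.Println("Authorize at:", auth.GenerateAuthorizationURL(requestToken, "http://localhost/"))
//			// ... wait for the user to approve access, then:
//			authz, err := auth.ObtainAccessToken(consumerKey, requestToken, "http://localhost/")
//			if err != nil {
//				return nil, err
//			}
//			accessToken = authz.AccessToken
//		}
//		return api.NewClient(consumerKey, accessToken), nil
//	}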

// work stores a downloadable article, and the readability result.
type work struct {
	input             api.Item
	err               error
	output            io.Reader
	alreadyDownloaded bool
	outputFileName    string
}

// worker handles downloading articles from a job queue.
func worker(jobs <-chan work, results chan<- work) {
	for j := range jobs {
		title := j.input.Title()
		outputFileName := outputDir + "/" + cleanFileName(title) + ".html"
		if _, err := os.Stat(outputFileName); !os.IsNotExist(err) && !force {
			results <- work{alreadyDownloaded: true, input: j.input}
			continue
		}
		article := Article{Item: j.input}
		res, err := article.Download()
		results <- work{
			input:          j.input,
			output:         res,
			err:            err,
			outputFileName: outputFileName,
		}
	}
}
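
// Article is defined in another file of this package; the work struct comment
// above suggests Download() runs the item through a readability parser. A
// minimal, hypothetical stand-in that merely fetches the raw page (not what
// the repository actually does) could look like:
//
//	type Article struct {
//		Item api.Item
//	}
//
//	func (a Article) Download() (io.Reader, error) {
//		resp, err := http.Get(a.Item.URL()) // assuming go-pocket's Item.URL() helper
//		if err != nil {
//			return nil, err
//		}
//		defer resp.Body.Close()
//		var buf bytes.Buffer
//		if _, err := io.Copy(&buf, resp.Body); err != nil {
//			return nil, err
//		}
//		return &buf, nil
//	}
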
var nonFileNameCharacters = regexp.MustCompile(`(?m)[^\w]`)
var multipleDashes = regexp.MustCompile(`[-]+`)
var trailingDash = regexp.MustCompile(`-$`)
// @TODO remove leading slash
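// For example, given the regexps above:
//
//	cleanFileName("Go 1.22: What's new?") // "Go-1-22-What-s-new"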
func cleanFileName(in string) string {
	// All non-filename characters to dashes.
	res := nonFileNameCharacters.ReplaceAll([]byte(in), []byte("-"))
	// Multiple dashes to single dash.
	res = multipleDashes.ReplaceAll(res, []byte("-"))
	// Remove trailing dash.
	res = trailingDash.ReplaceAll(res, []byte(""))
	return string(res)
}
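
// getWidth is also defined elsewhere in this package. Judging from its call
// site above (the result is converted with int(w)), it likely returns an
// unsigned width; a minimal sketch using golang.org/x/term (an assumption,
// not necessarily what the repository uses):
//
//	func getWidth() (uint, error) {
//		w, _, err := term.GetSize(int(os.Stdout.Fd()))
//		return uint(w), err
//	}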