// letterboxd.js
// Scrapes a Letterboxd user's film diary and writes the entries to films.json.
const fs = require('fs')
const fetch = require('node-fetch')
const { parse } = require('node-html-parser')
// Path of the JSON file the scraped diary is written to.
const OUTPUT_FILE = 'films.json'

// The Letterboxd username is the first CLI argument. Without a usable value
// there is nothing to scrape, so stop with a non-zero exit code. A blank
// argument (e.g. `node letterboxd.js ""`) is rejected as well, since it would
// otherwise produce a URL with an empty path segment.
if (process.argv.length < 3 || process.argv[2].trim() === '') {
  console.error('Error: Please provide a username.')
  process.exit(1)
}

const USERNAME = process.argv[2]

// Base URL of the user's paginated diary; the page number is appended to it.
// The username is percent-encoded so characters such as `/`, `?` or `#`
// cannot change the request path.
// NOTE: this constant shadows the global `URL` class inside this module.
const URL = `https://letterboxd.com/${encodeURIComponent(USERNAME)}/films/diary/page/`
/**
 * Downloads one page of the diary and parses its HTML.
 *
 * @param {number} pageNumber - Page index appended to the diary base URL.
 * @returns {Promise<object>} Whatever `parse` returns for the page HTML.
 * @throws {Error} If the server responds with a non-OK HTTP status.
 */
async function fetchPage(pageNumber) {
  const pageUrl = URL + pageNumber
  const response = await fetch(pageUrl)

  // fetch resolves for HTTP error statuses too. Without this check an error
  // page would be parsed as if it were a diary page.
  if (!response.ok) {
    throw new Error(`Request for ${pageUrl} failed: ${response.status} ${response.statusText}`)
  }

  const text = await response.text()
  return parse(text)
}
/**
 * Reads the number of diary pages from a page's pagination element.
 *
 * @param {object} root - Parsed page exposing `querySelector`.
 * @returns {Promise<number>} The text of the last `li a` inside
 *   `.paginate-pages` as an integer, or 1 when the pagination element is
 *   absent, contains no links, or the text is not a positive integer.
 */
async function getTotalPages(root) {
  const pagination = root.querySelector('.paginate-pages')
  if (!pagination) return 1

  const links = pagination.querySelectorAll('li a')
  if (links.length === 0) return 1

  const lastPage = parseInt(links[links.length - 1].innerText.trim(), 10)

  // A non-numeric label parses to NaN, and a loop bounded by `i <= NaN` never
  // runs, which would silently scrape nothing. Treat it as a single page.
  return Number.isInteger(lastPage) && lastPage > 0 ? lastPage : 1
}
/**
 * Converts one `.diary-entry-row` node into a plain film record.
 *
 * @param {object} entry - Row node exposing `querySelector`.
 * @returns {{watched_on: string, title: string, rating: (number|null),
 *   rewatched: boolean, permalink: string}} The film record; `rating` is
 *   `null` when `data-rating` is missing or not numeric.
 * @throws {Error} If the row lacks `.edit-review-button` or `.film-actions`.
 */
function parseDiaryEntry(entry) {
  const metadata = entry.querySelector('.edit-review-button')
  const actions = entry.querySelector('.film-actions')
  if (!metadata || !actions) {
    // Fail loudly with a readable message rather than an opaque TypeError.
    throw new Error('Diary entry is missing .edit-review-button or .film-actions')
  }

  const filmTitle = metadata.getAttribute('data-film-name')
  const year = metadata.getAttribute('data-film-year')

  // data-rating is halved to get stars (presumably a half-star scale —
  // confirm against the page markup). Entries without a numeric rating get
  // null instead of NaN.
  const rawRating = parseInt(metadata.getAttribute('data-rating'), 10)

  return {
    watched_on: metadata.getAttribute('data-viewing-date'),
    title: `${filmTitle} (${year})`,
    rating: Number.isNaN(rawRating) ? null : rawRating / 2,
    rewatched: metadata.getAttribute('data-rewatch') === 'true',
    permalink: actions.getAttribute('data-film-slug')
  }
}

/**
 * Scrapes every page of the diary and returns all entries in page order.
 *
 * The first page is fetched once and used both to read the page count and as
 * the source of the first batch of entries. It is always processed, even if
 * the reported page count is unusable.
 *
 * @returns {Promise<object[]>} Film records as produced by parseDiaryEntry.
 * @throws {Error} If a page cannot be fetched or a row is malformed.
 */
async function scrapeFilms() {
  const films = []

  const collect = pageRoot => {
    for (const entry of pageRoot.querySelectorAll('.diary-entry-row')) {
      const film = parseDiaryEntry(entry)
      const stars = film.rating === null ? 'unrated' : `${film.rating} stars`
      console.log(`${film.title} - ${stars}`)
      films.push(film)
    }
  }

  const firstPage = await fetchPage(1)
  const totalPages = await getTotalPages(firstPage)
  collect(firstPage)

  // Remaining pages are fetched one at a time so entries stay in page order.
  for (let page = 2; page <= totalPages; page++) {
    collect(await fetchPage(page))
  }

  return films
}
// Entry point: scrape the diary, then write it to OUTPUT_FILE together with a
// date stamp and the number of entries.
scrapeFilms().then(films => {
  // toISOString() is always UTC; keeping the part before `T` gives YYYY-MM-DD.
  const updated_at = new Date().toISOString().split('T')[0]
  const outputData = {
    updated_at,
    count: films.length,
    films
  }
  fs.writeFileSync(OUTPUT_FILE, JSON.stringify(outputData, null, 2))
  console.log(`Total films: ${films.length}`)
}).catch(error => {
  console.error('Error scraping films:', error)
  // Signal the failure to whatever launched the script instead of exiting
  // with status 0. exitCode (rather than exit()) lets pending output flush.
  process.exitCode = 1
})