-
Notifications
You must be signed in to change notification settings - Fork 1
/
kindler.py
148 lines (114 loc) · 5.32 KB
/
kindler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
"""
MIT License
Copyright (c) 2020, Sanjay Subramaniam
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Project: Kindler
Started: 20/04/2020
Summary: Scrape your highlights from Kindle's My Clippings.txt
"""
import os
from sys import argv
# Name of the Kindle clippings file that getLines() opens. It is a bare file
# name, so it is resolved against the current working directory.
my_clippings = 'My Clippings.txt'
def getLines(path=None):
    """Return every line of the clippings file, stripped, behind a leading delimiter.

    A '==========' line is prepended so that every clipping, including the
    first one, is preceded by a delimiter line.

    Args:
        path: File to read. Defaults to the module-level `my_clippings`.

    Returns:
        list[str]: '==========' followed by each line of the file with its
        surrounding whitespace removed.

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    if path is None:
        path = my_clippings
    # utf-8-sig drops a byte-order mark at the very start of the file, if any.
    # The context manager guarantees the handle is closed once the lines are read.
    with open(path, 'r', encoding='utf-8-sig') as clippings:
        return ['=========='] + [line.strip() for line in clippings]
def getUnits(lines=None):
    """Return every 'unit' found in the clippings as a list of tuples.

    A unit is `(title, (kind, location), message)`. It is NOT a highlight:
    highlights and notes are the messages of their units.

    Args:
        lines: Stripped clipping lines to parse. Defaults to `getLines()`,
            so the file is read exactly once.

    Returns:
        list[tuple]: One `(title, (kind, location), message)` tuple per
        clipping, in the order they appear in `lines`.
    """
    delimiter = '=========='

    def splitEntries(all_lines):
        "Yields the lines of each clipping, i.e. each run of lines between delimiters."
        entry = []
        for line in all_lines:
            if line == delimiter:
                yield entry
                entry = []
            else:
                entry.append(line)
        # A final clipping that is not closed by a delimiter still counts.
        yield entry

    def parseDetails(details):
        "Returns tuple (kind_of_unit, location). For locations of type '100-123', we take 100."
        tokens = details.split()
        # Purely positional parsing: token 2 is the kind and token 8 is the
        # location, unless token 8 contains 'on', in which case the location
        # is token 5.
        # NOTE(review): presumably this separates lines with a page AND a
        # location from lines with only one of them - confirm on a real file.
        location = tokens[8]
        if 'on' in location:
            location = tokens[5]
        return (tokens[2], int(location.split('-')[0]))

    if lines is None:
        lines = getLines()

    units = []
    for entry in splitEntries(lines):
        # Skip the empty runs around the first/last delimiter, trailing blank
        # lines and truncated entries instead of indexing past their end.
        if len(entry) < 2 or not entry[1]:
            continue
        title = entry[0]
        # (highlight/note/bookmark, location)
        details = parseDetails(entry[1])
        # Layout inside an entry: title, details, blank line, message.
        message = entry[3] if len(entry) > 3 else ''
        units.append((title, details, message))
    return units
def getTitles(source_units=None):
    """Return the distinct titles, sorted alphabetically.

    Args:
        source_units: Units as produced by getUnits(). Defaults to the
            module-level `units` set by the command-line entry point.

    Returns:
        list[str]: Unique titles in sorted order, without any leading
        byte-order mark.
    """
    # to-do: allow sorting using keys- last read or alphabetically.
    if source_units is None:
        source_units = units
    # Titles may start with u'\ufeff'. Strip exactly that character: removing
    # any first character that is not an ASCII letter would also mangle titles
    # that start with a digit or an accented letter (e.g. '1984').
    return sorted({unit[0].lstrip('\ufeff') for unit in source_units})
def help():
    "Prints a welcome banner followed by every command and its description."
    print("Welcome to kindler.py, use me to scrape your highlights from Kindle's My Clippings.txt \n")
    print("Here are commands and their descriptions: \n")
    # (command, description) pairs, in the order they are displayed.
    commands = (
        ('showTitles', 'Show all the titles in your clipping.'),
        ('importAsTxt', 'Import your highlights as .txt in ../highlights.'),
        ('importAsJSON', 'Import your highlights as JSON.'),
    )
    for name, blurb in commands:
        print(name, ':', blurb)
def showTitles():
    "Prints all titles, one per line, numbered from 1."
    # A plain loop, because printing is a side effect: a list comprehension
    # here would only build a throwaway list of None values.
    for number, title in enumerate(getTitles(), start=1):
        print(number, title)
def importAsTxt(source_units=None):
    """Write each title's clippings to <current directory>/highlights/<title>.txt.

    Inside a file the messages are ordered by location, one per line. The
    output directory is created when it does not exist yet.

    Args:
        source_units: Units as produced by getUnits(). Defaults to the
            module-level `units` set by the command-line entry point.
    """
    if source_units is None:
        source_units = units

    def cleanTitle(raw_title):
        "Returns the title without the leading byte-order mark some title lines carry."
        return raw_title.lstrip('\ufeff')

    def fileNameFor(title):
        "Returns '<title>.txt' with path separators replaced, so the file always lands in the output directory."
        name = title
        for separator in (os.sep, os.altsep):
            if separator:
                name = name.replace(separator, '_')
        return f'{name}.txt'

    # Group by the exact (cleaned) title in a single pass. Substring matching
    # would also put the clippings of 'Dune Messiah' into the file for 'Dune'.
    units_by_title = {}
    for unit in source_units:
        units_by_title.setdefault(cleanTitle(unit[0]), []).append(unit)

    directory = os.path.join(os.path.abspath(os.curdir), 'highlights')
    os.makedirs(directory, exist_ok=True)

    for title in sorted(units_by_title):
        # unit[1] is (kind, location); sorting is stable, so clippings that
        # share a location keep their original order.
        ordered = sorted(units_by_title[title], key=lambda unit: unit[1][1])
        # Explicit UTF-8 so non-ASCII highlights do not depend on the
        # platform's default encoding; `with` closes the file even on error.
        with open(os.path.join(directory, fileNameFor(title)), 'w', encoding='utf-8') as outfile:
            for unit in ordered:
                outfile.write(unit[2])
                outfile.write('\n')
    print(
        f"I've saved {len(source_units)} highlights, notes and bookmarks from {len(units_by_title)} titles in /highlights. You're welcome!")
def importAsJSON():
    "Placeholder for the JSON export: currently does nothing and returns None."
    return
if __name__ == "__main__":
    # Command-line entry point: python kindler.py <help|showTitles|importAsTxt|importAsJSON>
    modes = {'showTitles': showTitles,
             'importAsTxt': importAsTxt, 'importAsJSON': importAsJSON}
    # Running without an argument shows the help text instead of raising IndexError.
    option = argv[1] if len(argv) > 1 else 'help'
    if option == 'help':
        help()
    elif option not in modes:
        # Validate up front, so a KeyError raised inside a command is never
        # mistaken for an unknown option.
        print('Invalid option! For help, run: python kindler.py help')
    else:
        try:
            # `units` is deliberately a module-level name: getTitles() and
            # importAsTxt() read it.
            units = getUnits()
        except FileNotFoundError:
            # Only reading the clippings file is guarded, so errors raised
            # while a command runs are not reported as a missing input file.
            print(
                "Kindler: I can't find your My Clippings.txt file here. Did you paste it? \n")
        else:
            modes[option]()