-
Notifications
You must be signed in to change notification settings - Fork 0
/
Program.cs
146 lines (121 loc) · 4.98 KB
/
Program.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
namespace MyClippingsParser
{
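/* Format of one clipping entry accepted by the parser (lines end with CRLF):
Title (Author)
- Your Highlight on page X | location X-X | Added on LongDate
    (or, when there is no page number:)
- Your Highlight at location X-X | Added on LongDate
<blank line>
Highlight string
==========
*/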
/// <summary>
/// Console tool that reads a Kindle-style clippings file, groups the highlights
/// by book, drops highlights that overlap with the one that follows them, and
/// writes two text files per book into a "notes" directory.
/// </summary>
class Program
{
    // Matches a single clipping entry. Named groups: title, author, page (only
    // for the "on page" variant), locationStart, locationEnd, date, highlight.
    // The pattern expects CRLF line endings and a single-line highlight.
    private static readonly Regex _pattern = new Regex(@"^(?<title>.+) \((?<author>.+)\)\r\n- ((Your Highlight on page (?<page>\d+) \| location (?<locationStart>\d+)-(?<locationEnd>\d+))|(Your Highlight at location (?<locationStart>\d+)-(?<locationEnd>\d+))) \| Added on (?<date>.+)\r\n\r\n(?<highlight>.+)\r\n==========", RegexOptions.Multiline);

    /// <summary>
    /// Entry point. Parses the clippings file named by the first argument, or
    /// "My Clippings.txt" in the current directory when no argument is given.
    /// </summary>
    /// <param name="args">Optional: path of the clippings file as args[0].</param>
    static void Main(string[] args)
    {
        string fname = args.Length > 0 ? args[0] : @"My Clippings.txt";

        // Check existence for an explicitly supplied path as well; otherwise a
        // mistyped argument ends in an unhandled FileNotFoundException.
        if (!File.Exists(fname))
        {
            Console.Write("No such file");
            return;
        }

        string file = File.ReadAllText(fname);
        var parsed = Parse(file);
        Output(parsed);
    }

    /// <summary>
    /// Returns true when either string contains the other (ordinal comparison).
    /// </summary>
    private static bool ContainsOrIsContained(string str1, string str2)
    {
        return str1.Contains(str2) || str2.Contains(str1);
    }

    /// <summary>
    /// Extracts all clipping entries from <paramref name="str"/>, groups them by
    /// book and removes overlapping highlights.
    /// </summary>
    /// <param name="str">Full text of the clippings file.</param>
    /// <returns>
    /// Map from "author - title" to the retained matches of that book, in file order.
    /// </returns>
    private static Dictionary<string, List<Match>> Parse(string str)
    {
        // Split the entries by book first.
        var byBook = new Dictionary<string, List<Match>>();
        foreach (Match match in _pattern.Matches(str))
        {
            AddMatch(byBook, match);
        }

        var highlights = new Dictionary<string, List<Match>>();
        foreach (var kvp in byBook)
        {
            List<Match> bookMatches = kvp.Value;
            var kept = new List<Match>();

            for (int i = 0; i < bookMatches.Count; i++)
            {
                bool isLast = i == bookMatches.Count - 1;

                // An entry is dropped when the entry right after it contains it
                // or is contained in it: the later entry is treated as the
                // replacement. The final entry of a book is always kept.
                if (isLast || !ContainsOrIsContained(
                        bookMatches[i].Groups["highlight"].Value,
                        bookMatches[i + 1].Groups["highlight"].Value))
                {
                    kept.Add(bookMatches[i]);
                }
            }

            highlights.Add(kvp.Key, kept);
        }

        return highlights;
    }

    /// <summary>
    /// Appends <paramref name="match"/> to the list stored under its
    /// "author - title" key, creating the list on first use.
    /// </summary>
    private static void AddMatch(Dictionary<string, List<Match>> highlights, Match match)
    {
        string title = match.Groups["title"].Value;
        string author = match.Groups["author"].Value;
        string comb = $"{author} - {title}";

        // Single lookup instead of ContainsKey followed by the indexer.
        List<Match> bookMatches;
        if (!highlights.TryGetValue(comb, out bookMatches))
        {
            bookMatches = new List<Match>();
            highlights.Add(comb, bookMatches);
        }
        bookMatches.Add(match);
    }

    /// <summary>
    /// Writes "&lt;key&gt;.txt" (with page/location headers) and
    /// "&lt;key&gt;_nopages.txt" (quoted highlights only) for every book into
    /// the "notes" directory, creating the directory if needed.
    /// </summary>
    private static void Output(Dictionary<string, List<Match>> highlights)
    {
        Directory.CreateDirectory("notes");
        foreach (var kvp in highlights)
        {
            // File.WriteAllText opens, writes and closes the file in one call,
            // so no handle is left open if writing fails.
            string withPagesPath = Path.Combine("notes", ToSafeFileName(kvp.Key + ".txt"));
            File.WriteAllText(withPagesPath, ToStringWithPages(kvp.Value));

            string withoutPagesPath = Path.Combine("notes", ToSafeFileName(kvp.Key + "_nopages.txt"));
            File.WriteAllText(withoutPagesPath, ToStringWithoutPages(kvp.Value));
        }
    }

    /// <summary>
    /// Formats each highlight with a "Page: N" header, or "Loc: N" (start
    /// location) when the entry has no page number, separated by "----" rules.
    /// </summary>
    private static string ToStringWithPages(IEnumerable<Match> matches)
    {
        var highlights = matches.Select(x =>
            (string.IsNullOrEmpty(x.Groups["page"].Value)
                ? "Loc: " + x.Groups["locationStart"].Value
                : "Page: " + x.Groups["page"].Value)
            + "\r\n\r\n" + x.Groups["highlight"].Value);
        return string.Join("\r\n\r\n----\r\n\r\n", highlights);
    }

    /// <summary>
    /// Formats each highlight as a double-quoted paragraph preceded by a blank line.
    /// </summary>
    private static string ToStringWithoutPages(IEnumerable<Match> matches)
    {
        var highlights = matches.Select(x => "\r\n\r\n\"" + x.Groups["highlight"].Value + "\"");
        return string.Join("", highlights);
    }

    /// <summary>
    /// Removes the characters \ / " * : ? &lt; &gt; | from <paramref name="s"/>
    /// so that it can be used as a file name.
    /// </summary>
    private static string ToSafeFileName(string s)
    {
        return s
            .Replace("\\", "")
            .Replace("/", "")
            .Replace("\"", "")
            .Replace("*", "")
            .Replace(":", "")
            .Replace("?", "")
            .Replace("<", "")
            .Replace(">", "")
            .Replace("|", "");
    }
}
}