-
Notifications
You must be signed in to change notification settings - Fork 0
/
format_text.py
45 lines (36 loc) · 1.33 KB
/
format_text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import os
from sys import argv
# Usage: python format_text.py <name of a file inside ./source_text>
#
# Reads ./source_text/<name> and writes ./cleaned_source_text/<stem>_cleaned.txt
# with blank lines removed and every period-terminated sentence on its own line.


def split_sentences(line):
    """Split one line of text into the pieces to write to the cleaned file.

    A line without any period is returned unchanged as a single piece.
    Otherwise the line is cut at every period (the periods themselves are
    dropped), whitespace directly following a period is removed, empty pieces
    are discarded, and every remaining piece is terminated with a newline.

    Args:
        line: A single line of text, with or without its trailing newline.

    Returns:
        A list of strings ready to be written out in order.
    """
    if "." not in line:
        return [line]

    pieces = line.split(".")
    # Only text that follows a period loses its leading whitespace; the first
    # piece keeps whatever indentation the line started with.
    pieces = pieces[:1] + [piece.lstrip() for piece in pieces[1:]]
    # Text after the last period is kept (not silently dropped), and empty
    # pieces such as those produced by "..." are skipped so that no blank
    # lines end up in the output.
    return [piece.rstrip("\n") + "\n" for piece in pieces if piece.strip()]


def clean_file(source_path, cleaned_path):
    """Write the sentences of ``source_path`` to ``cleaned_path``.

    Whitespace-only lines are skipped. Both files are closed when the function
    returns, including when an error occurs part-way through.

    Args:
        source_path: Path of the text file to read.
        cleaned_path: Path of the file to create or overwrite.
    """
    # The source is only read, so it is opened read-only.
    with open(source_path, "r") as source, open(cleaned_path, "w") as cleaned:
        for line in source:
            if line.isspace():
                continue
            cleaned.writelines(split_sentences(line))


def main():
    """Clean the file named by the first command-line argument."""
    if len(argv) < 2:
        raise SystemExit("usage: format_text.py <file name inside ./source_text>")

    source_text = str(argv[1])
    # splitext copes with extensions of any length, unlike slicing off the
    # last four characters.
    stem = os.path.splitext(source_text)[0]
    clean_file(
        "./source_text/" + source_text,
        "./cleaned_source_text/" + stem + "_cleaned.txt",
    )


if __name__ == "__main__":
    main()