-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbinsearch.py
33 lines (27 loc) · 952 Bytes
/
binsearch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import subprocess
# Written for Python 3.5.2 (uses subprocess.run, which was added in Python 3.5).
# Chart size that the search at the bottom of this script aims for.
TARGET_CHART_SIZE = 7400

# Whitespace-separated token corpus that prefixes are taken from.
CORPUS_FILE = 'java_testing/elasticsearch.corpus'

# Scratch file rewritten with the current prefix before every parser run.
TEMP_FILE = 'java_testing/temp.corpus'

# Grammar file handed to earley.py as its first argument.
GRAMMAR_FILE = "./java_testing/java_no_epsilons.gr"

# Load the whole corpus once and split it on whitespace into tokens.
with open(CORPUS_FILE) as f:
    corpus_text = f.read()
full_corpus = corpus_text.split()
def chart_size(input_size):
    """Measure the parser output for the first ``input_size`` corpus tokens.

    The prefix is written to TEMP_FILE (space-separated), then
    ``python earley.py GRAMMAR_FILE TEMP_FILE`` is run with its standard
    output captured.

    Returns the number of newline characters in that output, which this
    script uses as the chart size.
    NOTE(review): presumably earley.py prints one chart entry per line --
    confirm against earley.py.
    """
    prefix_tokens = full_corpus[:input_size]
    with open(TEMP_FILE, 'w') as scratch:
        scratch.write(' '.join(prefix_tokens))

    command = ["python", "earley.py", GRAMMAR_FILE, TEMP_FILE]
    # The exit status is not inspected; only the captured stdout is used.
    completed = subprocess.run(command, stdout=subprocess.PIPE)
    return completed.stdout.decode('utf-8').count('\n')
def binary_search(target_size, low=0, high=None, size_fn=None):
    """Find the longest probed prefix whose size does not exceed the target.

    The interval ``[low, high)`` is halved repeatedly: the midpoint is
    measured, and the half that can still contain the boundary is kept.
    ``low`` and ``high`` themselves are never measured -- only midpoints
    strictly between them -- so the result is always smaller than ``high``.

    Args:
        target_size: largest acceptable value of ``size_fn(prefix_length)``.
        low: lower bound of the search (inclusive); returned unchanged when
            no midpoint fits under the target.
        high: upper bound of the search (exclusive). Defaults to
            ``len(full_corpus)``, resolved when the function is called.
        size_fn: callable mapping a prefix length to its measured size.
            Defaults to ``chart_size``.

    Returns:
        The final ``low``: the largest measured prefix length whose size was
        ``<= target_size``, or the initial ``low`` if none was.

    NOTE(review): the search only makes sense if ``size_fn`` is
    non-decreasing in the prefix length; that is assumed, not checked.
    """
    if high is None:
        high = len(full_corpus)
    if size_fn is None:
        size_fn = chart_size

    # Stop as soon as no midpoint is left between the bounds. Using
    # ``> 1`` rather than testing ``high == low + 1`` also terminates for an
    # empty or inverted interval (e.g. an empty corpus), which previously
    # recursed without end.
    while high - low > 1:
        med = (low + high) // 2
        med_size = size_fn(med)
        # Progress trace: current bounds and the size measured at the midpoint.
        print("{} {} => {}".format(low, high, med_size))
        if med_size > target_size:
            high = med
        else:
            low = med
    return low
# Search for the prefix length matching TARGET_CHART_SIZE, then print that
# prefix of the corpus as a single space-separated line.
size = binary_search(TARGET_CHART_SIZE)
selected_tokens = full_corpus[:size]
print(' '.join(selected_tokens))