-
Notifications
You must be signed in to change notification settings - Fork 2.4k
/
pal.py
155 lines (135 loc) · 5.73 KB
/
pal.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
from __future__ import print_function
import string, random, os, re, bisect
"""Produce Panama-ish Palindromes. Copyright (C) 2002, Peter Norvig.
See http://www.norvig.com/license.html and http://www.norvig.com/pal-alg.html"""
def is_panama(p):
"Test if p is a Panama-ish palindrome."
def is_unique(seq): return len(seq) == len(dict(zip(seq, seq)))
return (p.endswith('Panama') and is_palindrome(p)
and is_unique([s.strip() for s in p.split(',')]))
def is_palindrome(phrase):
"Test if a phrase is a palindrome."
cphrase = canonical(phrase)
return cphrase == reverse(cphrase)
def canonical(word, sub=re.compile('[^A-Za-z0-9]').sub):
"The canonical form for comparing: lowercase alphanumerics."
return sub('', word).lower()
def read_dict(filename='npdict.txt'):
"Read the file into global variables _fw and _bw and _truename."
global _fw, _bw, _truename
_fw, _bw, _truename = [], [], {'': ''}
for word in open(filename).read().splitlines():
w = canonical(word)
_fw.append(w)
_bw.append(reverse(w))
_truename[w] = word
_fw.sort(); _bw.sort()
return len(_fw), len(_bw), len(_truename)
def update(obj, **entries): obj.__dict__.update(entries); return obj
class PalDict:
"""A dictionary from which you can find canonical words that start or end
with a given canonical substring, and find the true name of a
canonical word."""
def __init__(self, fw=None, bw=None, truename=None):
update(self, fw=fw or _fw, bw=bw or _bw, truename=truename or _truename)
def startswith(self, prefix, k=100):
"""Return up to k canonical words that start with prefix.
If there are more than k, choose from them at random."""
return k_startingwith(k, self.fw, prefix)
def endswith(self, suffix, k=100):
"""Return up to k canonical words that end with suffix.
If there are more than k, choose from them at random.
Both the suffix and the word returned are reversed."""
return k_startingwith(k, self.bw, suffix)
def k_startingwith(k, words, prefix):
"""Choose up to k words that match the prefix (choose randomly if > k)."""
start = bisect.bisect(words, prefix)
end = bisect.bisect(words, prefix + 'zzzz')
n = end - start
if k >= n:
results = words[start:end]
random.shuffle(results)
else: # Should really try to avoid duplicates
results = [words[random.randrange(start, end)] for i in range(k)]
return results
class Panama:
def __init__(self, L='A man, a plan', R='a canal, Panama', dict=None):
left = [canonical(w) for w in L.split(', ')]
right = [canonical(reverse(w)) for w in reverse(R.split(', '))]
update(self, left=left, right=right, dict=dict or PalDict(), best=0,
seen={}, diff=len(''.join(left)) - len(''.join(right)))
for word in left + map(reverse, right):
self.seen[word] = 1
def missing(self, k=20):
"""Return the substring that is missing, and candidate words."""
if self.diff >= 0: # Left is longer, missing on right
substr = self.left[-1][-self.diff:]
return substr, self.dict.endswith(substr, k)
else: # Right is longer, missing on left
substr = self.right[-1][self.diff:]
return substr, self.dict.startswith(substr, k)
def search(self, k=200):
"Search for palindromes; consider at most k words at each level."
self.stack = [self.missing(k)]
while self.stack:
substr, words = self.stack[-1]
if is_palindrome(substr):
self.report()
if words:
self.extend(words.pop(), k)
elif not self.backtrack():
return
def extend(self, word, k):
"Add a new word (unless we've already seen it)."
if self.diff >= 0: # Left is longer, add to right
fword = reverse(word)
if fword in self.seen: return
self.diff -= len(fword)
self.seen[fword] = 1
self.right.append(word)
self.stack.append(self.missing(k))
else: # Right is longer, add to left
if word in self.seen: return
self.diff += len(word)
self.seen[word] = 1
self.left.append(word)
self.stack.append(self.missing(k))
def backtrack(self):
"Remove the last word added; return 0 if can't backtrack"
if self.diff >= 0: # Left is longer, pop from left
if not self.left: return 0
word = self.left.pop()
self.diff -= len(word)
del self.seen[word]
else: # Right is longer, pop from right
if not self.right: return 0
word = self.right.pop()
self.diff += len(word)
del self.seen[reverse(word)]
self.stack.pop()
return 1
def report(self):
"Write current state to log file."
if len(self) > self.best + 200:
self.best = len(self)
print(self.best)
self.bestphrase = str(self)
assert is_panama(self.bestphrase)
f = open('pallog%d.txt' % os.getpid(), 'w')
f.write(self.bestphrase + '\n')
f.close()
def __len__(self):
return len(self.left) + len(self.right)
def __str__(self):
truename = self.dict.truename
lefts = [truename[w] for w in self.left]
rights = [truename[reverse(w)] for w in reverse(self.right[:])]
return ', '.join(lefts + ['*****'] + rights)
def reverse(x):
"Reverse a list or string."
if type(x) == type(''):
return ''.join(reverse(list(x)))
else:
x.reverse()
return x
if __name__ == '__main__': read_dict(); p = Panama(); p.search()