-
Notifications
You must be signed in to change notification settings - Fork 0
/
kavascript.py
executable file
·489 lines (407 loc) · 16 KB
/
kavascript.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
#!/usr/bin/env python
from __future__ import division
import os,sys
# TODO:
# don't mess up existing multi-line brackets?
# write file input / output
# fix single-line closures when CLOSURE_TAILS is on?
# write command line handling
# handle regexes
# strip trailing space added after "{" if it's the last thing on the line
# ANNOTATIONS used by the Line class:
# - whitespace
# c comment //
# C comment /* */
# "' string
# r regex
# x code
# X code added during translation
#===================================================================================================
# OPTIONS
# If True, the translator wraps each closure as "(function () { ... })()", i.e. it
# adds the trailing ")()" for you. If False, "closure" becomes a plain "function ()"
# and you have to write the invocation yourself in your code.
CLOSURE_TAILS = True
# If True, debug() prints progress messages; if False, debug() prints nothing.
DEBUG = True
def debug(n,s):
    """Print the progress message `s`, indented by `n` spaces, when DEBUG is on.

    Every message is prefixed with "| " so that it stands out from the
    translated output. Nothing is printed when DEBUG is false.
    """
    if not DEBUG:
        return
    padding = ' ' * n
    # single-argument print(...) behaves the same under Python 2 and Python 3
    print('| %s%s' % (padding, s))
#===================================================================================================
# UTILS
def multiFind(string,substring):
    """Return a list of integers indicating where the substring begins in the string.

    Matches are found left to right and are not allowed to overlap themselves:
        multiFind('pppp','pp') = [0,2]
    If there are no matches, return [].
    An empty substring has no meaningful non-overlapping occurrences, so it
    also returns [] (searching for it used to loop forever).
    """
    if not substring:
        # str.find('') always succeeds without advancing, so guard explicitly
        return []
    step = len(substring)
    indices = []
    start = string.find(substring)
    while start != -1:
        indices.append(start)
        # resume the search just past this match so matches never overlap
        start = string.find(substring, start + step)
    return indices
#===================================================================================================
# LINE
class Line(object):
    """A line of code, plus the per-character annotation used to understand and transform it.

    `annotation` holds one marker per character of `text`:
        -   whitespace
        c   // comment
        C   /* */ comment
        a double or single quote character: inside a string of that kind
        x   code
        X   code added during translation

    Most methods should be used either before or after prepareForTranslation():
      -- Phase 1: Create a new Line with text and lineNum. Call setAnnotation().
      -- Call prepareForTranslation()
      -- Phase 2: Call removeLastColon(), replaceClosure(), addOpenBracket()
    An exception is indent, which can be changed either before or after
    prepareForTranslation().
    """

    # annotation markers that count as code / as string contents
    _CODE_MARKS = ('x', 'X')
    _STRING_MARKS = ('"', "'")

    def __init__(self, text, lineNum):
        self.text = text                          # original source text
        self.annotation = ['-'] * len(self.text)  # one marker per character of text
        self.newText = None                       # translated text (phase 2 only)
        self.newAnnotation = None                 # annotation kept in step with newText
        self.lineNum = lineNum
        self.indent = -1  # -1 for useless lines, 0, 1, 2... for useful lines
        self.preparedForTranslation = False
        self.hasClosure = False                   # set once replaceClosure() replaced a token

    def numLeadingSpaces(self):
        """Return how many characters at the start of the original line are annotated as whitespace."""
        for ii, here in enumerate(self.annotation):
            if here != '-':
                return ii
        return len(self.annotation)

    def hasCode(self):
        """Does this line contain any code or strings? Otherwise it's just comments and/or whitespace.

        Looks at the original annotation, not at newAnnotation.
        """
        marks = self._CODE_MARKS + self._STRING_MARKS
        return any(mark in self.annotation for mark in marks)

    def setAnnotation(self, ii, state):
        """Set the annotation marker of character `ii`. Only allowed before prepareForTranslation()."""
        assert not self.preparedForTranslation
        self.annotation[ii] = state

    def prepareForTranslation(self):
        """Call this when you're done loading the original text and annotation into the line object.

        Once this has been called, never change the original text or annotation.
        """
        self.newText = self.text
        self.newAnnotation = self.annotation[:]
        self.preparedForTranslation = True

    def _lastIndexAnnotated(self, marks):
        """Return the rightmost index of newText whose annotation is in `marks`, or -1 if there is none."""
        for ii in range(len(self.newText) - 1, -1, -1):
            if self.newAnnotation[ii] in marks:
                return ii
        return -1

    @staticmethod
    def _isIdentifierChar(char):
        """Can `char` be part of a JavaScript identifier (letter, digit, underscore or dollar sign)?"""
        return char.isalnum() or char in ('_', '$')

    def removeLastColon(self):
        """If the last code character is a colon, remove it.

        Assumes this line object has been prepared for translation.
        """
        assert self.preparedForTranslation
        if not self.hasCode():
            return
        ii = self._lastIndexAnnotated(self._CODE_MARKS)
        if ii != -1 and self.newText[ii] == ':':
            # drop the colon from the text and from the annotation so they stay in step
            self.newText = self.newText[:ii] + self.newText[ii + 1:]
            del self.newAnnotation[ii]

    def replaceClosure(self):
        """Replace the first real "closure" keyword with "(function ()" or "function ()".

        Which replacement is used depends on CLOSURE_TAILS. Occurrences inside
        strings or comments are skipped, and so are occurrences that are only
        part of a longer identifier (e.g. "enclosure" or "closureCount").
        Sets hasClosure when a replacement was made.
        Assumes this line object has been prepared for translation.
        """
        assert self.preparedForTranslation
        token = 'closure'
        if CLOSURE_TAILS:
            newToken = '(function ()'
        else:
            newToken = 'function ()'
        # prepare a chunk of annotation to go along with the new token
        tokenAnnotation = []
        for char in newToken:
            if char.isspace():
                tokenAnnotation.append('-')
            else:
                tokenAnnotation.append('X')
        for ii in multiFind(self.newText, token):
            end = ii + len(token)
            # make sure it's actual code
            if self.newAnnotation[ii] not in self._CODE_MARKS:
                continue
            # make sure it's a whole word, not a fragment of another identifier
            if ii > 0 and self._isIdentifierChar(self.newText[ii - 1]):
                continue
            if end < len(self.newText) and self._isIdentifierChar(self.newText[end]):
                continue
            # replace the token and annotation
            self.newText = self.newText[:ii] + newToken + self.newText[end:]
            self.newAnnotation = self.newAnnotation[:ii] + tokenAnnotation + self.newAnnotation[end:]
            self.hasClosure = True
            # only do the first one we find
            break

    def addOpenBracket(self):
        """Insert " {" right after the last code character of the line.

        Anything that follows the last code character (whitespace, comments)
        is kept and separated from the bracket by one extra space. If the line
        has no code characters but does have string characters, the bracket
        goes after the last string character. A line with neither is left
        unchanged.
        Assumes this line object has been prepared for translation.
        """
        assert self.preparedForTranslation
        ii = self._lastIndexAnnotated(self._CODE_MARKS)
        if ii == -1:
            # no plain code on the line: never split a string, go after it instead
            ii = self._lastIndexAnnotated(self._STRING_MARKS)
        if ii == -1:
            return
        if ii == len(self.newText) - 1:
            bracketText = ' {'
            bracketAnnotation = ['-', 'X']
        else:
            # keep the bracket apart from whatever follows it
            bracketText = ' { '
            bracketAnnotation = ['-', 'X', '-']
        self.newText = self.newText[:ii + 1] + bracketText + self.newText[ii + 1:]
        self.newAnnotation[ii + 1:ii + 1] = bracketAnnotation
#===================================================================================================
# PROGRAM
class Program(object):
    """A collection of Lines and some code to parse and transform them
    from KavaScript to JavaScript.

    Life cycle:
        program = Program()
        program.readString(SRC)
        program.annotate()
        success = program.translate()
        if not success:
            sys.exit(1)
        for line in program.lines:
            print(line.newText)
    """

    # number of leading spaces that make up one indentation level
    INDENT_WIDTH = 4

    def __init__(self, lines=None):
        """Create a program, optionally starting from a copy of the given list of Line objects."""
        self.lines = list(lines) if lines else []

    def addLine(self, line):
        """Append one Line object to the program."""
        self.lines.append(line)

    def readString(self, s):
        """Read the string `s`, split it into lines, and add them to self.

        Lines are numbered from 1 in the order they appear in `s`.
        """
        debug(0, 'reading string')
        for lineNum, text in enumerate(s.splitlines()):
            self.addLine(Line(text, lineNum + 1))

    def _printAnnotated(self, textAttr, annotationAttr):
        """Print each line's text with its annotation underneath, followed by a blank line.

        Text rows start with '>' when the line has code, then the indent level;
        annotation rows are padded by the same width so the markers line up
        with the characters they describe.
        """
        for line in self.lines:
            marker = ' >'[line.hasCode()]
            print('%s%3s %s' % (marker, line.indent, getattr(line, textAttr)))
            # 5 = marker (1) + indent field (3) + separating space (1)
            print('%s%s' % (' ' * 5, ''.join(getattr(line, annotationAttr))))
            print('')

    def printNewAnnotatedSource(self):
        """Print the generated javascript with extra lines showing our annotation of the code.

        Only meaningful after translate(), which fills in newText / newAnnotation.
        """
        debug(0, 'printing generated javascript')
        self._printAnnotated('newText', 'newAnnotation')

    def printAnnotatedSource(self):
        """Print the original kavascript with extra lines showing our annotation of the code."""
        debug(0, 'printing original kavascript')
        self._printAnnotated('text', 'annotation')

    def annotate(self):
        """Parse the original kavascript to understand where the comments, strings, etc are.

        Must be called before translate(): it uses Line.setAnnotation(), which
        asserts that the line has not been prepared for translation yet.
        """
        debug(0, 'annotating')
        state = '-'
        # this is a simple state machine that goes through the whole program
        # character by character; the state carries over from one line to the
        # next so that /* */ comments and strings can span several lines
        for line in self.lines:
            lastIndex = len(line.text) - 1
            for ii, char in enumerate(line.text):
                prevChar = line.text[ii - 1] if ii != 0 else None
                nextChar = line.text[ii + 1] if ii != lastIndex else None
                # state to switch to only AFTER this character has been
                # annotated, so closing delimiters still belong to their
                # comment or string
                nextState = None

                # long comment: begin
                if char == '/' and nextChar == '*' and state in '-xX':
                    state = 'C'
                # end
                elif prevChar == '*' and char == '/' and state == 'C':
                    nextState = '-'

                # single quoted string: begin
                if char == "'" and state in '-xX':
                    state = "'"
                # end (unless the quote is escaped)
                elif char == "'" and prevChar != '\\' and state == "'":
                    nextState = '-'

                # double quoted string: begin
                if char == '"' and state in '-xX':
                    state = '"'
                # end (unless the quote is escaped)
                elif char == '"' and prevChar != '\\' and state == '"':
                    nextState = '-'

                # whitespace / code
                if not char.isspace() and state == '-':
                    state = 'x'
                if char.isspace() and state in 'xX':
                    state = '-'

                # begin a small comment
                if char == '/' and nextChar == '/' and state in '-xX':
                    state = 'c'

                line.setAnnotation(ii, state)

                if nextState:
                    state = nextState

            # end of line: end a small comment
            if state == 'c':
                state = '-'

    def translate(self):
        """Compute the new javascript from the given kavascript.

        Store it in each line's newText attribute; lines holding the close
        brackets are inserted into self.lines.
        Assumes annotate() has already been called.
        Return True, or False if there were any problems (an error message is
        printed in that case).
        """
        debug(0, 'translating')
        for line in self.lines:
            line.prepareForTranslation()
        self._parseIndentation()
        self._replaceClosures()
        self._addOpenBrackets()
        return self._addCloseBrackets()

    def _realLines(self):
        """Return (position in self.lines, line) pairs for the lines that have code and an indent level."""
        return [(position, line) for position, line in enumerate(self.lines)
                if line.hasCode() and line.indent != -1]

    def _parseIndentation(self):
        """Give every line that has code an indent level (0, 1, 2...) based on its leading whitespace."""
        debug(1, 'parsing indentation')
        width = self.INDENT_WIDTH
        lastGoodIndent = 0
        for line in self.lines:
            if not line.hasCode():
                continue
            spaces = line.numLeadingSpaces()
            indent = spaces // width
            aligned = (spaces % width == 0)
            if indent == lastGoodIndent:
                line.indent = indent
            elif aligned and indent == lastGoodIndent + 1:
                # one level deeper: a new block starts here
                line.indent = indent
                lastGoodIndent = indent
            elif aligned and indent < lastGoodIndent:
                # back out of one or more blocks
                line.indent = indent
                lastGoodIndent = indent
            elif spaces > lastGoodIndent * width:
                # deeper than a single step: treat it as part of the current
                # block rather than as a new one
                line.indent = lastGoodIndent
            elif spaces < lastGoodIndent * width:
                print('WARNING: line %s has confusing indentation:' % line.lineNum)
                print('>>>%s<<<' % line.text)
                line.indent = lastGoodIndent

    def _replaceClosures(self):
        """Replace the "closure" keyword on every line that has code."""
        debug(1, 'replacing "closure" tokens')
        for line in self.lines:
            if line.hasCode():
                line.replaceClosure()

    def _addOpenBrackets(self):
        """Turn the trailing colon of every line that is followed by a deeper line into an open bracket."""
        debug(1, 'adding open brackets and removing trailing colons')
        realLines = [line for _, line in self._realLines()]
        for ii in range(len(realLines) - 1):
            lineA = realLines[ii]
            lineB = realLines[ii + 1]
            if lineA.indent < lineB.indent:
                lineA.removeLastColon()
                lineA.addOpenBracket()

    def _addCloseBrackets(self):
        """Insert a close-bracket line wherever the indent level drops; return False if a parent is missing."""
        debug(1, 'adding close brackets and parens at end of closures')
        indexed = self._realLines()
        positions = [position for position, _ in indexed]
        realLines = [line for _, line in indexed]
        # a fake last line at indent 0 makes the blocks that are still open at
        # the end of the program get closed by the same comparison as the rest
        fakeLastLine = Line('', -1)
        fakeLastLine.indent = 0
        fakeLastLine.prepareForTranslation()
        realLines.append(fakeLastLine)
        # go backwards so that inserting lines never shifts the positions of
        # the lines we still have to look at
        for ii in range(len(realLines) - 2, -1, -1):
            lineA = realLines[ii]
            lineB = realLines[ii + 1]
            if lineA.indent <= lineB.indent:
                continue
            # one close bracket per level dropped; they are all inserted at the
            # same position, so the deepest one (inserted last) ends up first
            for indentHere in range(lineB.indent, lineA.indent):
                # look upwards and find the line with the open bracket that
                # corresponds to this close bracket (the "parent") so we can
                # know if it's a closure or not
                parent = None
                for parentii in range(ii, -1, -1):
                    if realLines[parentii].indent == indentHere:
                        parent = realLines[parentii]
                        break
                if parent is None:
                    print("ERROR: couldn't find parent line for close bracket to be inserted between lines %s and %s"
                          % (lineA.lineNum, lineA.lineNum + 1))
                    return False
                # add a new line with a close bracket (and maybe ")()" if the parent is a closure)
                if parent.hasClosure and CLOSURE_TAILS:
                    closer = '})()'
                else:
                    closer = '}'
                newLine = Line(' ' * (self.INDENT_WIDTH * indentHere) + closer, -1)
                newAnnotation = []
                for char in newLine.text:
                    if char.isspace():
                        newAnnotation.append('-')
                    else:
                        newAnnotation.append('X')
                newLine.annotation = newAnnotation
                newLine.indent = indentHere
                newLine.prepareForTranslation()
                # right after lineA, wherever it actually sits in self.lines
                self.lines.insert(positions[ii] + 1, newLine)
        # success
        return True
#===================================================================================================
# MAIN
# Sample KavaScript program that the statements following this literal feed to the
# translator. It collects the tricky cases: the word "closure" and quote characters
# inside comments and strings, comment markers inside strings, a condition that
# spans two lines, nested callbacks and object literals.
# It is a raw string, so the backslashes in it reach the translator unchanged.
# NOTE(review): translate() derives blocks from leading whitespace (4 spaces per
# level), so the sample only exercises it if its lines are indented accordingly.
SRC = r"""
// comment with apparent "string" and the word closure
/* long comment
with decoy things: closure "string" */
var myObject = closure: // this will be replaced with "function ()"
var value = 0;
var mystring1 = "he'l\"lo // there";
var mystring2 = 'he"l\"lo // there';
var mystring3 = "closure";
if ( (1+2+3+4+5+6+7+8+9+10 == 1)
&& (1+2+3+4+5+6+7+8+9+10 == 1) ):
value += 1;
// comment
return // comment
increment: function (inc):
value += typeof inc === 'number' ? inc : 1;
,
getValue: function ():
return value;
;
// closure test
cubes = closure:
var i, len, results;
results = [];
for (i = 0, len = list.length; i < len; i++):
num = list[i];
results.push(math.cube(num));
return results;
;
// typical node code
http.createServer(function (req, res):
res.writeHead(200, {'Content-Type': 'text/plain'});
res.end('Hello World\n');
).listen(1337, "127.0.0.1");
Step(
function readDir():
fs.readdir(__dirname, this);
,
function readFiles(err, results):
if (err) throw err;
// Create a new group
var group = this.group();
results.forEach(function (filename):
if (/\.js$/.test(filename))
fs.readFile(__dirname + "/" + filename, 'utf8', group());
);
,
function showAll(err , files):
if (err) throw err;
console.dir(files);
);
// object literals / JSON with funny indentation
var x = doSomethingTo(
cities1: [
'boston',
'new york'
],
cities2: [
'boston',
'new york'
],
cities3: [
'boston',
'new york'
]
);
"""
# Demo run: translate the sample program in SRC and print the resulting JavaScript
# between two separator lines.
program = Program()
program.readString(SRC)
program.annotate()
success = program.translate()
if not success:
    # translate() has already printed what went wrong; exit with a non-zero
    # status so that callers (shell scripts, build tools) can see the failure
    sys.exit(1)
print('===========================================')
# for debugging, show the annotation under every generated line instead:
# program.printNewAnnotatedSource()
for line in program.lines:
    print(line.newText)
print('===========================================')