-
Notifications
You must be signed in to change notification settings - Fork 0
/
PromptSanitizer.py
69 lines (53 loc) · 1.9 KB
/
PromptSanitizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import json
import sys
class PromptSanitizer:
    """Reduce near-JSON text to its string literals and structural punctuation.

    The scanner walks the text one character at a time and keeps:

    * every double-quoted string literal, including its quotes, and
    * the characters listed in ``permitted_characters`` when they occur
      outside a string literal.

    Everything else outside a string literal is discarded.  That includes
    whitespace and commentary, but also bare scalars such as numbers,
    ``true``, ``false`` and ``null``.

    Two quirks of the output are deliberate carry-overs from the original
    behaviour:

    * a ``{`` is always prepended to the result, and
    * a structural character in the very first position of the input is
      dropped (so an input that already starts with ``{`` does not end up
      with two opening braces).
    """

    def __init__(self):
        # The attributes below are not read by any method of this class;
        # they are kept because they are publicly visible instance state.
        self.line_new = ""
        self.string_array = []
        # Original note: "required for recursive call".  No recursive call
        # exists in this class -- presumably left over from an earlier design.
        self.start_character = '{'
        self.end_character = '}'

        # Scanner state; reset at the start of every scan.
        self.flag_char = False   # True while the scanner is inside a string
        self.prev_char = ''      # a backslash here means "next char is escaped"

        # Characters kept when they appear outside a string literal.
        self.permitted_characters = [',', ':', '{', '}', "[", "]"]

    def preprocessing(self, lines: str) -> str:
        """Encode newlines and spaces as two-character backslash sequences.

        A newline becomes a backslash followed by ``n`` and a space becomes a
        backslash followed by ``s``.

        ``jsonSanitizer`` does not use this helper: the space placeholder
        cannot be told apart from a genuine backslash followed by ``s`` in the
        input, so undoing it corrupts such text.  The method is kept unchanged
        for callers outside this class.
        """
        lines = lines.replace('\n', '\\n')
        lines = lines.replace(' ', '\\s')
        return lines

    def postprocessing(self, line: str) -> str:
        """Turn every backslash-``s`` pair in *line* back into a space.

        Kept unchanged for callers outside this class; see ``preprocessing``
        for why ``jsonSanitizer`` does not use it.
        """
        line = line.replace('\\s', ' ')
        return line

    def jsonSanitizer(self, lines: str) -> str:
        """Return the sanitised form of *lines* as text.

        The result starts with ``{`` followed by the kept string literals and
        structural characters, in input order.  Raw newlines inside a string
        literal are emitted as the two-character escape (backslash, ``n``);
        spaces inside a string literal are kept verbatim.  The result is not
        guaranteed to be valid JSON.
        """
        sanitized, _ = self.__stringToDict(lines)
        return sanitized

    def __isPayload(self, char):
        """Return True while the scanner is inside a string; *char* is ignored."""
        return self.flag_char

    def __stringToDict(self, lines):
        """Scan *lines* and return ``(sanitised_text, characters_scanned)``."""
        # Start from a clean state so that an unterminated string in a
        # previous call cannot leak into this one.
        self.flag_char = False
        self.prev_char = ''

        string_array = []
        for index, char in enumerate(lines):
            escaped = self.prev_char == '\\'
            if char == '"' and not escaped:
                # An unescaped quote opens or closes a string literal and is
                # always part of the output.
                self.flag_char = not self.flag_char
                self.prev_char = ''
                string_array.append(char)
            elif self.flag_char:
                # Inside a string: keep the character.  A raw newline is not
                # allowed inside a JSON string, so emit its escape instead.
                string_array.append('\\n' if char == '\n' else char)
                # A character consumed by an escape must not itself start a
                # new escape, otherwise an escaped backslash directly before
                # the closing quote would hide that quote.
                self.prev_char = '' if escaped else char
            else:
                # Outside a string: keep structural characters only, and never
                # the very first character (a '{' is prepended below).
                self.prev_char = ''
                if index > 0 and char in self.permitted_characters:
                    string_array.append(char)

        return '{' + ''.join(string_array), len(lines)
if __name__ == "__main__":
    # Command-line use: sanitise the text in tmp.txt and store it in tmp.json.
    with open("tmp.txt", "r") as fJson:
        lines = fJson.read()

    ps = PromptSanitizer()
    # jsonSanitizer is the public entry point; the class defines no
    # dictSanitizer method, so the former call always raised AttributeError.
    sanitized = ps.jsonSanitizer(lines)
    print(sanitized)

    # The sanitiser returns text.  Parse it so that tmp.json holds the actual
    # object rather than a JSON-encoded string of JSON; if the text still is
    # not valid JSON, fall back to storing the raw text so nothing is lost.
    try:
        new_dict = json.loads(sanitized)
    except json.JSONDecodeError:
        new_dict = sanitized

    with open("tmp.json", "w") as fJson:
        json.dump(new_dict, fJson)