-
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbenchmark.py
64 lines (61 loc) · 3 KB
/
benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from src.workers import *
# Every pipeline stage this benchmark can run, in execution order.
BENCHMARK_STEPS = ["regex", "detox", "antispam"]


def benchmark_steps(step_arg):
    """Normalise the value of the ``-step`` option into a list of step names.

    argparse hands back the bare string ``"all"`` when the option is omitted
    (the default) but a list such as ``["all"]`` when the user types
    ``-step all`` explicitly, because the option uses ``nargs="+"``.  Both
    spellings select the full pipeline.

    Args:
        step_arg: Either a single step name (str) or a list of step names.

    Returns:
        A new list of step names to run, in order.
    """
    if isinstance(step_arg, str):
        step_arg = [step_arg]
    if "all" in step_arg:
        return list(BENCHMARK_STEPS)
    return list(step_arg)


def benchmark_input(folder, suffix=""):
    """Return the name of the file in *folder* to benchmark against.

    Entries are sorted before picking the first one because ``os.listdir``
    returns names in arbitrary order; sorting makes repeated runs use the
    same file.

    Args:
        folder: Directory to look in.
        suffix: Only entries whose name ends with this string are considered;
            the empty default accepts every entry.

    Returns:
        The (sorted-)first matching entry name, relative to *folder*.

    Raises:
        FileNotFoundError: If *folder* does not exist or holds no matching entry.
    """
    import os

    candidates = sorted(f for f in os.listdir(folder) if f.endswith(suffix))
    if not candidates:
        raise FileNotFoundError(
            f"No input file ending in {suffix!r} found in {folder!r}"
        )
    return candidates[0]


if __name__ == '__main__':
    import argparse
    import os

    parser = argparse.ArgumentParser(description='Clean Discord data')
    parser.add_argument('-dir', type=str, default="data",
                        help='the fonlder that contains the data file')
    parser.add_argument('-out', type=str, default="output",
                        help='the folder to output txts')
    # One or more step names, or the single word "all" for the whole pipeline.
    parser.add_argument('-step', type=str, nargs="+", default="all",
                        help='the step to start cleaning from')
    args = parser.parse_args()

    # Hand-written stress input for clean(): code fences, inline code,
    # stretched words, fancy Unicode, mentions, e-mail, URL, phone numbers.
    worstcase_clean="""
Hi, this is a test.
```
This is some code:
if args.step == "all":
steps=["regex", "pairs", "detox"]
else:
try:
steps=json.loads(args.step)
assert type(steps)==list
except: raise Exception("Unable to load steps json.")
```
`this too, but it's only one line`
heh maybe if i put it on one line ```e``` or does `this` work
REEE WHY IS IS BEING CLEANED OOOOF HOWWWWWWWW
I AM THE KING ♕ ✦ —• YOU CANNOT STOP ME
what about this 𝐈𝐌𝐀𝐆𝐄, it should be IMAGE.
hahahahaha i bet you can't beat my cool asian language 毛泽东万岁
WHAAAAAAAAAAAAAAAAAAAAAAT noooooooooooooooooooooooo it can't be..................
hahaha but my best invention yet, my friend @Deleted User and @Deleted User. They will surely defeat you.
plenty of spaces ??????????????? 🥲
fine. one last resort. my email is [email protected] and you can join my server at https://jadeai.ml/server. Join or else.
if those didn't work maybe my phone numbers, +2 (666) 768-1111 or 408 220 0343 will work. meet me at 12:00 :3
✧・゚:*angela*:・゚☆✧: I am the best
"""
    print("Running a clean test case ~~~~~~~~")
    print(f"{worstcase_clean}\nRaw ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
    # Turn literal backslash-n sequences in the cleaned text back into line
    # breaks so the result is readable on the terminal.
    print(clean(worstcase_clean).replace("\\n","\n"))
    print("Clean ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n")

    # step name -> (worker, input folder, required input suffix, output folder).
    # Each stage reads what the previous stage wrote.
    pipeline = {
        "regex": (worker_regex, args.dir, "", args.out),
        "detox": (worker_detox, args.out, ".txt", args.out + "-detox"),
        "antispam": (worker_antispam, args.out + "-detox", ".txt",
                     args.out + "-antispam"),
    }

    for step in benchmark_steps(args.step):
        if step not in pipeline:
            # Previously unknown names were ignored without any feedback.
            print(f"Skipping unknown step: {step}")
            continue
        worker, src_dir, suffix, dst_dir = pipeline[step]
        # exist_ok tolerates a pre-existing folder but still surfaces real
        # failures (e.g. permissions) instead of swallowing them.
        os.makedirs(dst_dir, exist_ok=True)
        print(f"\033[1mRunning {step} test\033[0m")
        ret = [worker(benchmark_input(src_dir, suffix), src_dir, dst_dir, debug=True)]
        write_stats(ret, dst_dir)
    print("DONE")