-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
98 lines (89 loc) · 3.18 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
"""
Program to demonstrate usage of whowrotethis
python module
"""
import os
import matplotlib.pyplot as plt
import pandas as pd
from whowrotethis import (TextPreprocessing, TextEmbedding, Classifier,
EvaluateModel, UserApp)
from whowrotethis.models.EnsembledModel import EnsembledModel
from whowrotethis.EvaluateClassifier import evaluate
def main():
    """Walk through the whowrotethis demo steps in order.

    The steps are: preprocess a literal sample string, compute embeddings
    from 'text.txt', classify those embeddings, and finally start the
    user-facing app. Each intermediate result is printed to stdout.
    """
    # Step 1: text preprocessing on an in-memory sample.
    # file_given=False presumably marks the argument as raw text rather
    # than a file path -- confirm against TextPreprocessing.
    cleaned = TextPreprocessing("This is a test.", file_given=False).preprocess()
    print(cleaned)

    # Step 2: text embeddings for 'text.txt'.
    embedder = TextEmbedding('text.txt')
    vectors = embedder.get_embeddings()
    print(vectors)

    # Step 3: classify the embeddings and show the prediction.
    prediction = Classifier(vectors).predict_text()
    print(prediction)

    # Step 4: start the Streamlit app.
    # NOTE(review): the previous comment read "uncomment below", implying
    # this call was meant to be optional; it runs unconditionally here.
    app = UserApp()
    app.run_app()
# # Evaluate the ensembled models------------------------------------------
# # Load data
# data = pd.read_csv(
# f'{os.getcwd()}\\whowrotethis\\data\\10k_raw_unseen.csv')
# x_test = data.loc[:, '0' : '767']
# y_test = data['label']
#
# # Get predictions
# model = EnsembledModel(x_test)
# y_pred_1 = model.simple_predict()
# y_pred_2 = model.weighted_predict()
#
# # Prepare figure
# fig, axs = plt.subplots(1, 2, figsize=(15, 5), tight_layout=True)
# count = 0
# fig.suptitle("Number of wrong predictions using the ensembled model")
#
# # Evaluate models
# print("Weighted Ensemble Model:")
# evaluate(y_test, y_pred_2, axs, count, "Weighted Ensembled Model")
# count += 1
# print("-" * 30)
#
# print("Unweighted Ensemble Model:")
# evaluate(y_test, y_pred_1, axs, count, "Unweighted Ensembled Model")
#
# plt.show()
# # -----------------------------------------------------------------------
# # Evaluate all the models separately-------------------------------------
# df = pd.read_csv(
# f'{os.getcwd()}\\whowrotethis\\models\\model_description.csv')
# print("Model Evaluation on 10k raw text embeddings")
#
# # Used to draw the bar plots
# count = 0
# fig1, axs1 = plt.subplots(
# 1, 5, figsize=(15, 5), tight_layout=True)
# fig2, axs2 = plt.subplots(
# 1, 5, figsize=(15, 5), tight_layout=True)
# fig1.suptitle("Number of wrong predictions on 10k raw texts")
# fig2.suptitle("Number of wrong predictions on 10k preprocessed texts")
#
# for model in df['model_file']:
# # Test on raw unseen text embeddings
# print("Test on 10k raw text embeddings" + "-" * 20)
# report_1 = EvaluateModel(
# model, '10k_raw_unseen.csv', axs1, count)
# print(report_1)
# report_1.evaluate()
# report_1.show_wrong()
#
# # Test on preprocessed text embeddings
# print("Test on 10k preprocessed text embeddings" + "-" * 20)
# report_2 = EvaluateModel(
# model, '10k_preprocessed_unseen.csv', axs2, count)
# print(report_2)
# report_2.evaluate()
# report_2.show_wrong()
#
# count += 1
#
# plt.show()
# # -----------------------------------------------------------------------
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()