Develop

TailUFPB · May 13, 2024 · 3ec629d · 3ec629d
1 parent fc61a86
commit 3ec629d
Show file tree

Hide file tree

Showing 16 changed files with 1,219 additions and 1,141 deletions.
diff --git a/api/DataProcesser.py b/api/DataProcesser.py
@@ -2,13 +2,19 @@
 from NbNewsModel import news_prediction
 from NbEmotionsModel import make_prediction
 from available_classifiers import get_available_classifiers
+from sklearn.pipeline import make_pipeline
+
+# bag of words (TF-IDF weighted text features)
+from sklearn.feature_extraction.text import TfidfVectorizer
+
+from sklearn.model_selection import train_test_split
+from sklearn.naive_bayes import MultinomialNB
 
 import Neural_Network2
 import pickle
 import re
 import joblib
 import numpy as np
-import string
 import os
 import pandas as pd
 import torch
@@ -30,7 +36,8 @@ def handle_classify(self, df, classifier):
  classifier_switcher = get_available_classifiers() # id: nome_arquivo
  model_name = classifier_switcher[classifier]
  if model_name.endswith('.pkl'):
- return self.pretrained_predict(df, model_name)
+ pipeline = self.get_pipeline(model_name)
+ return self.pretrained_predict(df, pipeline)
  else:
  return self.trained_predict(df, model_name)
  #classifier_switcher = {
@@ -41,6 +48,20 @@ def handle_classify(self, df, classifier):
 
  #return classifier_switcher.get(classifier, lambda: "Invalid Classifier")(df)
 
+ def get_pipeline(self, model_name):
+ if model_name=="emotion_pipeline.pkl":
+ df = pd.read_csv('api/training_df/tweet_emotions.csv')
+ train_data, test_data, train_target, test_target = train_test_split(df['content'], df['sentiment'], test_size=0.2, shuffle=True)
+ elif model_name=="hate_speech.pkl":
+ df = pd.read_csv('api/training_df/nb_hatespeech.csv', sep=';')
+ train_data, test_data, train_target, test_target = train_test_split(df['comment'], df['isHate'], test_size=0.2, shuffle=True)
+ elif model_name=="text_classification_pipeline.pkl":
+ df = pd.read_csv('api/training_df/nb_news.csv')
+ train_data, test_data, train_target, test_target = train_test_split(df['short_description'], df['category'], test_size=0.2, shuffle=True)
+ else:
+ return None
+ return make_pipeline(TfidfVectorizer(), MultinomialNB()).fit(train_data, train_target)
+
  def generate_statistics(self, df):
  unique_labels = df['output_column'].unique()
 
@@ -67,19 +88,15 @@ def classify_emotions(self, df):
  df['output_column'] = df['input_column'].apply(make_prediction)
  return df
 
- def lin_regression_model(self, df):
- df['output_column'] = df['input_column'].apply(make_prediction_nblin)
- return df
+ # def lin_regression_model(self, df):
+ #  df['output_column'] = df['input_column'].apply(make_prediction_nblin)
+ #  return df
 
  def nb_news_application(self, df):
  df['output_column'] = df['input_column'].apply(news_prediction)
  return df
 
- def pretrained_predict(self, df, model_name):
- model_file = f'api/models/{model_name}'
- with open(model_file, 'rb') as model:
- pipeline = pickle.load(model)
-
+ def pretrained_predict(self, df, pipeline):
  texts_to_predict = df['input_column']
  texts_to_predict = [str(text) for text in texts_to_predict]
  predictions = pipeline.predict(texts_to_predict)

diff --git a/api/models/emotion_pipeline.pkl b/api/models/emotion_pipeline.pkl
diff --git a/api/models/hate_speech.pkl b/api/models/hate_speech.pkl
diff --git a/api/models/news_classification.pkl b/api/models/news_classification.pkl
diff --git a/api/models_code/nb_emotions.py b/api/models_code/nb_emotions.py
@@ -1,3 +1,5 @@
+# move?
+
 import pandas as pd
 import pickle
 from sklearn.pipeline import make_pipeline
@@ -24,4 +26,8 @@
 
 # Save the pipeline to a .pkl file
 with open("../models/emotion_pipeline.pkl", "wb") as model_file:
- pickle.dump(pipeline, model_file)
+ pickle.dump(pipeline, model_file)
+
+
+
+
diff --git a/api/models_code/nb_hatespeech.py b/api/models_code/nb_hatespeech.py
@@ -23,5 +23,5 @@
 pipeline.fit(X_train, y_train)
 
 # Save the pipeline to a .pkl file
-with open("hate_speech.pkl", "wb") as model_file:
+with open("../models/hate_speech.pkl", "wb") as model_file:
  pickle.dump(pipeline, model_file)
diff --git a/api/models_code/nb_news.py b/api/models_code/nb_news.py
@@ -23,5 +23,5 @@
 pipeline.fit(X_train, y_train)
 
 # Save the pipeline to a .pkl file
-with open("text_classification_pipeline.pkl", "wb") as model_file:
+with open("../models/text_classification_pipeline.pkl", "wb") as model_file:
  pickle.dump(pipeline, model_file)
diff --git a/api/requirements.txt.orig b/api/requirements.txt.orig
@@ -10,15 +10,27 @@ seaborn==0.13.0
 tensorflow==2.16.1
 keras==3.0.0
 =======
-Flask==2.3.2
-Flask-Cors==4.0.0
-pandas==2.0.3
-nltk==3.8.1
-scikit-learn==1.3.0
-numpy==1.25.2
-matplotlib==3.7.1
-seaborn==0.13.0
-tensorflow==2.16.1
-keras==3.0.0
+Flask==2.3.2
+Flask-Cors==4.0.0
+pandas==2.0.3
+nltk==3.8.1
+scikit-learn==1.3.0
+numpy==1.25.2
+matplotlib==3.7.1
+seaborn==0.13.0
+tensorflow==2.16.1
+keras==3.0.0
 torchtext==0.17.2
 >>>>>>> Stashed changes:requirements.txt
+
+Flask==2.3.2
+Flask-Cors==4.0.0
+pandas==2.0.3
+nltk==3.8.1
+scikit-learn==1.3.0
+numpy==1.25.2
+matplotlib==3.7.1
+seaborn==0.13.0
+tensorflow==2.16.1
+keras==3.0.0
+torchtext==0.17.2