Skip to content

Commit

Permalink
Develop
Browse files Browse the repository at this point in the history
  • Loading branch information
cmaloney111 committed May 13, 2024
1 parent fc61a86 commit 3ec629d
Show file tree
Hide file tree
Showing 16 changed files with 1,219 additions and 1,141 deletions.
37 changes: 27 additions & 10 deletions api/DataProcesser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,19 @@
from NbNewsModel import news_prediction
from NbEmotionsModel import make_prediction
from available_classifiers import get_available_classifiers
from sklearn.pipeline import make_pipeline

# bag of words
from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

import Neural_Network2
import pickle
import re
import joblib
import numpy as np
import string
import os
import pandas as pd
import torch
Expand All @@ -30,7 +36,8 @@ def handle_classify(self, df, classifier):
classifier_switcher = get_available_classifiers() # id: nome_arquivo
model_name = classifier_switcher[classifier]
if model_name.endswith('.pkl'):
return self.pretrained_predict(df, model_name)
pipeline = self.get_pipeline(model_name)
return self.pretrained_predict(df, pipeline)
else:
return self.trained_predict(df, model_name)
#classifier_switcher = {
Expand All @@ -41,6 +48,20 @@ def handle_classify(self, df, classifier):

#return classifier_switcher.get(classifier, lambda: "Invalid Classifier")(df)

def get_pipeline(self, model_name):
    """Build and fit a TF-IDF + multinomial naive Bayes pipeline.

    The training CSV and its text/label columns are selected by
    ``model_name``. A random 80% split of that CSV is used for fitting;
    the held-out 20% is discarded.

    Args:
        model_name: File name of the requested model, e.g.
            ``"emotion_pipeline.pkl"``.

    Returns:
        The fitted pipeline, or ``None`` when ``model_name`` is not one of
        the known names.

    NOTE(review): the pipeline is re-trained from the CSV on every call
    and the split is not seeded, so results can vary between calls.
    """
    # (model file name, csv path, extra read_csv options, text column, label column)
    training_sources = (
        ("emotion_pipeline.pkl",
         'api/training_df/tweet_emotions.csv', {}, 'content', 'sentiment'),
        ("hate_speech.pkl",
         'api/training_df/nb_hatespeech.csv', {'sep': ';'}, 'comment', 'isHate'),
        ("text_classification_pipeline.pkl",
         'api/training_df/nb_news.csv', {}, 'short_description', 'category'),
    )

    selected = None
    for known_name, *source in training_sources:
        if model_name == known_name:
            selected = source
            break
    if selected is None:
        return None

    csv_path, read_options, text_column, label_column = selected
    frame = pd.read_csv(csv_path, **read_options)
    split = train_test_split(
        frame[text_column], frame[label_column], test_size=0.2, shuffle=True
    )
    # train_test_split returns (X_train, X_test, y_train, y_test).
    features, labels = split[0], split[2]
    classifier = make_pipeline(TfidfVectorizer(), MultinomialNB())
    return classifier.fit(features, labels)

def generate_statistics(self, df):
unique_labels = df['output_column'].unique()

Expand All @@ -67,19 +88,15 @@ def classify_emotions(self, df):
df['output_column'] = df['input_column'].apply(make_prediction)
return df

def lin_regression_model(self, df):
df['output_column'] = df['input_column'].apply(make_prediction_nblin)
return df
# def lin_regression_model(self, df):
# df['output_column'] = df['input_column'].apply(make_prediction_nblin)
# return df

def nb_news_application(self, df):
    """Label every row of ``df`` using ``news_prediction``.

    ``news_prediction`` is applied to each value of ``input_column`` and
    the results are written to ``output_column`` on the same frame, which
    is then returned.
    """
    predicted_categories = df['input_column'].apply(news_prediction)
    df['output_column'] = predicted_categories
    return df

def pretrained_predict(self, df, model_name):
model_file = f'api/models/{model_name}'
with open(model_file, 'rb') as model:
pipeline = pickle.load(model)

def pretrained_predict(self, df, pipeline):
texts_to_predict = df['input_column']
texts_to_predict = [str(text) for text in texts_to_predict]
predictions = pipeline.predict(texts_to_predict)
Expand Down
Binary file modified api/models/emotion_pipeline.pkl
Binary file not shown.
Binary file modified api/models/hate_speech.pkl
Binary file not shown.
Binary file removed api/models/news_classification.pkl
Binary file not shown.
8 changes: 7 additions & 1 deletion api/models_code/nb_emotions.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# move?

import pandas as pd
import pickle
from sklearn.pipeline import make_pipeline
Expand All @@ -24,4 +26,8 @@

# Save the pipeline to a .pkl file
with open("../models/emotion_pipeline.pkl", "wb") as model_file:
pickle.dump(pipeline, model_file)
pickle.dump(pipeline, model_file)




2 changes: 1 addition & 1 deletion api/models_code/nb_hatespeech.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,5 @@
pipeline.fit(X_train, y_train)

# Save the pipeline to a .pkl file
with open("hate_speech.pkl", "wb") as model_file:
with open("../models/hate_speech.pkl", "wb") as model_file:
pickle.dump(pipeline, model_file)
2 changes: 1 addition & 1 deletion api/models_code/nb_news.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,5 @@
pipeline.fit(X_train, y_train)

# Save the pipeline to a .pkl file
with open("text_classification_pipeline.pkl", "wb") as model_file:
with open("../models/text_classification_pipeline.pkl", "wb") as model_file:
pickle.dump(pipeline, model_file)
32 changes: 22 additions & 10 deletions api/requirements.txt.orig
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,27 @@ seaborn==0.13.0
tensorflow==2.16.1
keras==3.0.0
=======
Flask==2.3.2
Flask-Cors==4.0.0
pandas==2.0.3
nltk==3.8.1
scikit-learn==1.3.0
numpy==1.25.2
matplotlib==3.7.1
seaborn==0.13.0
tensorflow==2.16.1
keras==3.0.0
Flask==2.3.2
Flask-Cors==4.0.0
pandas==2.0.3
nltk==3.8.1
scikit-learn==1.3.0
numpy==1.25.2
matplotlib==3.7.1
seaborn==0.13.0
tensorflow==2.16.1
keras==3.0.0
torchtext==0.17.2
>>>>>>> Stashed changes:requirements.txt

Flask==2.3.2
Flask-Cors==4.0.0
pandas==2.0.3
nltk==3.8.1
scikit-learn==1.3.0
numpy==1.25.2
matplotlib==3.7.1
seaborn==0.13.0
tensorflow==2.16.1
keras==3.0.0
torchtext==0.17.2
Loading

0 comments on commit 3ec629d

Please sign in to comment.