Commit

classifiers small fixes
jonasgabriel18 committed Nov 3, 2023
1 parent 6988ae0 commit f25c805
Showing 5 changed files with 22 additions and 26 deletions.
34 changes: 16 additions & 18 deletions api/DataProcesser.py
@@ -7,54 +7,52 @@
from nltk.corpus import stopwords
# more imports


class DataProcesser():

df = pd.DataFrame()
input_column = ''
stopwordsenglish = nltk.corpus.stopwords.words('english')

stopwords_english = stopwords.words('english')

def handle_classify(self, df, classifier):
if classifier == 'a':
return self.classify_emotions(df)
elif classifier == 'b':
return self.nb_news_application(df)
classifier_switcher = {
0: self.classify_emotions,
1: self.nb_news_application,
2: self.lin_regression_model
}

return classifier_switcher.get(classifier, lambda: "Invalid Classifier")(df)

def preprocess_text(self, texto):
if self.input_column is not None: # Check whether the column has been set
# strip everything that is not text or whitespace
texto = re.sub('[^a-z\s]', '', texto.lower())
# tokenize into words and remove the stopwords
palavras = [w for w in texto.split(
) if w not in set(self.stopwordsenglish)]
) if w not in set(self.stopwords_english)]
palavras = [w for w in texto if nltk.corpus.wordnet.synsets(w)]
# texto_junto = ' '.join(texto)
# join the words back together with spaces
return ' '.join(palavras)
return ''.join(palavras)
else:
return "Coluna não escolhida. Escolha a coluna primeiro."

def nb_news_application(self, df):
nb_model = NbNewsModel(df)
df_result = nb_model.filter_and_classify()
return df_result

def classify_emotions(self, df):
df['output_column'] = df['input_column'].apply(
self.preprocess_text).apply(make_prediction)

result_csv = df # converts the df to csv
return result_csv

def lin_regression_model(self, df):
df['output_column'] = df['input_column'].apply(
self.preprocess_text).apply(make_prediction_nblin)

result_csv = df # converts the df to csv
return result_csv

def nb_news_application(self):
self.df['coluna_classificada'] = self.df[self.input_column].apply(self.preprocess_text).apply(news_prediction)
result_csv = self.df
def nb_news_application(self, df):
df['output_column'] = df['input_column'].apply(news_prediction)

result_csv = df
return result_csv

##TODO: classification-processing methods
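
For readers skimming the dispatch change in handle_classify above: the if/elif chain is swapped for a dict that maps integer classifier ids to bound methods. Below is a minimal, self-contained sketch of that pattern; DummyProcesser and its stub methods are hypothetical stand-ins, not code from this repository.

import pandas as pd


class DummyProcesser:
    # stub classifiers standing in for the real models
    def classify_emotions(self, df):
        return df.assign(output_column="emotion")

    def nb_news_application(self, df):
        return df.assign(output_column="news")

    def lin_regression_model(self, df):
        return df.assign(output_column="linreg")

    def handle_classify(self, df, classifier):
        # integer ids mapped to bound methods, mirroring the diff above
        classifier_switcher = {
            0: self.classify_emotions,
            1: self.nb_news_application,
            2: self.lin_regression_model,
        }
        # the fallback must accept the DataFrame argument, since it is called with df
        return classifier_switcher.get(classifier, lambda _df: "Invalid Classifier")(df)


df = pd.DataFrame({"input_column": ["some text"]})
print(DummyProcesser().handle_classify(df, 1))   # stubbed "news" result
print(DummyProcesser().handle_classify(df, 99))  # falls back to "Invalid Classifier"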
2 changes: 1 addition & 1 deletion api/NbLinRegressionModel.py
@@ -7,7 +7,7 @@ def make_prediction_nblin(my_sentence):
with open("./models/linear_reg.pkl", "rb") as f:
model = pickle.load(f)

new_sentence = vectorizer.transform(my_sentence)
new_sentence = vectorizer.transform([my_sentence])

prediction = model.predict(new_sentence)
if prediction == 0:
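The one-line change above wraps the input sentence in a list before calling vectorizer.transform. scikit-learn vectorizers expect an iterable of documents, so a bare string is rejected (or, in older versions, iterated character by character). A small illustration, using CountVectorizer as a stand-in for whichever vectorizer the pickled model was trained with:

from sklearn.feature_extraction.text import CountVectorizer

vectorizer = CountVectorizer()
vectorizer.fit(["the cat sat on the mat", "dogs chase cats"])

# correct: one document wrapped in a list yields a (1, n_features) matrix
print(vectorizer.transform(["the dog sat"]).shape)

# incorrect: a bare string; recent scikit-learn raises a ValueError here
try:
    vectorizer.transform("the dog sat")
except ValueError as err:
    print(err)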
10 changes: 4 additions & 6 deletions api/NbNewsModel.py
@@ -2,18 +2,16 @@
import pickle

def news_prediction(texts):
model_file = "api/models/text_classification_pipeline.pkl"
model_file = "./models/text_classification_pipeline.pkl"
try:
# Loading the pipeline from the .pkl file
with open(model_file, 'rb') as model_file:
pipeline = pickle.load(model_file)

# Making predictions for the texts
predictions = pipeline.predict(texts)
predictions = pipeline.predict([texts])

return predictions[0]

return predictions

except Exception as e:
return str(e)
# df = pd.read_csv("api/training_df/nb_news.csv")
# print(news_prediction(df['short_description']))
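
The edits above pass a single text to the pickled pipeline as a one-element list and return the first (and only) prediction. A hedged sketch of that load-and-predict pattern, using a throwaway in-memory pipeline instead of the repository's text_classification_pipeline.pkl (the vectorizer and training data below are invented for illustration):

import pickle

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline

# stand-in for the pipeline stored in ./models/text_classification_pipeline.pkl
pipeline = make_pipeline(TfidfVectorizer(), MultinomialNB())
pipeline.fit(["stocks rally on strong earnings", "home team wins the championship"],
             ["business", "sports"])

# round-trip through pickle the way news_prediction does
loaded = pickle.loads(pickle.dumps(pipeline))

text = "quarterly earnings beat expectations"
prediction = loaded.predict([text])[0]  # predict expects a list of texts; take the single label
print(prediction)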
2 changes: 1 addition & 1 deletion api/models/nb_news.py
@@ -6,7 +6,7 @@
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

df = pd.read_csv("api/training_df/nb_news.csv")
df = pd.read_csv("../training_df/nb_news.csv")
# Splitting the data into a training set and a test set
x = df['short_description']
y = df['category']
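Only the top of nb_news.py appears in the hunk above (the corrected CSV path plus the feature/target columns). For orientation, here is a hedged sketch of how a training script along these lines typically continues; the TfidfVectorizer, the 80/20 split, and the output filename are assumptions, not read from the file:

import pickle

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline

df = pd.read_csv("../training_df/nb_news.csv")
x = df["short_description"]
y = df["category"]

# hold out a test split to sanity-check the fitted pipeline
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

pipeline = make_pipeline(TfidfVectorizer(), MultinomialNB())
pipeline.fit(x_train, y_train)
print("test accuracy:", pipeline.score(x_test, y_test))

# persist the fitted pipeline for the API to load (filename assumed)
with open("text_classification_pipeline.pkl", "wb") as f:
    pickle.dump(pipeline, f)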
Binary file modified api/models/text_classification_pipeline.pkl
