From f25c80587a732e64a5c6a5018b29a045529a2094 Mon Sep 17 00:00:00 2001
From: Jonas Gabriel <jonasftw2222@gmail.com>
Date: Fri, 3 Nov 2023 13:49:34 -0300
Subject: [PATCH] Fix classifier dispatch, single-text prediction input, and model paths

---
 api/DataProcesser.py                        |  34 +++++++++-----------
 api/NbLinRegressionModel.py                 |   2 +-
 api/NbNewsModel.py                          |  10 +++---
 api/models/nb_news.py                       |   2 +-
 api/models/text_classification_pipeline.pkl | Bin 5389646 -> 5389722 bytes
 5 files changed, 22 insertions(+), 26 deletions(-)

diff --git a/api/DataProcesser.py b/api/DataProcesser.py
index be56cbda..7c82fff2 100644
--- a/api/DataProcesser.py
+++ b/api/DataProcesser.py
@@ -7,19 +7,19 @@
 from nltk.corpus import stopwords
 # mais imports
 
-
 class DataProcesser():
-
     df = pd.DataFrame()
     input_column = ''
-    stopwordsenglish = nltk.corpus.stopwords.words('english')
-
+    stopwords_english = stopwords.words('english')
 
     def handle_classify(self, df, classifier):
-        if classifier == 'a':
-            return self.classify_emotions(df)
-        elif classifier == 'b':
-            return self.nb_news_application(df)
+        classifier_switcher = {
+            0: self.classify_emotions,
+            1: self.nb_news_application,
+            2: self.lin_regression_model
+        }
+
+        return classifier_switcher.get(classifier, lambda: "Invalid Classifier")(df)
 
     def preprocess_text(self, texto):
         if self.input_column is not None:  # Verifique se a coluna foi definida
@@ -27,34 +27,32 @@ def preprocess_text(self, texto):
             texto = re.sub('[^a-z\s]', '', texto.lower())
             # tokenizo em palavras e elimino as stopwords
             palavras = [w for w in texto.split(
-            ) if w not in set(self.stopwordsenglish)]
+            ) if w not in set(self.stopwords_english)]
             palavras = [w for w in texto if nltk.corpus.wordnet.synsets(w)]
             # texto_junto = ' '.join(texto)
             # junto as palavras novamente com espaços
-            return ' '.join(palavras)
+            return ''.join(palavras)
         else:
             return "Coluna não escolhida. Escolha a coluna primeiro."
 
-    def nb_news_application(self, df):
-        nb_model = NbNewsModel(df)
-        df_result = nb_model.filter_and_classify()
-        return df_result
-
     def classify_emotions(self, df):
         df['output_column'] = df['input_column'].apply(
             self.preprocess_text).apply(make_prediction)
+
         result_csv = df  # converte o df pra csv
         return result_csv
 
     def lin_regression_model(self, df):
         df['output_column'] = df['input_column'].apply(
             self.preprocess_text).apply(make_prediction_nblin)
+
         result_csv = df  # converte o df pra csv
         return result_csv
 
-    def nb_news_application(self):
-        self.df['coluna_classificada'] = self.df[self.input_column].apply(self.preprocess_text).apply(news_prediction)
-        result_csv = self.df
+    def nb_news_application(self, df):
+        df['output_column'] = df['input_column'].apply(news_prediction)
+
+        result_csv = df
         return result_csv
 
     ##TODO métodos com o processamento de classificação
diff --git a/api/NbLinRegressionModel.py b/api/NbLinRegressionModel.py
index 4281320a..472aecd8 100644
--- a/api/NbLinRegressionModel.py
+++ b/api/NbLinRegressionModel.py
@@ -7,7 +7,7 @@ def make_prediction_nblin(my_sentence):
     with open("./models/linear_reg.pkl", "rb") as f:
         model = pickle.load(f)
 
-    new_sentence = vectorizer.transform(my_sentence)
+    new_sentence = vectorizer.transform([my_sentence])
 
     prediction = model.predict(new_sentence)
     if prediction == 0:
diff --git a/api/NbNewsModel.py b/api/NbNewsModel.py
index 174def97..bbe8f9ba 100644
--- a/api/NbNewsModel.py
+++ b/api/NbNewsModel.py
@@ -2,18 +2,16 @@
 import pickle
 
 def news_prediction(texts):
-    model_file = "api/models/text_classification_pipeline.pkl"
+    model_file = "./models/text_classification_pipeline.pkl"
     try:
         # Carregando o pipeline do arquivo .pkl
         with open(model_file, 'rb') as model_file:
             pipeline = pickle.load(model_file)
 
         # Fazendo previsões para os textos
-        predictions = pipeline.predict(texts)
+        predictions = pipeline.predict([texts])
+
+        return predictions[0]
 
-        return predictions
-    
     except Exception as e:
         return str(e)
-# df = pd.read_csv("api/training_df/nb_news.csv")
-# print(news_prediction(df['short_description']))
\ No newline at end of file
diff --git a/api/models/nb_news.py b/api/models/nb_news.py
index db90a8dd..b32627d8 100644
--- a/api/models/nb_news.py
+++ b/api/models/nb_news.py
@@ -6,7 +6,7 @@
 from sklearn.model_selection import train_test_split
 from sklearn.naive_bayes import MultinomialNB
 
-df = pd.read_csv("api/training_df/nb_news.csv")
+df = pd.read_csv("../training_df/nb_news.csv")
 # Dividindo os dados em um conjunto de treinamento e um conjunto de teste
 x = df['short_description']
 y = df['category']
diff --git a/api/models/text_classification_pipeline.pkl b/api/models/text_classification_pipeline.pkl
index be499f23662c40d7225f9e34fcab3b9f3f59556b..7ae389e6e3e0d1ae784663b9360c3bb921d0b816 100644
GIT binary patch
delta 902
zcmaLV%TE(Q90%}ryM6GGhde5Rq9XE=N(B_KJVXR!sSgqpE@U%or_O3^o87hs<H49n
zOR|ud4r$_FVB&!oOjZvj96apF#Ean~2NU#6^kC$GU#ccX)J;Bzo!@NcoA2zKZ<g?Q
z+Y-e4;_{<s;&Xd;!`_{JVQ=hbO{|%<WLRs4wJjJWQ8`RWBB@bbR--ZxN&%xlRzqPq
zq>>xlDAyG=KIhZpMANBH&bA9n9>W<*%*82ReI<mqyaQ}VvOVN%IL)Vcvt$oW2Jkkq
z+V@4;YT7fDc86nz5bog%nIB&`n3R1hRHoN9?dIC1F!N!$#AdhcK0Lu^jVhTDT~<gm
z7F87zk;5_VHc9Y|QIQ|$F)cwu$R1J^s`E^8)+m$pTM<gMsGOpjuEwI=aQc0nzK-0e
z<!Ez$%}syaHdB^kX{TVh@aIuE(GnZX4om!mMdz<YujQ;a2QBBvordM&>U<_!P!S9<
zMfC&^-s?5oH_2T}HNt~W>&$V>^}5uduu!ga-4&lKLR5vyBob$Y8{!=#;>^F>ut$jy
zT}S1d{n?OInJ>t3-QV?lL*~16*SuK$*WEKY?Y-+|&E6S~n&)gLJ+k2fH+Y}`3ZV#!
zVIP!0DU?Au?1u`dges^8FC2gxI0&^+2Z!J=)WZ=t3df)Ujzc4yfF@{$7HBm`HrhNZ
z|H(`jPp0W&JNV!fbbudDLnm}WH=Kd9&;z~D2j?IF{V)K7Fa+me7%spFT!c{=gK@Y7
n6ClAP1mQAVfvYB6yyjl{*Bv|vOg+ipeS7~@tqVUva@O}941iy1

delta 797
zcmaLUKTH#G6bJC$pI%!*KrPjlDo908^eD9=Xe$Du;Blf&oEYlSUcDM)YOiQ84m!w5
z(h`#X>Ln(`)x?1`kX%@t9Bz?`0S6gOs*^)UHKu-BH8G<8lF#sdzxVQfFYnn`lV1N~
z((FUFkSwyRRQ?mykvFJ5k0Y9lC!+DBbc1A@)L1e#%d06Vt;&3Ul3t({-%5#2ls#c(
zP_R5E%8*7Pq)VX0i4dZ(`C<P`1JhI1Nr`fIZc7rXNuyah(NE@|{HSD<ac!eS85<?~
z&ii`WLRqSZWXQDs$YfkL&d&#ed_Z3^nQrT*my|ynZ%k&PqQ_0v=O}PL3k=O<KUVa6
zrnS{ku>87wYFa;3d}55wvUg=QL&OIIS=&wNzAUFDBEIw%zBSj{S&Zi<_qvgZD%bVI
zCYQ>$(aw>&+tGw9WoFW{8YQ9-%GzVMBvq|O%Ab=7&e~wiwcg<GlNs%AxjEML*X$|%
z)jMv@z5NZX=7=(-EpBmO13Nfi2h>45?1TnrgeKSpyI~JFp&4AT7xsZ0JkSDO*bfJw
z724n+w1W>0K?fX$PUwOoaMW1b>b5Wc=g-C5F|C+84m|XLA9^7GCm;wXp$|?$KMcTW
zI0GRF!&w-Fb1($MFaqac6fVF;7=v-R1OiNe2$x|Jt{BDKRon8vcCa3byr`{xHXLc;
Ju>YB9{yXs9OhW(w