Merge branch 'main' into nn-fakenews
jonasgabriel18 authored Dec 8, 2023
2 parents c620312 + ee637b2 commit 1f24f07
Showing 37 changed files with 935 additions and 1,190 deletions.
38 changes: 38 additions & 0 deletions .github/workflows/create-release.yml
@@ -0,0 +1,38 @@
name: Create Release

on:
  push:
    tags:
      - 'v*'

jobs:
  release:
    name: 🧠 Release
    runs-on: ubuntu-latest
    permissions:
      id-token: write
      contents: write
      packages: write
      pull-requests: write
      issues: read
    steps:
      - name: ⬇️ Checkout source
        uses: actions/checkout@v3

      - name: ⎔ Setup node package
        uses: actions/setup-node@v3

      - name: 🟨 Setup python package
        uses: actions/setup-python@v3

      - name: 📦 Install and build Node dependencies
        run: npm install

      - name: 🐍 Install and build Python dependencies
        run: pip install --upgrade pip #-r requirements.txt

      - name: ✍️ Release Electron app
        run: npm run build
        # uses: samuelmeuli/[email protected]
        # with:
        #   github_token: ${{ secrets.github_token }}
91 changes: 91 additions & 0 deletions .github/workflows/main.yml
@@ -0,0 +1,91 @@
name: CI
# Controls when the workflow will run
on:
  push:
    branches: [ "main" ]
  pull_request:
    types: [opened, synchronize]

# Prevent duplicate workflows from running
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  # Static tests don't involve any logic or context.
  # They are just a set of checks that detect whether we are introducing faulty code.
  static-test:
    name: 🔬 Static tests
    runs-on: ubuntu-latest
    steps:
      - name: ⬇️ Checkout repo
        uses: actions/checkout@v4
        with:
          fetch-depth: 2

      - name: ⎔ Setup Node
        uses: actions/setup-node@v3
        with:
          cache: 'npm'

      - name: 🟨 Setup Python
        uses: actions/setup-python@v3

      - name: 📦 Install Node dependencies
        run: npm install

  # Unit tests do not depend on any external service.
  # Usually they test the logic of a specific function or component.
  unit-test:
    needs: [static-test]
    name: 🚦 Unit tests
    runs-on: ubuntu-latest
    steps:
      - name: ⬇️ Checkout repo
        uses: actions/checkout@v4
        with:
          fetch-depth: 2

      - name: ⎔ Setup Node
        uses: actions/setup-node@v3
        with:
          cache: 'npm'

      - name: 🟨 Setup Python
        uses: actions/setup-python@v3

      - name: 📦 Install dependencies
        run: npm install

      - name: 🚦 Run unit tests
        run: npm test

  # Integration tests depend on external services.
  integration-test:
    needs: [static-test]
    name: 🚥 Integration tests
    runs-on: ubuntu-latest
    steps:
      - name: ⬇️ Checkout repo
        uses: actions/checkout@v4
        with:
          fetch-depth: 2

      - name: ⎔ Setup Node
        uses: actions/setup-node@v3
        with:
          cache: 'npm'

      - name: 🟨 Setup Python
        uses: actions/setup-python@v3

      - name: 📦 Install dependencies
        run: npm install

      # - name: 🐳 Docker compose
      #   run:
      #     docker-compose up -d && sleep 3 && pnpm prisma migrate reset --force
      #     --skip-seed

      - name: 🚦 Run integration tests
        run: npm test
2 changes: 2 additions & 0 deletions .gitignore
@@ -5,6 +5,8 @@
 /.pnp
 .pnp.js

+api/__pycache__/
+
 # testing
 /coverage
16 changes: 8 additions & 8 deletions README.md
@@ -17,7 +17,7 @@ O projeto utiliza das seguintes tecnologias e ferramentas:

## Como rodar a aplicação:

Primeiramente, instale as dependências:

```bash
yarn
@@ -30,24 +30,24 @@ Após a conclusão da instalação, crie uma build para produção do front-end:

```bash
yarn build
# ou
-npm build
+npm run build
```

Por fim inicie o electron:

```bash
yarn electron
# ou
-npm electron
+npm run electron
```

### To-do

-#### 🚀 Sprint 1
+#### 🚀 Tasks

 - [x] Abrir CSV
 - [x] Exibir Preview de N linhas do CSV
-- [ ] Escolher colunas de entrada
-- [ ] Selecionar classificador e enviar dados
-- [ ] Exibir resultado do classificador
-- [ ] Exportar resultado em csv?
+- [x] Escolher colunas de entrada
+- [X] Selecionar classificador e enviar dados
+- [x] Exibir resultado do classificador
+- [x] Exportar resultado em csv?
64 changes: 40 additions & 24 deletions api/DataProcesser.py
@@ -1,45 +1,61 @@
 import pandas as pd
 from NbNewsModel import news_prediction
 from NbEmotionsModel import make_prediction
-from io import BytesIO
+from NbLinRegressionModel import make_prediction_nblin
 import nltk
 import re
+from nltk.corpus import stopwords
-#mais imports
+# mais imports

 class DataProcesser():

     df = pd.DataFrame()
-    input_column = 'short_description'
-    stopwordsenglish = nltk.corpus.stopwords.words('english')
+    input_column = ''
+    stopwords_english = stopwords.words('english')

+    def handle_classify(self, df, classifier):
+        classifier_switcher = {
+            0: self.classify_emotions,
+            1: self.nb_news_application,
+            2: self.lin_regression_model
+        }
+
+        return classifier_switcher.get(classifier, lambda: "Invalid Classifier")(df)
+
+    def generate_statistics(self, df):
+        unique_labels = df['output_column'].unique()
+
+        statistics = {
+            'total_rows': len(df),
+            'unique_labels': list(unique_labels),
+            'label_counts': df['output_column'].value_counts().to_dict()
+        }
+
+        return statistics
+
-    def set_current_file(self, file):
-        bytes_io = BytesIO(file)
-        df = pd.read_csv(bytes_io)
-        self.df = df
-
-    def set_input_column(self, column):
-        self.df.input_column = column
-
     def preprocess_text(self, texto):
         if self.input_column is not None:  # Verifique se a coluna foi definida
-            texto = re.sub('[^a-z\s]', '', texto.lower())  # tiro tudo que não for texto e espaço
-            palavras = [w for w in texto.split() if w not in set(self.stopwordsenglish)]  # tokenizo em palavras e elimino as stopwords
+            # tiro tudo que não for texto e espaço
+            texto = re.sub('[^a-z\s]', '', texto.lower())
+            # tokenizo em palavras e elimino as stopwords
+            palavras = [w for w in texto.split(
+            ) if w not in set(self.stopwords_english)]
+            palavras = [w for w in texto if nltk.corpus.wordnet.synsets(w)]
-            #texto_junto = ' '.join(texto)
-            return ' '.join(palavras)  # junto as palavras novamente com espaços
+            # texto_junto = ' '.join(texto)
+            # junto as palavras novamente com espaços
+            return ''.join(palavras)
         else:
             return "Coluna não escolhida. Escolha a coluna primeiro."

-    def classify_emotions(self):
-        self.df['coluna_classificada'] = self.df[self.input_column].apply(self.preprocess_text).apply(make_prediction)
-        result_csv = self.df  # converte o df pra csv
-        return result_csv
+    def classify_emotions(self, df):
+        df['output_column'] = df['input_column'].apply(make_prediction)
+        return df
+
+    def lin_regression_model(self, df):
+        df['output_column'] = df['input_column'].apply(make_prediction_nblin)
+        return df

-    def nb_news_application(self):
-        self.df['coluna_classificada'] = self.df[self.input_column].apply(self.preprocess_text).apply(news_prediction)
-        result_csv = self.df
-        return result_csv
+    def nb_news_application(self, df):
+        df['output_column'] = df['input_column'].apply(news_prediction)
+        return df

 ##TODO métodos com o processamento de classificação
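The new `handle_classify` selects a model by integer id through a dispatch dict of bound methods. A minimal runnable sketch of that pattern follows; `fake_emotion_prediction` and `MiniProcesser` are hypothetical stand-ins, not repo code. Note that the fallback `lambda: "Invalid Classifier"` in the diff takes no argument yet is called with `(df)`, so an unknown id would raise a `TypeError`; the sketch uses `lambda df: ...` instead.

```python
import pandas as pd

# Hypothetical stand-in for make_prediction from NbEmotionsModel,
# which in the repo loads a pickled model.
def fake_emotion_prediction(text):
    return "joy" if "good" in text else "sadness"

class MiniProcesser:
    # Same dispatch-dict pattern as DataProcesser.handle_classify.
    def handle_classify(self, df, classifier):
        classifier_switcher = {
            0: self.classify_emotions,
        }
        # Fallback accepts df, so an unknown id returns a message
        # instead of raising TypeError like the no-arg lambda in the diff.
        return classifier_switcher.get(
            classifier, lambda df: "Invalid Classifier")(df)

    def classify_emotions(self, df):
        df['output_column'] = df['input_column'].apply(fake_emotion_prediction)
        return df

df = pd.DataFrame({'input_column': ["a good day", "a hard day"]})
out = MiniProcesser().handle_classify(df, 0)
print(out['output_column'].tolist())           # ['joy', 'sadness']
print(MiniProcesser().handle_classify(df, 9))  # Invalid Classifier
```

The dict lookup keeps the front-end contract simple: it only ever sends an integer, and adding a classifier means adding one entry plus one method.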
2 changes: 1 addition & 1 deletion api/NbEmotionsModel.py
@@ -15,4 +15,4 @@ def make_prediction(my_sentence):

     new_sentence = tfidf_vectorizer.transform([my_sentence])
     prediction = model.predict(new_sentence)
-    return prediction
+    return prediction[0]
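The change to `return prediction[0]` matters because scikit-learn's `predict` returns an array even for a single input; indexing unwraps it to a plain label. A small illustration, using a NumPy array in place of a real model's output:

```python
import numpy as np

# What model.predict(new_sentence) yields for a single sentence:
# a one-element array, not a scalar label.
prediction = np.array(["joy"])

print(prediction)     # ['joy'] -- an array, awkward as a DataFrame cell value
print(prediction[0])  # joy -- the bare label callers actually want
```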
16 changes: 16 additions & 0 deletions api/NbLinRegressionModel.py
@@ -0,0 +1,16 @@
import pickle

def make_prediction_nblin(my_sentence):
    with open("./models/vectorizer_lin.pkl", "rb") as f:
        vectorizer = pickle.load(f)

    with open("./models/linear_reg.pkl", "rb") as f:
        model = pickle.load(f)

    new_sentence = vectorizer.transform([my_sentence])

    prediction = model.predict(new_sentence)
    if prediction == 0:
        return "Negativo"
    else:
        return "Positivo"
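`NbLinRegressionModel` loads a vectorizer and classifier pickled at training time. The save/load round trip it relies on can be sketched with a hypothetical stand-in (`ToyModel` is not part of the repo):

```python
import os
import pickle
import tempfile

# Hypothetical stand-in for the pickled classifier:
# anything exposing predict() over a list of inputs.
class ToyModel:
    def predict(self, texts):
        return [1 if "bom" in t else 0 for t in texts]

# Write the model out once, as a training script would...
path = os.path.join(tempfile.mkdtemp(), "linear_reg.pkl")
with open(path, "wb") as f:
    pickle.dump(ToyModel(), f)

# ...and load it back at prediction time, as make_prediction_nblin does.
with open(path, "rb") as f:
    model = pickle.load(f)

prediction = model.predict(["um dia bom"])
# Comparing the scalar prediction[0] is safer than `if prediction == 0`,
# which only works because the result has exactly one element.
label = "Positivo" if prediction[0] == 1 else "Negativo"
print(label)  # Positivo
```

One design note: the diff reloads both pickles on every call; caching them at module import would avoid repeated disk reads when classifying a whole column.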
10 changes: 4 additions & 6 deletions api/NbNewsModel.py
@@ -2,18 +2,16 @@
 import pickle

 def news_prediction(texts):
-    model_file = "api/models/text_classification_pipeline.pkl"
+    model_file = "./models/text_classification_pipeline.pkl"
     try:
         # Carregando o pipeline do arquivo .pkl
         with open(model_file, 'rb') as model_file:
             pipeline = pickle.load(model_file)

         # Fazendo previsões para os textos
-        predictions = pipeline.predict(texts)
-
-        return predictions
+        predictions = pipeline.predict([texts])
+
+        return predictions[0]

     except Exception as e:
         return str(e)
-
-# df = pd.read_csv("api/training_df/nb_news.csv")
-# print(news_prediction(df['short_description']))
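Wrapping the argument as `pipeline.predict([texts])` is the key fix here: scikit-learn pipelines expect an iterable of documents, and a bare string is itself iterable, so passing it directly makes each character a "document". A toy stand-in (`toy_predict` is hypothetical, not the repo's pipeline) shows the difference:

```python
# Hypothetical stand-in for pipeline.predict: one label per input document.
def toy_predict(docs):
    return ["label:" + d[:6] for d in docs]

text = "stocks rally on earnings"

# Passing the bare string iterates over its 24 characters.
print(len(toy_predict(text)))    # 24
# Wrapping in a list yields a single prediction, unwrapped with [0].
print(len(toy_predict([text])))  # 1
print(toy_predict([text])[0])    # label:stocks
```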
Binary file removed api/__pycache__/DataProcesser.cpython-310.pyc
Binary file removed api/__pycache__/NbEmotionsModel.cpython-310.pyc
Binary file removed api/__pycache__/NbNewsModel.cpython-310.pyc
Binary file removed api/__pycache__/app.cpython-311.pyc
