Skip to content

Commit

Permalink
Merge pull request #17 from TailUFPB/main
Browse files Browse the repository at this point in the history
Update develop
  • Loading branch information
jonasgabriel18 authored Apr 6, 2024
2 parents ebde421 + 1113171 commit eeeef12
Show file tree
Hide file tree
Showing 17 changed files with 1,247 additions and 246 deletions.
12 changes: 9 additions & 3 deletions .github/workflows/develop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,11 @@ jobs:
- name: 🏗️ Build
run: npm run build

- name: 📦 Electron Package
run: npx electron-packager . LinguifAI --platform=win32 --arch=x64 --out=dist
#- name: 📦 Electron Package
# run: npx electron-packager . LinguifAI --platform=win32 --arch=x64 --out=dist

- name: 📦 Electron Builder
run: npm run electron:package:win

- name: Get latest release number
id: get_latest_release
Expand All @@ -139,12 +142,15 @@ jobs:
const newVersion = `${parseInt(versionParts[0])}.${parseInt(versionParts[1])}.${parseInt(versionParts[2]) + 1}`;
console.log(`::set-output name=new_version::${newVersion}`);
- name: Rename file
run: mv dist/LinguifAI\ Setup\ 0.1.0.exe dist/LinguifAI\ Setup\ ${{ steps.get_latest_release.outputs.new_version }}.exe

- name: Create Release
id: create_release
uses: softprops/action-gh-release@v1
with:
files: |
./dist/LinguifAI-win32-x64/LinguifAI.exe
./dist/LinguifAI\ Setup\ ${{ steps.get_latest_release.outputs.new_version }}.exe
tag_name: v${{ steps.get_latest_release.outputs.new_version }}
name: Release v${{ steps.get_latest_release.outputs.new_version }}
prerelease: true
Expand Down
28 changes: 21 additions & 7 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,15 @@ jobs:
with:
node-version: "14"

- name: Install Wine
- name: Install Wine64
run: sudo apt update && sudo apt install wine64

- name: Install Wine32
run: |
sudo dpkg --add-architecture i386
sudo apt-get update
sudo apt-get install wine32
- name: ⬇️ Checkout repo
uses: actions/checkout@v4
with:
Expand All @@ -121,8 +127,13 @@ jobs:
- name: 🏗️ Build
run: npm run build

- name: 📦 Electron Package
run: npx electron-packager . LinguifAI --platform=win32 --arch=x64 --out=dist
#- name: 📦 Electron Package
# run: npx electron-packager . LinguifAI --platform=win32 --arch=x64 --out=dist

- name: 📦 Electron Builder
run: npm run electron:package:win
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: Get latest release number
id: get_latest_release
Expand All @@ -136,17 +147,20 @@ jobs:
});
const latestRelease = response.data.tag_name;
const versionParts = latestRelease.replace(/^v/, '').split('.');
const newVersion = `${parseInt(versionParts[0])}.${parseInt(versionParts[1])}.${parseInt(versionParts[2]) + 1}`;
const newVersion = `${parseInt(versionParts[0])}.${parseInt(versionParts[1]) + 1}.0`;
console.log(`::set-output name=new_version::${newVersion}`);
- name: Rename file
run: mv dist/LinguifAI\ Setup\ 0.1.0.exe dist/LinguifAI\ Setup\ ${{ steps.get_latest_release.outputs.new_version }}.exe

- name: Create Release
id: create_release
uses: softprops/action-gh-release@v1
with:
files: |
./dist/LinguifAI-win32-x64/LinguifAI.exe
./dist/LinguifAI\ Setup\ ${{ steps.get_latest_release.outputs.new_version }}.exe
tag_name: v${{ steps.get_latest_release.outputs.new_version }}
release_name: Release v${{ steps.get_latest_release.outputs.new_version }}
draft: true
name: Release v${{ steps.get_latest_release.outputs.new_version }}
prerelease: true
body: |
Descrição do release aqui
122 changes: 71 additions & 51 deletions api/DataProcesser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,21 @@
from NbEmotionsModel import make_prediction
from NbLinRegressionModel import make_prediction_nblin
from available_classifiers import get_available_classifiers
from tensorflow.python.keras.models import load_model

import scipy as sp
import pandas as pd
import numpy as np
import tensorflow as tf
import Neural_Network2
import pickle
import re
import joblib
import numpy as np
import string
import os
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd
import torch
from collections import Counter
from functools import partial

import nltk
from nltk.tokenize import wordpunct_tokenize
from nltk.corpus import stopwords
nltk.download('stopwords')
# more imports
Expand All @@ -31,7 +32,7 @@ def handle_classify(self, df, classifier):
model_name = classifier_switcher[classifier]
if model_name.endswith('.pkl'):
return self.pretrained_predict(df, model_name)
elif model_name.endswith('.h5'):
else:
return self.trained_predict(df, model_name)
#classifier_switcher = {
# 0: self.classify_emotions,
Expand All @@ -54,16 +55,13 @@ def generate_statistics(self, df):


def preprocess_text(self, text):
text = str(text).lower()
text = re.sub('\[.*?\]', '', text)
text = re.sub("\\W", " ", text)
text = re.sub('https?://\S+|www\.\S+', '', text)
text = re.sub('<.*?>+', '', text)
text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
text = re.sub('\n', '', text)
text = re.sub('\w*\d\w*', '', text)

return text
stop_words = set(stopwords.words('english'))
text = str(text)
text = re.sub(r'[^\w\s]', '', text)
text = text.lower()
tokens = wordpunct_tokenize(text)
tokens = [token for token in tokens if token not in stop_words]
return tokens


def classify_emotions(self, df):
Expand All @@ -84,49 +82,71 @@ def pretrained_predict(self, df, model_name):
pipeline = pickle.load(model)

texts_to_predict = df['input_column']
texts_to_predict = [str(text) for text in texts_to_predict]
predictions = pipeline.predict(texts_to_predict)
df['output_column'] = predictions
return df

def load_weights_and_model(self, name):
model_filename = f"api/models/{name}"
num_classes = model_filename[model_filename.index("s") + 2 : model_filename.index("-")]
model = tf.keras.Sequential([
tf.keras.layers.Embedding(input_dim=20000, output_dim=128),
tf.keras.layers.LSTM(64),
tf.keras.layers.Dense(int(num_classes), activation='softmax')
])
model.load_weights(model_filename)
return model

def trained_predict(self, df, model_name):
model = self.load_weights_and_model(model_name)


encoder_name = model_name[model_name.index('l') + 2 : model_name.index('.')]
model_filename = os.path.join("api", "models", name)
if os.path.exists(model_filename):
model = torch.load(model_filename)
return model
else:
raise FileNotFoundError(f"Model file '{model_filename}' not found.")

label_map_filename = f"api\encoders/LabelMapping-{encoder_name}.joblib"
def trained_predict(self, df, model_name):
    """Label every row of ``df['input_column']`` with a saved PyTorch model.

    Args:
        df: DataFrame with an ``input_column`` of texts. A ``tokens``
            column is added to it in place before rows are filtered.
        model_name: Model file name. It is passed to
            ``load_weights_and_model`` and used to locate
            ``api/encoders/LabelMapping-<model_name>.joblib``.

    Returns:
        A filtered copy of ``df`` (rows that have no token other than
        ``<UNK>`` are dropped) carrying ``tokens``, ``indexed_tokens`` and
        ``output_column``.

    Side effects:
        Overwrites ``self.token2idx`` and ``self.idx2token``.
    """
    label_map_filename = f"api/encoders/LabelMapping-{model_name}.joblib"
    label_encoder = joblib.load(label_map_filename)

    model = self.load_weights_and_model(model_name)
    model.eval()  # switch the loaded model to evaluation mode

    stop_words = set(stopwords.words('english'))

    # NOTE(review): ``progress_apply`` only exists once ``tqdm.pandas()`` has
    # been called; presumably Neural_Network2 does that on import - confirm.
    df['tokens'] = df.input_column.progress_apply(
        partial(Neural_Network2.tokenize, stop_words=stop_words),
    )

    # Keep the 20000 most frequent tokens. Built with a comprehension so an
    # input that yields no tokens gives an empty set instead of IndexError.
    all_tokens = [token for row in df.tokens.tolist() for token in row]
    common_tokens = {
        token for token, _ in Counter(all_tokens).most_common(20000)
    }
    df.loc[:, 'tokens'] = df.tokens.progress_apply(
        partial(
            Neural_Network2.remove_rare_words,
            common_tokens=common_tokens,
            max_len=200,
        ),
    )

    # Drop rows with nothing but '<UNK>' (or no tokens at all). The copy
    # makes the column assignments below act on an independent frame rather
    # than on a slice of the caller's one.
    has_known_token = df.tokens.progress_apply(
        lambda tokens: any(token != '<UNK>' for token in tokens),
    )
    df = df[has_known_token].copy()

    # NOTE(review): the vocabulary is rebuilt from the data being classified,
    # so token indices match the ones the model was trained with only if the
    # training vocabulary was identical - confirm, or persist the training
    # vocabulary next to the model and load it here.
    vocab = sorted({token for row in df.tokens.tolist() for token in row})
    self.token2idx = {token: idx for idx, token in enumerate(vocab)}
    # ``default=-1`` keeps the original "max + 1" index and gives 0 when the
    # vocabulary is empty instead of raising ValueError.
    self.token2idx['<PAD>'] = max(self.token2idx.values(), default=-1) + 1
    self.idx2token = {idx: token for token, idx in self.token2idx.items()}

    df['indexed_tokens'] = df.tokens.apply(
        lambda tokens: [self.token2idx[token] for token in tokens],
    )

    predictions = []
    # One no_grad context for the whole loop; rows are still scored one by one.
    with torch.no_grad():
        for indexed_row in df['indexed_tokens']:
            _, logits = model([indexed_row], return_activations=True)
            logits = logits.detach().cpu().numpy()
            predictions.append(np.argmax(logits, axis=1)[0])

    df['output_column'] = label_encoder.inverse_transform(predictions)

    return df

Expand Down
Loading

0 comments on commit eeeef12

Please sign in to comment.