Skip to content

Commit

Permalink
Merge branch 'main' of github.com:TailUFPB/LinguifAI
Browse files Browse the repository at this point in the history
  • Loading branch information
tahaluh committed Mar 28, 2024
2 parents b810cd8 + f80f5ca commit ea213f3
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 23 deletions.
33 changes: 24 additions & 9 deletions api/DataProcesser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from available_classifiers import get_available_classifiers
from tensorflow.python.keras.models import load_model

import scipy as sp
import pandas as pd
import numpy as np
import tensorflow as tf
Expand All @@ -13,6 +14,7 @@
import joblib
import string
import os
from sklearn.feature_extraction.text import TfidfVectorizer

import nltk
from nltk.corpus import stopwords
Expand Down Expand Up @@ -49,6 +51,7 @@ def generate_statistics(self, df):
}

return statistics


def preprocess_text(self, text):
text = str(text).lower()
Expand All @@ -59,9 +62,10 @@ def preprocess_text(self, text):
text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
text = re.sub('\n', '', text)
text = re.sub('\w*\d\w*', '', text)

return text


def classify_emotions(self, df):
    """Label every row of ``df`` by applying ``make_prediction`` to its input text.

    Reads ``df['input_column']``, writes the per-row results to
    ``df['output_column']`` (the frame is modified in place) and returns
    the same frame.
    """
    emotions = df['input_column'].apply(make_prediction)
    df['output_column'] = emotions
    return df
Expand All @@ -86,7 +90,7 @@ def pretrained_predict(self, df, model_name):

def load_weights_and_model(self, name):
model_filename = f"api/models/{name}"
num_classes = model_filename[model_filename.index("/") + 1, model_filename.index("-")]
num_classes = model_filename[model_filename.index("s") + 2 : model_filename.index("-")]
model = tf.keras.Sequential([
tf.keras.layers.Embedding(input_dim=20000, output_dim=128),
tf.keras.layers.LSTM(64),
Expand All @@ -98,20 +102,31 @@ def load_weights_and_model(self, name):
def trained_predict(self, df, model_name):
    """Classify ``df['input_column']`` with a user-trained model.

    The model is obtained from ``self.load_weights_and_model(model_name)``
    and the matching label encoder is loaded with joblib from
    ``api/encoders/LabelMapping-<name>.joblib``, where ``<name>`` is the
    part of ``model_name`` between ``Trained-Model-`` and the file
    extension.

    Args:
        df: frame with an ``input_column`` column holding the raw texts.
        model_name: model file name, e.g. ``Trained-Model-<name>.keras`` or
            ``<num_classes>-Trained-Model-<name>.weights.h5``.

    Returns:
        The same ``df`` with the predicted labels stored in
        ``output_column``.

    Raises:
        ValueError: if ``model_name`` does not follow one of the naming
            schemes above, so no label mapping can be located.
    """
    model = self.load_weights_and_model(model_name)

    # Accept both naming schemes used by the training code (plain ".keras"
    # models and "<num_classes>-...weights.h5" weight files). The dots are
    # escaped and the pattern is anchored so that a name containing dots or
    # the letter "l" cannot shift the extracted span.
    name_match = re.search(
        r'Trained-Model-(.+?)(?:\.weights)?\.(?:h5|keras)$', model_name
    )
    if name_match is None:
        raise ValueError(
            f"Cannot derive label mapping name from model file: {model_name!r}"
        )
    encoder_name = name_match.group(1)

    # Forward slashes only: this must be the same location the training code
    # dumps the encoder to, and a backslash here is both an invalid escape
    # sequence and a wrong path on POSIX systems.
    label_map_filename = f"api/encoders/LabelMapping-{encoder_name}.joblib"
    label_encoder = joblib.load(label_map_filename)

    raw_text = df['input_column'].tolist()
    test_texts = [self.preprocess_text(text) for text in raw_text]

    # NOTE(review): the model receives the preprocessed strings directly.
    # The training code feeds TF-IDF features, so the inputs presumably need
    # to go through the vectorizer fitted at training time first - confirm
    # and persist/load that vectorizer alongside the label mapping.
    predictions = model.predict(test_texts)

    # Highest-scoring class index per row, mapped back to its original label.
    predicted_labels_encoded = tf.argmax(predictions, axis=1).numpy()
    predicted_labels = [
        label_encoder.classes_[label] for label in predicted_labels_encoded
    ]

    df['output_column'] = predicted_labels

    return df

Expand Down
21 changes: 8 additions & 13 deletions api/Neural_Network2.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ def create_and_train_model(train_texts, train_labels, name, epochs=5, batch_size
num_classes = len(label_encoder.classes_)
train_labels_one_hot = tf.keras.utils.to_categorical(train_labels_encoded, num_classes=num_classes)

#label_mapping_file = os.path.join(dirname, rf"api/encoders/LabelMapping-{name}.joblib")
label_mapping_file = f"api/encoders/LabelMapping-{name}.joblib"
joblib.dump(label_encoder, label_mapping_file)

Expand All @@ -88,40 +87,36 @@ def create_and_train_model(train_texts, train_labels, name, epochs=5, batch_size
train_texts = [preprocess_text(text) for text in train_texts]
train_texts_tfidf = tfidf_vectorizer.fit_transform(train_texts)

# Cria um conjunto de dados de texto usando a API de conjuntos de dados do TensorFlow
train_dataset = tf.data.Dataset.from_tensor_slices((train_texts_tfidf.toarray(), train_labels_one_hot))

# Embaralha e agrupa os dados
train_dataset = train_dataset.shuffle(len(train_texts)).batch(32)

# Parâmetros do modelo
num_features = train_texts_tfidf.shape[1]

# Define a arquitetura do modelo
model = tf.keras.Sequential([
tf.keras.layers.Dense(64, activation='relu', input_shape=(num_features,)),
tf.keras.layers.Embedding(input_dim=num_features, output_dim=64),
tf.keras.layers.SimpleRNN(64),
tf.keras.layers.Dense(num_classes, activation='softmax')
])

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

try:
progress_callback = TrainingProgressCallback()
# Treina o modelo
history = model.fit(train_dataset, epochs=epochs, batch_size=batch_size, callbacks=[progress_callback])

# Salva o modelo
model_filename = f"api/models/Trained-Model-{name}.keras"
model.save(model_filename)
model_filename = f"api/models/{str(num_classes)}-Trained-Model-{name}.weights.h5"
model.save_weights(model_filename)

# Obtém estatísticas do treinamento
training_stats = {
"loss": history.history['loss'],
"accuracy": history.history['accuracy']
}

# Retorna estatísticas como JSON
return json.dumps(training_stats)

except Exception as e:
return f"Error during model creation/training: {str(e)}"
return f"Error during model creation/training: {str(e)}"


2 changes: 1 addition & 1 deletion api/available_classifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def get_available_classifiers():
classifiers = {}

for file in model_files:
if file.endswith('.pkl') or file.endswith('.keras'):
if file.endswith('.pkl') or file.endswith('.keras') or file.endswith('.h5'):
classifiers[len(classifiers)] = file

return classifiers
Binary file modified requirements.txt
Binary file not shown.

0 comments on commit ea213f3

Please sign in to comment.