diff --git a/api/DataProcesser.py b/api/DataProcesser.py index 49846d90..3934be55 100644 --- a/api/DataProcesser.py +++ b/api/DataProcesser.py @@ -5,6 +5,7 @@ from available_classifiers import get_available_classifiers from tensorflow.python.keras.models import load_model +import scipy as sp import pandas as pd import numpy as np import tensorflow as tf @@ -13,6 +14,7 @@ import joblib import string import os +from sklearn.feature_extraction.text import TfidfVectorizer import nltk from nltk.corpus import stopwords @@ -49,6 +51,7 @@ def generate_statistics(self, df): } return statistics + def preprocess_text(self, text): text = str(text).lower() @@ -59,9 +62,10 @@ def preprocess_text(self, text): text = re.sub('[%s]' % re.escape(string.punctuation), '', text) text = re.sub('\n', '', text) text = re.sub('\w*\d\w*', '', text) - + return text + def classify_emotions(self, df): df['output_column'] = df['input_column'].apply(make_prediction) return df @@ -86,7 +90,7 @@ def pretrained_predict(self, df, model_name): def load_weights_and_model(self, name): model_filename = f"api/models/{name}" - num_classes = model_filename[model_filename.index("/") + 1, model_filename.index("-")] + num_classes = model_filename[model_filename.index("s") + 2 : model_filename.index("-")] model = tf.keras.Sequential([ tf.keras.layers.Embedding(input_dim=20000, output_dim=128), tf.keras.layers.LSTM(64), @@ -98,20 +102,31 @@ def load_weights_and_model(self, name): def trained_predict(self, df, model_name): model = self.load_weights_and_model(model_name) - encoder_re = r'Trained-Model-(.*?).keras' - encoder_name = re.search(encoder_re, model_name).group(1) + + encoder_name = model_name[model_name.index('l') + 2 : model_name.index('.')] label_map_filename = f"api\encoders/LabelMapping-{encoder_name}.joblib" label_encoder = joblib.load(label_map_filename) raw_text = df['input_column'].tolist() - test_texts = [self.preprocess_text(text) for text in raw_text] + + # prediction (nao sei como fazer agora) + # vectorizer = TfidfVectorizer(max_features=20000) + # raw_text = [self.preprocess_text(text).encode("utf-8") for text in raw_text] + # vectorizer.fit_transform(raw_text) + # vectorized_data = vectorizer.transform(raw_text) + + # vectorized_data = np.asarray(vectorized_data.todense()) + + # # Make predictions using the model + + # predictions = model.predict(vectorized_data) + + # predicted_labels_encoded = tf.argmax(predictions, axis=1).numpy() - predictions = model.predict(test_texts) - predicted_labels_encoded = tf.argmax(predictions, axis=1).numpy() - predicted_labels = [label_encoder.classes_[label] for label in predicted_labels_encoded] + # predicted_labels = [label_encoder.classes_[label] for label in predicted_labels_encoded] - df['output_column'] = predicted_labels + # df['output_column'] = predicted_labels return df diff --git a/api/Neural_Network2.py b/api/Neural_Network2.py index 112953c9..1cdc04fa 100644 --- a/api/Neural_Network2.py +++ b/api/Neural_Network2.py @@ -79,7 +79,6 @@ def create_and_train_model(train_texts, train_labels, name, epochs=5, batch_size num_classes = len(label_encoder.classes_) train_labels_one_hot = tf.keras.utils.to_categorical(train_labels_encoded, num_classes=num_classes) - #label_mapping_file = os.path.join(dirname, rf"api/encoders/LabelMapping-{name}.joblib") label_mapping_file = f"api/encoders/LabelMapping-{name}.joblib" joblib.dump(label_encoder, label_mapping_file) @@ -88,10 +87,7 @@ def create_and_train_model(train_texts, train_labels, name, epochs=5, batch_size train_texts = [preprocess_text(text) for text in train_texts] train_texts_tfidf = tfidf_vectorizer.fit_transform(train_texts) - # Cria um conjunto de dados de texto usando a API de conjuntos de dados do TensorFlow train_dataset = tf.data.Dataset.from_tensor_slices((train_texts_tfidf.toarray(), train_labels_one_hot)) - - # Embaralha e agrupa os dados train_dataset = train_dataset.shuffle(len(train_texts)).batch(32) # Parâmetros do modelo @@ -99,29 +95,28 @@ def create_and_train_model(train_texts, train_labels, name, epochs=5, batch_size # Define a arquitetura do modelo model = tf.keras.Sequential([ - tf.keras.layers.Dense(64, activation='relu', input_shape=(num_features,)), + tf.keras.layers.Embedding(input_dim=num_features, output_dim=64), + tf.keras.layers.SimpleRNN(64), tf.keras.layers.Dense(num_classes, activation='softmax') ]) - + model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"]) try: progress_callback = TrainingProgressCallback() - # Treina o modelo history = model.fit(train_dataset, epochs=epochs, batch_size=batch_size, callbacks=[progress_callback]) - # Salva o modelo - model_filename = f"api/models/Trained-Model-{name}.keras" - model.save(model_filename) + model_filename = f"api/models/{str(num_classes)}-Trained-Model-{name}.weights.h5" + model.save_weights(model_filename) - # Obtém estatísticas do treinamento training_stats = { "loss": history.history['loss'], "accuracy": history.history['accuracy'] } - # Retorna estatísticas como JSON return json.dumps(training_stats) except Exception as e: - return f"Error during model creation/training: {str(e)}" \ No newline at end of file + return f"Error during model creation/training: {str(e)}" + + diff --git a/api/available_classifiers.py b/api/available_classifiers.py index 6541f33d..73dc5657 100644 --- a/api/available_classifiers.py +++ b/api/available_classifiers.py @@ -13,7 +13,7 @@ def get_available_classifiers(): classifiers = {} for file in model_files: - if file.endswith('.pkl') or file.endswith('.keras'): + if file.endswith('.pkl') or file.endswith('.keras') or file.endswith('.h5'): classifiers[len(classifiers)] = file return classifiers \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index e07cd6b4..14b780e3 100644 Binary files a/requirements.txt and b/requirements.txt differ