diff --git a/api/DataProcesser.py b/api/DataProcesser.py index 06353468..9c293e5e 100644 --- a/api/DataProcesser.py +++ b/api/DataProcesser.py @@ -14,6 +14,7 @@ import nltk from nltk.corpus import stopwords +nltk.download('stopwords') # mais imports class DataProcesser(): diff --git a/api/Neural_Network2.py b/api/Neural_Network2.py index 4c06fab4..af37a1bc 100644 --- a/api/Neural_Network2.py +++ b/api/Neural_Network2.py @@ -4,14 +4,18 @@ import pandas as pd import numpy as np import tensorflow as tf +import os import joblib +from sklearn.feature_extraction.text import TfidfVectorizer + from tensorflow.python.keras import layers -from tensorflow.python.keras.layers import TextVectorization from tensorflow.python.keras.models import Sequential from sklearn.preprocessing import LabelEncoder from tensorflow.python.keras.callbacks import Callback +dirname = os.path.dirname(__file__) + def preprocess_text(text): text = text.lower() text = re.sub('\[.*?\]', '', text) @@ -75,41 +79,28 @@ def create_and_train_model(train_texts, train_labels, name, epochs=5, batch_size num_classes = len(label_encoder.classes_) train_labels_one_hot = tf.keras.utils.to_categorical(train_labels_encoded, num_classes=num_classes) + #label_mapping_file = os.path.join(dirname, rf"api/encoders/LabelMapping-{name}.joblib") label_mapping_file = f"api/encoders/LabelMapping-{name}.joblib" joblib.dump(label_encoder, label_mapping_file) + tfidf_vectorizer = TfidfVectorizer(max_features=20000) + train_texts_tfidf = tfidf_vectorizer.fit_transform(train_texts) + # Cria um conjunto de dados de texto usando a API de conjuntos de dados do TensorFlow - train_dataset = tf.data.Dataset.from_tensor_slices((train_texts, train_labels_one_hot)) + train_dataset = tf.data.Dataset.from_tensor_slices((train_texts_tfidf.toarray(), train_labels_one_hot)) # Embaralha e agrupa os dados train_dataset = train_dataset.shuffle(len(train_texts)).batch(32) # Parâmetros do modelo - max_features = 20000 - embedding_dim = 128 - sequence_length = 500 - - # Cria uma camada de vetorização de texto - vectorize_layer = TextVectorization( - max_tokens=max_features, - output_mode="int", - output_sequence_length=sequence_length, - ) - - # Adapta a camada de vetorização ao conjunto de dados de texto - vectorize_layer.adapt(train_dataset.map(lambda x, y: x)) + num_features = train_texts_tfidf.shape[1] # Define a arquitetura do modelo model = tf.keras.Sequential([ - vectorize_layer, - layers.Embedding(max_features, embedding_dim), - layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3), - layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3), - layers.GlobalMaxPooling1D(), - layers.Dense(128, activation="relu"), - layers.Dropout(0.5), - layers.Dense(num_classes, activation="softmax", name="predictions") + tf.keras.layers.Dense(64, activation='relu', input_shape=(num_features,)), + tf.keras.layers.Dense(num_classes, activation='softmax') ]) + model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"]) try: diff --git a/api/app.py b/api/app.py index fcf472e6..8b191d03 100644 --- a/api/app.py +++ b/api/app.py @@ -24,7 +24,6 @@ loop = asyncio.get_event_loop() def run_flask_app(): - global server_thread app.run(host='0.0.0.0', port=5000, debug=True, use_reloader=False) def shutdown_server(): @@ -73,7 +72,7 @@ def train_model(): # print("\n") print("Received data: " + str(len(selected_data))) - print("Recivied label: " + str(len(selected_label))) + print("Received label: " + str(len(selected_label))) print("Name: " + str(name)) print("Epochs: " + str(epochs)) print("Batch Size: " + str(batch_size)) @@ -105,8 +104,11 @@ def get_training_status(): except FileNotFoundError: return jsonify({'training_in_progress': False, 'training_progress': 0}) -if __name__ == '__main__': - server_thread = threading.Thread(target=run_flask_app) - server_thread.start() - atexit.register(shutdown_server) +#@app.teardown_appcontext +#def teardown_appcontext(error=None): + #shutdown_server() +if __name__ == '__main__': + app.run(host='0.0.0.0', port=5000) + #server_thread = threading.Thread(target=run_flask_app) + #server_thread.start() \ No newline at end of file diff --git a/api/available_classifiers.py b/api/available_classifiers.py index ec57c317..6541f33d 100644 --- a/api/available_classifiers.py +++ b/api/available_classifiers.py @@ -2,7 +2,7 @@ import pickle def get_available_classifiers(): - model_folder = 'api\models' + model_folder = 'api/models' # Verifica se o diretório 'models' existe if not os.path.exists(model_folder):