Merge branch 'main' of github.com:TailUFPB/LinguifAI

TailUFPB · Mar 28, 2024 · ea213f3
2 parents b810cd8 + f80f5ca
commit ea213f3
Show file tree

Hide file tree

Showing 4 changed files with 33 additions and 23 deletions.
diff --git a/api/DataProcesser.py b/api/DataProcesser.py
@@ -5,6 +5,7 @@
 from available_classifiers import get_available_classifiers
 from tensorflow.python.keras.models import load_model
 
+import scipy as sp
 import pandas as pd
 import numpy as np
 import tensorflow as tf
@@ -13,6 +14,7 @@
 import joblib
 import string
 import os
+from sklearn.feature_extraction.text import TfidfVectorizer
 
 import nltk
 from nltk.corpus import stopwords
@@ -49,6 +51,7 @@ def generate_statistics(self, df):
  }
 
  return statistics
+
 
  def preprocess_text(self, text):
  text = str(text).lower()
@@ -59,9 +62,10 @@ def preprocess_text(self, text):
  text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
  text = re.sub('\n', '', text)
  text = re.sub('\w*\d\w*', '', text)
-
+ 
  return text
 
+
  def classify_emotions(self, df):
  df['output_column'] = df['input_column'].apply(make_prediction)
  return df
@@ -86,7 +90,7 @@ def pretrained_predict(self, df, model_name):
 
  def load_weights_and_model(self, name):
  model_filename = f"api/models/{name}"
- num_classes = model_filename[model_filename.index("/") + 1, model_filename.index("-")]
+ num_classes = model_filename[model_filename.index("s") + 2 : model_filename.index("-")]
  model = tf.keras.Sequential([
  tf.keras.layers.Embedding(input_dim=20000, output_dim=128),
  tf.keras.layers.LSTM(64),
@@ -98,20 +102,31 @@ def load_weights_and_model(self, name):
  def trained_predict(self, df, model_name):
  model = self.load_weights_and_model(model_name)
 
- encoder_re = r'Trained-Model-(.*?).keras'
- encoder_name = re.search(encoder_re, model_name).group(1)
+
+ encoder_name = model_name[model_name.index('l') + 2 : model_name.index('.')]
 
  label_map_filename = f"api\encoders/LabelMapping-{encoder_name}.joblib"
  label_encoder = joblib.load(label_map_filename)
 
  raw_text = df['input_column'].tolist()
- test_texts = [self.preprocess_text(text) for text in raw_text]
+
+ # prediction (nao sei como fazer agora) 
+ # vectorizer = TfidfVectorizer(max_features=20000)
+ # raw_text = [self.preprocess_text(text).encode("utf-8") for text in raw_text]
+ # vectorizer.fit_transform(raw_text)
+ # vectorized_data = vectorizer.transform(raw_text)
+
+ # vectorized_data = np.asarray(vectorized_data.todense())
+
+ # # Make predictions using the model
+
+ # predictions = model.predict(vectorized_data)
+
+ # predicted_labels_encoded = tf.argmax(predictions, axis=1).numpy()
 
- predictions = model.predict(test_texts)
- predicted_labels_encoded = tf.argmax(predictions, axis=1).numpy()
- predicted_labels = [label_encoder.classes_[label] for label in predicted_labels_encoded]
+ # predicted_labels = [label_encoder.classes_[label] for label in predicted_labels_encoded]
 
- df['output_column'] = predicted_labels
+ # df['output_column'] = predicted_labels
 
  return df
 

diff --git a/api/Neural_Network2.py b/api/Neural_Network2.py
@@ -79,7 +79,6 @@ def create_and_train_model(train_texts, train_labels, name, epochs=5, batch_size
  num_classes = len(label_encoder.classes_)
  train_labels_one_hot = tf.keras.utils.to_categorical(train_labels_encoded, num_classes=num_classes)
 
- #label_mapping_file = os.path.join(dirname, rf"api/encoders/LabelMapping-{name}.joblib")
  label_mapping_file = f"api/encoders/LabelMapping-{name}.joblib"
  joblib.dump(label_encoder, label_mapping_file)
 
@@ -88,40 +87,36 @@ def create_and_train_model(train_texts, train_labels, name, epochs=5, batch_size
  train_texts = [preprocess_text(text) for text in train_texts]
  train_texts_tfidf = tfidf_vectorizer.fit_transform(train_texts)
 
- # Cria um conjunto de dados de texto usando a API de conjuntos de dados do TensorFlow
  train_dataset = tf.data.Dataset.from_tensor_slices((train_texts_tfidf.toarray(), train_labels_one_hot))
-
- # Embaralha e agrupa os dados
  train_dataset = train_dataset.shuffle(len(train_texts)).batch(32)
 
  # Parâmetros do modelo
  num_features = train_texts_tfidf.shape[1]
 
  # Define a arquitetura do modelo
  model = tf.keras.Sequential([
- tf.keras.layers.Dense(64, activation='relu', input_shape=(num_features,)),
+ tf.keras.layers.Embedding(input_dim=num_features, output_dim=64),
+ tf.keras.layers.SimpleRNN(64),
  tf.keras.layers.Dense(num_classes, activation='softmax')
  ])
-
+ 
  model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
 
  try:
  progress_callback = TrainingProgressCallback()
- # Treina o modelo
  history = model.fit(train_dataset, epochs=epochs, batch_size=batch_size, callbacks=[progress_callback])
 
- # Salva o modelo
- model_filename = f"api/models/Trained-Model-{name}.keras"
- model.save(model_filename)
+ model_filename = f"api/models/{str(num_classes)}-Trained-Model-{name}.weights.h5"
+ model.save_weights(model_filename)
 
- # Obtém estatísticas do treinamento
  training_stats = {
  "loss": history.history['loss'],
  "accuracy": history.history['accuracy']
  }
 
- # Retorna estatísticas como JSON
  return json.dumps(training_stats)
 
  except Exception as e:
- return f"Error during model creation/training: {str(e)}"
+ return f"Error during model creation/training: {str(e)}"
+
+
diff --git a/api/available_classifiers.py b/api/available_classifiers.py
@@ -13,7 +13,7 @@ def get_available_classifiers():
  classifiers = {}
 
  for file in model_files:
- if file.endswith('.pkl') or file.endswith('.keras'):
+ if file.endswith('.pkl') or file.endswith('.keras') or file.endswith('.h5'):
  classifiers[len(classifiers)] = file
 
  return classifiers
diff --git a/requirements.txt b/requirements.txt