Skip to content

Commit e6381c1

Browse files
Merge pull request #6 from TailUFPB/nn-fakenews
Custom Classifier Training
2 parents d6825a1 + 88ca86c commit e6381c1

File tree

3 files changed

+145
-0
lines changed

3 files changed

+145
-0
lines changed

api/Neural_Network2.py

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
import json
2+
import re
3+
import string
4+
import pandas as pd
5+
import numpy as np
6+
import tensorflow as tf
7+
from tensorflow.keras import layers
8+
from tensorflow.keras.layers import TextVectorization
9+
from tensorflow.keras.models import Sequential
10+
from sklearn.preprocessing import LabelEncoder
11+
12+
def preprocess_text(text):
    """Normalize raw text before it is fed to the classifier.

    The cleaning steps run in this order:

    1. lowercase the text;
    2. drop bracketed spans such as ``[video]``;
    3. drop URLs (``http://...``, ``https://...``, ``www. ...``);
    4. drop HTML/XML tags;
    5. replace every remaining non-word character with a space;
    6. drop underscores (the only punctuation that survives step 5);
    7. drop every token that contains a digit.

    URLs and tags are removed *before* the generic non-word substitution.
    If that substitution ran first it would erase the ``:``, ``/``, ``.``,
    ``<`` and ``>`` characters those patterns rely on, so they could never
    match and URL/tag fragments would leak into the output.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string. Whitespace is not collapsed, so removed spans
        may leave runs of consecutive spaces behind.
    """
    text = text.lower()
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)
    # Newlines are non-word characters too, so they become spaces here and
    # no separate newline-stripping step is needed afterwards.
    text = re.sub(r'\W', ' ', text)
    # At this point "_" is the only character of string.punctuation left.
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub(r'\w*\d\w*', '', text)
    return text
22+
23+
def _build_text_cnn(num_classes, max_features, embedding_dim, sequence_length):
    """Build and compile the 1D-CNN classifier over integer token sequences."""
    inputs = tf.keras.Input(shape=(sequence_length,), dtype="int64")
    x = layers.Embedding(max_features, embedding_dim)(inputs)
    x = layers.Dropout(0.5)(x)
    x = layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3)(x)
    x = layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3)(x)
    x = layers.GlobalMaxPooling1D()(x)
    x = layers.Dense(128, activation="relu")(x)
    x = layers.Dropout(0.5)(x)
    predictions = layers.Dense(num_classes, activation="softmax", name="predictions")(x)

    model = tf.keras.Model(inputs, predictions)
    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
    return model


def create_and_train_model(train_texts, train_labels, name, epochs=5):
    """Train a text classifier and save it under ``models/``.

    The labels are integer-encoded with ``LabelEncoder`` and one-hot encoded,
    the texts are tokenized with a ``TextVectorization`` layer adapted to the
    training texts, and a small 1D-CNN is trained on the result. The trained
    model is written to ``models/Trained-Model-<name>.keras``.

    Args:
        train_texts: Sequence of raw text strings.
        train_labels: Sequence of labels, one per entry of ``train_texts``.
        name: Identifier used to build the saved model's file name. Path
            separators in it are replaced with ``_``.
        epochs: Number of training epochs.

    Returns:
        A JSON string with the per-epoch ``"loss"`` and ``"accuracy"`` lists
        on success, or a string starting with
        ``"Error during model creation/training: "`` on failure. Any
        ``Exception`` (including invalid input) is logged and reported this
        way rather than raised.
    """
    # Local imports: these stdlib modules are not part of the file's
    # top-level import block.
    import logging
    import os

    logger = logging.getLogger(__name__)

    # Model / pipeline parameters.
    max_features = 20000
    embedding_dim = 128
    sequence_length = 500
    batch_size = 32
    models_dir = "models"

    try:
        # Validate before touching TensorFlow so that bad requests produce a
        # readable message instead of an obscure tensor-conversion error.
        if train_texts is None or train_labels is None:
            raise ValueError("training texts and labels are required")
        num_samples = len(train_texts)
        if num_samples == 0:
            raise ValueError("training texts must not be empty")
        if len(train_labels) != num_samples:
            raise ValueError(
                f"got {num_samples} texts but {len(train_labels)} labels"
            )

        label_encoder = LabelEncoder()
        train_labels_encoded = label_encoder.fit_transform(train_labels)
        num_classes = len(label_encoder.classes_)
        train_labels_one_hot = tf.keras.utils.to_categorical(
            train_labels_encoded, num_classes=num_classes
        )

        # Text vectorization layer, adapted to the training texts only.
        vectorize_layer = TextVectorization(
            max_tokens=max_features,
            output_mode="int",
            output_sequence_length=sequence_length,
        )
        vectorize_layer.adapt(
            tf.data.Dataset.from_tensor_slices(train_texts).batch(batch_size)
        )

        def vectorize_text(text, label):
            """Vectorize a batch of texts and pass the labels through."""
            text = tf.expand_dims(text, -1)
            return vectorize_layer(text), label

        # Vectorize in batches and cache the vectorized samples, THEN shuffle.
        # Shuffling before cache() would freeze the order produced during the
        # first epoch and replay it unchanged in every later epoch.
        train_ds = (
            tf.data.Dataset.from_tensor_slices((train_texts, train_labels_one_hot))
            .batch(batch_size)
            .map(vectorize_text)
            .cache()
            .unbatch()
            .shuffle(num_samples)
            .batch(batch_size)
            .prefetch(buffer_size=tf.data.AUTOTUNE)
        )

        model = _build_text_cnn(num_classes, max_features, embedding_dim, sequence_length)
        history = model.fit(train_ds, epochs=epochs)

        # `name` may come from an HTTP request: keep it from escaping the
        # models directory, and make sure that directory exists.
        safe_name = str(name).replace("/", "_").replace("\\", "_")
        os.makedirs(models_dir, exist_ok=True)
        model_filename = f"{models_dir}/Trained-Model-{safe_name}.keras"
        # NOTE(review): only the CNN is saved. The fitted TextVectorization
        # vocabulary and the LabelEncoder classes are not persisted, so the
        # saved model expects already-vectorized int64 sequences of length
        # `sequence_length` and returns class indices in encoder order.
        model.save(model_filename)

        # Plain floats keep the stats JSON-serializable whatever numeric
        # type the History object holds.
        training_stats = {
            "loss": [float(value) for value in history.history['loss']],
            "accuracy": [float(value) for value in history.history['accuracy']],
        }
        return json.dumps(training_stats)

    except Exception as e:
        logger.exception("Model creation/training failed")
        return f"Error during model creation/training: {str(e)}"
100+
101+
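'''
Usage notes:

stats_json = create_and_train_model(texts_train, labels_train, "MyModel")

The call returns the training statistics (loss and accuracy per epoch) as a
JSON string, or an error message string if training fails. It does not
return a file name: the trained model is written to
"models/Trained-Model-<name>.keras".

To load the trained model from that file:
loaded_model = tf.keras.models.load_model("models/Trained-Model-MyModel.keras")

The file is saved with model.save in Keras format, not with pickle, so
pickle.load cannot read it.

The loaded model can then be used to make predictions, for example:
predictions = loaded_model.predict(vectorized_texts)

Note: the model takes integer token sequences of length 500, as produced by
the TextVectorization layer used during training, not raw text strings.
'''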
114+
'''
115+
TESTE:
116+
117+
df_true = pd.read_csv("Linguifai/api/training_df/True.csv")
118+
df_fake = pd.read_csv("Linguifai/api/training_df/Fake.csv")
119+
120+
121+
df_fake = df_fake.drop(['title', 'subject', 'date'], axis=1)
122+
df_true = df_true.drop(['title', 'subject', 'date'], axis=1)
123+
124+
125+
df_fake['text'] = df_fake["text"]
126+
df_true['text'] = df_true["text"]
127+
128+
df_fake_train = df_fake[:5000]
129+
df_true_train = df_true[:5000]
130+
131+
textos = df_fake_train['text'].tolist() + df_true_train['text'].tolist()
132+
labels = [0] * len(df_fake_train) + [1] * len(df_true_train)
133+
134+
create_and_train_model(textos,labels,"Teste")
135+
136+
'''

api/Trained-Model-Route-Testing.h5

32.2 MB
Binary file not shown.

api/app.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from flask import Flask, jsonify, request
22
from flask_cors import CORS
33
from DataProcesser import DataProcesser
4+
from Neural_Network2 import create_and_train_model
45
from available_classifiers import get_available_classifiers
56

67
import os
@@ -54,6 +55,14 @@ def shutdown():
5455
shutdown_server()
5556
return 'Server shutting down...'
5657

58+
@app.route('/neural-network', methods=["POST"])
def train_model():
    """Train a custom classifier from the JSON body of a POST request.

    The body must be a JSON object with three fields: ``data`` (the training
    texts), ``label`` (one label per text) and ``name`` (used in the saved
    model's file name).

    Returns:
        Whatever ``create_and_train_model`` returns when the payload is
        complete, or a JSON ``{"error": ...}`` body with status 400 when the
        body is not a JSON object or a required field is missing or empty.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # body, so every bad payload is reported through the same 400 response.
    received_data = request.get_json(silent=True)
    if not isinstance(received_data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    selected_data = received_data.get('data')
    selected_label = received_data.get('label')
    name = received_data.get('name')

    required = (('data', selected_data), ('label', selected_label), ('name', name))
    missing = [field for field, value in required if not value]
    if missing:
        message = f"Missing or empty required field(s): {', '.join(missing)}"
        return jsonify({"error": message}), 400

    return create_and_train_model(selected_data, selected_label, name)
65+
5766
if __name__ == '__main__':
5867
server_thread = threading.Thread(target=run_flask_app)
5968
server_thread.start()

0 commit comments

Comments
 (0)