Skip to content

Commit

Permalink
Adicionando novo modelo
Browse files Browse the repository at this point in the history
  • Loading branch information
Thiago committed Nov 22, 2023
1 parent 90cb15a commit c20c545
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 0 deletions.
17 changes: 17 additions & 0 deletions api/NN_Fakenews_2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import pandas as pd
import pickle
from Neural_Network2 import create_and_train_model


def Neural_Network(texts_train, labels_train):
    """Train the neural-network classifier and persist it with pickle.

    Delegates training to ``create_and_train_model`` and then serializes the
    resulting model to ``nn_fakenews_model.pkl``. The path is relative, so the
    file lands in the process's current working directory.

    Args:
        texts_train: Training texts, forwarded unchanged to
            ``create_and_train_model``.
        labels_train: Training labels, forwarded unchanged to
            ``create_and_train_model``.

    Returns:
        The trained model object returned by ``create_and_train_model``.
    """
    fitted_model = create_and_train_model(texts_train, labels_train)

    # Persist the fitted model so it can be reloaded without retraining.
    # NOTE(review): whether a Keras model pickles cleanly depends on the
    # installed TensorFlow/Keras version -- confirm for the deployed one.
    with open("nn_fakenews_model.pkl", "wb") as pickle_out:
        pickle.dump(fitted_model, pickle_out)

    return fitted_model



79 changes: 79 additions & 0 deletions api/Neural_Network2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import csv
import re
import string
import os
import pandas as pd
import numpy as np
import tensorflow as tf
import pickle
from tensorflow.keras import layers
from tensorflow.keras.layers import TextVectorization
from tensorflow.keras.models import Sequential

def preprocess_text(text):
    """Normalize a raw text before it is tokenized.

    Steps, in order:

    1. lowercase the text;
    2. drop ``[...]`` bracketed spans;
    3. drop URLs (``http://``, ``https://`` or ``www.`` prefixed);
    4. drop ``<...>`` tag-like spans;
    5. replace every remaining non-word character with a space;
    6. drop the punctuation that survives step 5 (only ``_``, since it is a
       word character);
    7. drop every token that contains a digit.

    URLs and tags must be removed *before* step 5: once ``:``, ``/``, ``.``,
    ``<`` and ``>`` have been turned into spaces those patterns can no longer
    match. Newlines need no dedicated step because step 5 already turns them
    into spaces.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string. Runs of spaces are not collapsed.
    """
    text = text.lower()
    text = re.sub(r'\[.*?\]', '', text)
    # Structured patterns first, while their delimiters still exist.
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)
    # Everything that is not a letter, digit or underscore becomes a space.
    text = re.sub(r'\W', ' ', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub(r'\w*\d\w*', '', text)
    return text

def create_and_train_model(train_texts, train_labels, epochs=5):
    """Train a 1D-convolutional binary text classifier on labelled texts.

    Each text is cleaned with ``preprocess_text``, paired with its label in an
    in-memory ``tf.data.Dataset``, converted to fixed-length integer sequences
    by a ``TextVectorization`` layer adapted on the training texts, and used
    to fit an Embedding -> Conv1D x2 -> GlobalMaxPooling -> Dense network with
    a single sigmoid output.

    The dataset is built directly from the arguments rather than by writing
    the texts to a flat directory and reading them back: a directory reader
    that infers labels from class sub-directories cannot recover
    ``train_labels`` from a flat folder, and files left over from an earlier
    run would be mixed into the new training set.

    NOTE(review): the fitted ``TextVectorization`` layer is not part of the
    returned model, so the model consumes integer token sequences, not raw
    strings. Callers that predict on text need the same vocabulary -- confirm
    how inference obtains it.

    Args:
        train_texts: Iterable of training strings.
        train_labels: Iterable of numeric class labels (0 or 1), one per text.
        epochs: Number of training epochs.

    Returns:
        The compiled and fitted ``tf.keras.Model``.

    Raises:
        ValueError: If ``train_texts`` is empty, if the labels are not
            numeric, or if the number of labels differs from the number of
            texts.
    """
    texts = [preprocess_text(text) for text in train_texts]
    if not texts:
        raise ValueError("train_texts must contain at least one text")

    try:
        # Shape (n, 1) matches the model's (batch, 1) sigmoid output exactly.
        labels = np.asarray(list(train_labels), dtype="float32").reshape(-1, 1)
    except (TypeError, ValueError) as err:
        raise ValueError("train_labels must be numeric 0/1 labels") from err
    if len(labels) != len(texts):
        raise ValueError(
            f"got {len(texts)} texts but {len(labels)} labels"
        )

    train_dataset = (
        tf.data.Dataset.from_tensor_slices((texts, labels))
        .shuffle(buffer_size=len(texts))
        .batch(32)
    )

    max_features = 20000      # vocabulary size shared by vectorizer and embedding
    embedding_dim = 128
    sequence_length = 500     # every text is padded/truncated to this many tokens

    vectorize_layer = TextVectorization(
        max_tokens=max_features,
        output_mode="int",
        output_sequence_length=sequence_length,
    )

    # Build the vocabulary from the training texts only (labels dropped).
    text_ds = train_dataset.map(lambda x, y: x)
    vectorize_layer.adapt(text_ds)

    def vectorize_text(text, label):
        # Add a trailing axis so each batch entry is a single-string row.
        text = tf.expand_dims(text, -1)
        return vectorize_layer(text), label

    train_ds = train_dataset.map(vectorize_text)
    train_ds = train_ds.cache().prefetch(buffer_size=10)

    inputs = tf.keras.Input(shape=(None,), dtype="int64")
    x = layers.Embedding(max_features, embedding_dim)(inputs)
    x = layers.Dropout(0.5)(x)
    x = layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3)(x)
    x = layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3)(x)
    x = layers.GlobalMaxPooling1D()(x)
    x = layers.Dense(128, activation="relu")(x)
    x = layers.Dropout(0.5)(x)
    predictions = layers.Dense(1, activation="sigmoid", name="predictions")(x)

    model = tf.keras.Model(inputs, predictions)

    model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
    model.fit(train_ds, epochs=epochs)

    return model

0 comments on commit c20c545

Please sign in to comment.