Merge branch 'develop' into style/new-theme

tahaluh committed Jun 8, 2024
2 parents 053043a + 5dfd7e6 commit f6cfa68
Showing 6 changed files with 223 additions and 37 deletions.
21 changes: 15 additions & 6 deletions api/DataProcesser.py
@@ -19,6 +19,7 @@
 import torch
 from collections import Counter
 from functools import partial
+import pickle
 
 import nltk
 from nltk.tokenize import wordpunct_tokenize
@@ -35,8 +36,11 @@ def handle_classify(self, df, classifier):
         classifier_switcher = get_available_classifiers()  # id: file name
         model_name = classifier_switcher[classifier]
         if model_name.endswith('.pkl'):
-            pipeline = self.get_pipeline(model_name)
-            return self.pretrained_predict(df, pipeline)
+            pipeline, custom = self.get_pipeline(model_name)
+            if custom:
+                return self.pretrained_predict(df, pipeline, model_name)
+            else:
+                return self.pretrained_predict(df, pipeline)
         else:
             return self.trained_predict(df, model_name)
         #classifier_switcher = {
@@ -58,8 +62,9 @@ def get_pipeline(self, model_name):
             df = pd.read_csv('api/training_df/nb_news.csv')
             train_data, test_data, train_target, test_target = train_test_split(df['short_description'], df['category'], test_size=0.2, shuffle=True)
         else:
-            return None
-        return make_pipeline(TfidfVectorizer(), MultinomialNB()).fit(train_data, train_target)
+            with open(f'api/models/{model_name}', 'rb') as file:
+                return pickle.load(file), True
+        return make_pipeline(TfidfVectorizer(), MultinomialNB()).fit(train_data, train_target), False
 
     def generate_statistics(self, df):
         unique_labels = df['output_column'].unique()
@@ -95,11 +100,15 @@ def nb_news_application(self, df):
         df['output_column'] = df['input_column'].apply(news_prediction)
         return df
 
-    def pretrained_predict(self, df, pipeline):
+    def pretrained_predict(self, df, pipeline, model_name=None):
+        if model_name:
+            label_map_filename = f"api/encoders/LabelMapping-{model_name.split('_')[0]}.joblib"
+            label_encoder = joblib.load(label_map_filename)
         texts_to_predict = df['input_column']
         texts_to_predict = [str(text) for text in texts_to_predict]
         predictions = pipeline.predict(texts_to_predict)
-        df['output_column'] = predictions
+        label_predictions = label_encoder.inverse_transform(predictions)
+        df['output_column'] = label_predictions
         return df
 
     def load_weights_and_model(self, name):
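
Taken together, get_pipeline now returns a (pipeline, custom) pair, and pretrained_predict maps integer predictions back to the original labels when told which custom model produced them. A minimal standalone sketch of the custom-model path (the model and encoder file names here are hypothetical; the api/models and api/encoders paths are the ones used above):

import pickle
import joblib

# Hypothetical model saved by create_and_train_nb_model below (name="demo").
model_name = "demo_pipeline.pkl"

# The custom == True branch of get_pipeline: unpickle the saved sklearn pipeline.
with open(f"api/models/{model_name}", "rb") as file:
    pipeline = pickle.load(file)

# pretrained_predict derives the encoder file from the part of the file name
# before the first underscore: "demo_pipeline.pkl" -> "LabelMapping-demo.joblib".
label_encoder = joblib.load(f"api/encoders/LabelMapping-{model_name.split('_')[0]}.joblib")

texts = ["example text to classify"]
predictions = pipeline.predict(texts)                  # integer class ids
labels = label_encoder.inverse_transform(predictions)  # original label strings

Note that only the custom branch loads an encoder, so the decoding step above assumes model_name was passed.
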
103 changes: 103 additions & 0 deletions api/Neural_Network.py
@@ -0,0 +1,103 @@
import re
import os
import joblib
import json
import pickle
from functools import partial
from collections import Counter

import pandas as pd
from nltk.corpus import stopwords
from nltk import wordpunct_tokenize
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

tqdm.pandas()

import nltk
nltk.download('stopwords')

def tokenize(text, stop_words):
    text = str(text)
    text = re.sub(r'[^\w\s]', '', text)
    text = text.lower()
    tokens = wordpunct_tokenize(text)
    tokens = [token for token in tokens if token not in stop_words]
    return tokens

class CustomDataset:
    def __init__(self, df, max_vocab, max_len, name):
        stop_words = set(stopwords.words('english'))

        df['tokens'] = df.input_text.progress_apply(
            partial(tokenize, stop_words=stop_words),
        )

        all_tokens = [token for tokens in df.tokens.tolist() for token in tokens]
        common_tokens = set([token for token, _ in Counter(all_tokens).most_common(max_vocab)])
        df['tokens'] = df.tokens.progress_apply(
            partial(remove_rare_words, common_tokens=common_tokens, max_len=max_len),
        )

        df = df[df.tokens.progress_apply(lambda tokens: any(token != '<UNK>' for token in tokens))]

        df['clean_text'] = df.tokens.apply(lambda tokens: ' '.join(tokens))

        self.text = df.clean_text.tolist()
        self.labels = df.labels.tolist()

        label_encoder = LabelEncoder()
        self.encoded_labels = label_encoder.fit_transform(df['labels'])
        encoder_name = f"LabelMapping-{name}.joblib"
        encoder_filename = os.path.join("api", "encoders", encoder_name)
        os.makedirs(os.path.dirname(encoder_filename), exist_ok=True)
        joblib.dump(label_encoder, encoder_filename)

def remove_rare_words(tokens, common_tokens, max_len):
    return [token if token in common_tokens else '<UNK>' for token in tokens][-max_len:]

def create_and_train_nb_model(df, name, epochs=10, batch_size=16, learning_rate=0.001, valid_ratio=0.05, test_ratio=0.05):
    # epochs, batch_size and learning_rate are accepted for signature parity
    # with the RNN trainer, but a Naive Bayes pipeline does not use them.
    max_vocab = 20000
    max_len = 200

    dataset = CustomDataset(df, max_vocab, max_len, name)

    X_train, X_temp, y_train, y_temp = train_test_split(
        dataset.text, dataset.encoded_labels, test_size=valid_ratio + test_ratio, random_state=42)
    X_valid, X_test, y_valid, y_test = train_test_split(
        X_temp, y_temp, test_size=test_ratio / (valid_ratio + test_ratio), random_state=42)

    # Creating a pipeline with TF-IDF vectorizer and Multinomial Naive Bayes
    pipeline = make_pipeline(TfidfVectorizer(max_features=max_vocab), MultinomialNB())

    # Fitting the model to the training data
    pipeline.fit(X_train, y_train)

    # Evaluating on the validation set
    valid_preds = pipeline.predict(X_valid)
    valid_acc = accuracy_score(y_valid, valid_preds)
    print(f'Validation Accuracy: {valid_acc:.2f}')
    print(f'Validation Report:\n{classification_report(y_valid, valid_preds)}')

    # Evaluating on the test set
    test_preds = pipeline.predict(X_test)
    test_acc = accuracy_score(y_test, test_preds)
    print(f'Test Accuracy: {test_acc:.2f}')
    print(f'Test Report:\n{classification_report(y_test, test_preds)}')

    # Saving the pipeline to a file
    model_path = os.path.join('api', 'models', f"{name}_pipeline.pkl")
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    with open(model_path, "wb") as model_file:
        pickle.dump(pipeline, model_file)

    training_progress = {
        'training_progress': 0,
        'training_in_progress': False
    }
    with open('training_progress.json', 'w') as file:
        json.dump(training_progress, file)
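
For reference, a minimal usage sketch of the trainer above (toy, hypothetical data; input_text and labels are the column names CustomDataset expects):

import pandas as pd
from Neural_Network import create_and_train_nb_model

# Toy two-class dataset, repeated so the 5% validation and test splits are non-empty.
df = pd.DataFrame({
    'input_text': ["great product", "terrible service", "loved it", "awful experience"] * 50,
    'labels': ["pos", "neg", "pos", "neg"] * 50,
})

# Writes api/models/demo_pipeline.pkl and api/encoders/LabelMapping-demo.joblib.
create_and_train_nb_model(df, name="demo")
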
3 changes: 1 addition & 2 deletions api/Neural_Network2.py
@@ -312,8 +312,7 @@ def validate_epoch(model, valid_loader, criterion):
 
     return total_loss / total
 
-
-def create_and_train_model(df, name, epochs=10, batch_size=32, learning_rate=0.001):
+def create_and_train_rnn_model(df, name, epochs=10, batch_size=32, learning_rate=0.001):
     # Initial model settings and preparation
     dropout_probability = 0.2
     n_rnn_layers = 1
47 changes: 43 additions & 4 deletions api/app.py
@@ -1,7 +1,8 @@
 from flask import Flask, jsonify, request
 from flask_cors import CORS
 from DataProcesser import DataProcesser
-from Neural_Network2 import create_and_train_model
+from Neural_Network2 import create_and_train_rnn_model
+from Neural_Network import create_and_train_nb_model
 from available_classifiers import get_available_classifiers
 
 import time
@@ -63,8 +64,8 @@ def shutdown():
     shutdown_server()
     return 'Server shutting down...'
 
-@app.route('/neural-network', methods=["POST"])
-def train_model():
+@app.route('/neural-network-rnn', methods=["POST"])
+def train_rnn_model():
     received_data = request.json
 
     if received_data:
@@ -98,7 +99,45 @@ def train_model():
 
     df = pd.DataFrame({'input_text': selected_data, 'labels': selected_label})
 
-    create_and_train_model(df, name, epochs, batch_size, learning_rate)
+    create_and_train_rnn_model(df, name, epochs, batch_size, learning_rate)
 
     return jsonify({"message": "Model train started successfully."}), 200
+
+@app.route('/neural-network-nb', methods=["POST"])
+def train_nb_model():
+    received_data = request.json
+
+    if received_data:
+        selected_data = received_data.get('data')
+        selected_label = received_data.get('label')
+        epochs = received_data.get('epochs')
+        batch_size = received_data.get('batch_size')
+        learning_rate = received_data.get('learning_rate')
+        name = received_data.get('name')
+
+        # log the received training request
+        print("\n")
+        print("Received data: " + str(len(selected_data)))
+        print("Received label: " + str(len(selected_label)))
+        print("Name: " + str(name))
+        print("Epochs: " + str(epochs))
+        print("Batch Size: " + str(batch_size))
+        print("Learning Rate: " + str(learning_rate))
+        print("\n")
+    else:
+        return jsonify({"message": "No data received."}), 400
+
+    # reset status
+    training_progress = {
+        'training_progress': 0,
+        'training_in_progress': True
+    }
+    with open('training_progress.json', 'w') as file:
+        json.dump(training_progress, file)
+
+    df = pd.DataFrame({'input_text': selected_data, 'labels': selected_label})
+
+    create_and_train_nb_model(df, name, epochs, batch_size, learning_rate)
+
+    return jsonify({"message": "Model train started successfully."}), 200
 
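
A request the new route accepts, sketched with the requests library (the field values are illustrative and mirror the frontend defaults in trainView.tsx below):

import requests

payload = {
    "data": ["first training text", "second training text"],
    "label": ["pos", "neg"],
    "epochs": 50,
    "batch_size": 16,
    "learning_rate": 0.001,
    "name": "trained-model",
}

response = requests.post("http://localhost:5000/neural-network-nb", json=payload)
print(response.status_code, response.json())
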
3 changes: 1 addition & 2 deletions src/pages/train.tsx
@@ -3,7 +3,6 @@ import SelectFileCard from "../components/selectFileCard/selectFileCard";
 import axios from "axios";
 import ResultTable from "../components/resultTable/resultTable";
 import { Menu } from "../components/menu/menu";
-import ReactApexChart from "react-apexcharts";
 import { ReactApexChartsDefaultOptions } from "../Shared/apexChartsOptions";
 import Layout from "./layout/layout";
 import TrainView from "./views/trainView";
@@ -13,4 +12,4 @@ export default function Train() {
   return (
     <Layout><TrainView /></Layout>
   );
-}
\ No newline at end of file
+}
83 changes: 60 additions & 23 deletions src/pages/views/trainView.tsx
@@ -37,7 +37,7 @@ export default function TrainView() {
     setIsCancelling(false);
   };
 
-  const handleSubmit = async () => {
+  const handleRnnSubmit = async () => {
     setIsLoading(true);
     setHasTrained(true);
     setLoadingProgress(0);
@@ -63,25 +63,55 @@
 
     console.log(sendData);
 
-    const url = "http://localhost:5000/neural-network";
-
-    await axios
-      .post(url, sendData)
-      .catch(async (error) => {
-        await axios
-          .post(url, sendData)
-          .catch(async (error) => {
-            await axios
-              .post(url, sendData)
-              .catch(async (error) => {
-                await axios
-                  .post(url, sendData)
-                  .catch((error) => {
-                    throw new Error(error);
-                  })
-              })
-          })
-      })
+    const url = "http://localhost:5000/neural-network-rnn";
+
+    await axios.post(url, sendData).catch(async (error) => {
+      await axios.post(url, sendData).catch(async (error) => {
+        await axios.post(url, sendData).catch(async (error) => {
+          await axios.post(url, sendData).catch((error) => {
+            throw new Error(error);
+          });
+        });
+      });
+    });
 
     setIsLoading(false);
   };
 
+  const handleNbSubmit = async () => {
+    setIsLoading(true);
+    setLoadingProgress(0);
+
+    let selectedData = data.map((row) => ({
+      value: row[selectedColumn],
+      label: row[selectedLabel],
+    }));
+
+    let selectedLabels = data.map((row) => row[selectedLabel]);
+    let selectedValues = data.map((row) => row[selectedColumn]);
+
+    const sendData = {
+      data: selectedValues,
+      label: selectedLabels,
+      batch_size: batchSize || 16,
+      epochs: epochs || 50,
+      learning_rate: learningRate || 0.001,
+      name: modelName || "trained-model",
+    };
+
+    console.log(sendData);
+
+    const url = "http://localhost:5000/neural-network-nb";
+
+    await axios.post(url, sendData).catch(async (error) => {
+      await axios.post(url, sendData).catch(async (error) => {
+        await axios.post(url, sendData).catch(async (error) => {
+          await axios.post(url, sendData).catch((error) => {
+            throw new Error(error);
+          });
+        });
+      });
+    });
+
+    setIsLoading(false);
+  };
@@ -131,9 +161,14 @@
   useEffect(() => {
     const fetchData = async () => {
       try {
-        const response = await axios.get("http://localhost:5000/training-status");
+        const response = await axios.get(
+          "http://localhost:5000/training-status"
+        );
         const { training_progress, training_in_progress, train_losses, valid_losses } = response.data;
-        const newProgress: number = training_in_progress || training_progress === 100 ? training_progress : 0;
+        const newProgress: number =
+          training_in_progress || training_progress === 100
+            ? training_progress
+            : 0; // Explicitly type newProgress
         updateLoadingProgress(newProgress);
 
         setTrainLosses(train_losses);
@@ -144,14 +179,16 @@
     };
 
     const updateLoadingProgress = (newProgress: number) => {
+      // Explicitly type newProgress parameter
       const duration = 1000;
       const startTime = Date.now();
       const startProgress = prevLoadingProgressRef.current;
 
       const updateProgress = () => {
         const elapsedTime = Date.now() - startTime;
         const progress = Math.min(1, elapsedTime / duration);
-        const interpolatedProgress = startProgress + (newProgress - startProgress) * progress;
+        const interpolatedProgress =
+          startProgress + (newProgress - startProgress) * progress;
         setLoadingProgress(interpolatedProgress);
 
         if (progress < 1) {
@@ -295,7 +332,7 @@
 
         {!isLoading && <button
           className={`w-2/4 bg-blue-400 text-white py-2 px-4 hover:bg-blue-500 focus:outline-none border-2 border-blue-500 rounded-xl h-14`}
-          onClick={handleSubmit}
+          onClick={handleNbSubmit}
           disabled={isLoading}
         >
           {isLoading ? "Carregando..." : "Treinar"}
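
The nested .catch chains in both submit handlers amount to a fixed four-attempt retry of the same POST. For comparison, the same policy as a plain loop, sketched in Python with requests (the helper name is hypothetical):

import requests

def post_with_retries(url, payload, attempts=4):
    # Re-issue the POST up to `attempts` times and re-raise the last error,
    # mirroring the nested axios .catch chains in trainView.tsx.
    last_error = None
    for _ in range(attempts):
        try:
            response = requests.post(url, json=payload, timeout=30)
            response.raise_for_status()  # treat HTTP error statuses as failures, as axios does
            return response
        except requests.RequestException as error:
            last_error = error
    raise last_error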
