Skip to content

Commit f6cfa68

Browse files
committed
Merge branch 'develop' into style/new-theme
2 parents 053043a + 5dfd7e6 commit f6cfa68

File tree

6 files changed

+223
-37
lines changed

6 files changed

+223
-37
lines changed

api/DataProcesser.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import torch
2020
from collections import Counter
2121
from functools import partial
22+
import pickle
2223

2324
import nltk
2425
from nltk.tokenize import wordpunct_tokenize
@@ -35,8 +36,11 @@ def handle_classify(self, df, classifier):
3536
classifier_switcher = get_available_classifiers() # id: nome_arquivo
3637
model_name = classifier_switcher[classifier]
3738
if model_name.endswith('.pkl'):
38-
pipeline = self.get_pipeline(model_name)
39-
return self.pretrained_predict(df, pipeline)
39+
pipeline, custom = self.get_pipeline(model_name)
40+
if custom:
41+
return self.pretrained_predict(df, pipeline, model_name)
42+
else:
43+
return self.pretrained_predict(df, pipeline)
4044
else:
4145
return self.trained_predict(df, model_name)
4246
#classifier_switcher = {
@@ -58,8 +62,9 @@ def get_pipeline(self, model_name):
5862
df = pd.read_csv('api/training_df/nb_news.csv')
5963
train_data, test_data, train_target, test_target = train_test_split(df['short_description'], df['category'], test_size=0.2, shuffle=True)
6064
else:
61-
return None
62-
return make_pipeline(TfidfVectorizer(), MultinomialNB()).fit(train_data, train_target)
65+
with open(f'api/models/{model_name}', 'rb') as file:
66+
return pickle.load(file), True
67+
return make_pipeline(TfidfVectorizer(), MultinomialNB()).fit(train_data, train_target), False
6368

6469
def generate_statistics(self, df):
6570
unique_labels = df['output_column'].unique()
@@ -95,11 +100,15 @@ def nb_news_application(self, df):
95100
df['output_column'] = df['input_column'].apply(news_prediction)
96101
return df
97102

98-
def pretrained_predict(self, df, pipeline, model_name=None):
    """Classify ``df['input_column']`` with ``pipeline`` and store the result.

    Args:
        df: DataFrame with an ``input_column`` column; every value is
            converted with ``str()`` before prediction.
        pipeline: Fitted estimator exposing ``predict(list_of_str)``.
        model_name: File name of a custom-trained model, or ``None`` for
            the built-in classifiers. When given, the label encoder stored
            at ``api/encoders/LabelMapping-<name>.joblib`` is loaded and
            used to map the predicted class ids back to their labels.
            ``<name>`` is ``model_name`` without its last ``_``-separated
            part, matching the ``<name>_pipeline.pkl`` /
            ``LabelMapping-<name>.joblib`` pair written by the trainer.

    Returns:
        The same ``df`` object, with ``output_column`` set to the predictions.
    """
    label_encoder = None
    if model_name:
        # rsplit keeps underscores that are part of the user-chosen name
        # ("my_model_pipeline.pkl" -> "my_model"); for names without an
        # underscore this is identical to the previous split('_')[0].
        encoder_key = model_name.rsplit('_', 1)[0]
        label_map_filename = f"api/encoders/LabelMapping-{encoder_key}.joblib"
        label_encoder = joblib.load(label_map_filename)

    texts_to_predict = [str(text) for text in df['input_column']]
    predictions = pipeline.predict(texts_to_predict)

    # Only custom models have encoded targets; the built-in pipelines already
    # predict the final labels, so their output is stored untouched.
    if label_encoder is not None:
        predictions = label_encoder.inverse_transform(predictions)

    df['output_column'] = predictions
    return df
104113

105114
def load_weights_and_model(self, name):

api/Neural_Network.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import re
2+
import os
3+
import joblib
4+
import json
5+
import pickle
6+
from functools import partial
7+
from collections import Counter
8+
9+
import pandas as pd
10+
from nltk.corpus import stopwords
11+
from nltk import wordpunct_tokenize
12+
from tqdm import tqdm
13+
from sklearn.preprocessing import LabelEncoder
14+
from sklearn.feature_extraction.text import TfidfVectorizer
15+
from sklearn.naive_bayes import MultinomialNB
16+
from sklearn.pipeline import make_pipeline
17+
from sklearn.model_selection import train_test_split
18+
from sklearn.metrics import accuracy_score, classification_report
19+
20+
tqdm.pandas()
21+
22+
import nltk
23+
nltk.download('stopwords')
24+
25+
def tokenize(text, stop_words):
    """Turn ``text`` into a list of lower-cased tokens without stop words.

    The value is converted with ``str()``, every character that is neither a
    word character nor whitespace is removed, the result is lower-cased and
    split with ``wordpunct_tokenize``.

    Args:
        text: Value to tokenize (any object; ``str()`` is applied).
        stop_words: Container of tokens to discard.

    Returns:
        List of the remaining tokens, in order.
    """
    normalised = re.sub(r'[^\w\s]', '', str(text)).lower()
    kept = []
    for token in wordpunct_tokenize(normalised):
        if token not in stop_words:
            kept.append(token)
    return kept
32+
33+
class CustomDataset:
    """Cleaned texts and integer-encoded labels built from a training frame.

    Construction tokenizes ``df.input_text``, replaces every token outside
    the ``max_vocab`` most frequent ones with ``'<UNK>'``, keeps only the last
    ``max_len`` tokens of each row, drops rows left with nothing but
    ``'<UNK>'``, and fits a ``LabelEncoder`` on ``df['labels']``. The fitted
    encoder is saved to ``api/encoders/LabelMapping-<name>.joblib``.

    Attributes set on the instance:
        text: list of space-joined cleaned texts.
        labels: list of the original labels of the kept rows.
        encoded_labels: encoded labels, aligned with ``text``.

    Note:
        A ``tokens`` column is added to the ``df`` passed in.
    """

    def __init__(self, df, max_vocab, max_len, name):
        stop_words = set(stopwords.words('english'))

        df['tokens'] = df.input_text.progress_apply(
            partial(tokenize, stop_words=stop_words),
        )

        # Vocabulary = the max_vocab most frequent tokens over the whole frame.
        token_counts = Counter(token for tokens in df.tokens.tolist() for token in tokens)
        common_tokens = {token for token, _ in token_counts.most_common(max_vocab)}
        df['tokens'] = df.tokens.progress_apply(
            partial(remove_rare_words, common_tokens=common_tokens, max_len=max_len),
        )

        has_known_token = df.tokens.progress_apply(
            lambda tokens: any(token != '<UNK>' for token in tokens)
        )
        # .copy() makes the filtered frame independent, so adding clean_text
        # below is not a chained assignment on a slice of the caller's frame.
        df = df[has_known_token].copy()

        df['clean_text'] = df.tokens.apply(lambda tokens: ' '.join(tokens))

        self.text = df.clean_text.tolist()
        self.labels = df.labels.tolist()

        label_encoder = LabelEncoder()
        self.encoded_labels = label_encoder.fit_transform(df['labels'])

        # Persist the encoder so predictions can be mapped back to labels.
        encoder_name = f"LabelMapping-{name}.joblib"
        encoder_filename = os.path.join("api", "encoders", encoder_name)
        os.makedirs(os.path.dirname(encoder_filename), exist_ok=True)
        joblib.dump(label_encoder, encoder_filename)
60+
61+
def remove_rare_words(tokens, common_tokens, max_len):
    """Mask out-of-vocabulary tokens and keep the tail of the sequence.

    Args:
        tokens: Sequence of tokens.
        common_tokens: Container of tokens to keep as they are.
        max_len: Number of trailing tokens to keep (applied as the slice
            ``[-max_len:]``, so ``0`` keeps the whole sequence).

    Returns:
        New list where every token not in ``common_tokens`` is replaced by
        ``'<UNK>'``, cut down to its last ``max_len`` items.
    """
    masked = []
    for token in tokens:
        if token in common_tokens:
            masked.append(token)
        else:
            masked.append('<UNK>')
    tail_start = -max_len
    return masked[tail_start:]
63+
64+
def create_and_train_nb_model(df, name, epochs=10, batch_size=16, learning_rate=0.001, valid_ratio=0.05, test_ratio=0.05):
    """Train a TF-IDF + Multinomial Naive Bayes pipeline and save it to disk.

    The frame is cleaned through ``CustomDataset``, split into train,
    validation and test parts, fitted, evaluated (accuracy and classification
    report are printed) and pickled to ``api/models/<name>_pipeline.pkl``.
    ``training_progress.json`` is rewritten with ``training_in_progress:
    False`` when the function finishes, including when it fails, so the
    status file is never left reporting a training that is no longer running.

    Args:
        df: DataFrame with ``input_text`` and ``labels`` columns.
        name: Model name; used for the model and label-encoder file names.
        epochs: Unused by this trainer; accepted so the signature matches
            the call made with the same arguments as the RNN trainer.
        batch_size: Unused, see ``epochs``.
        learning_rate: Unused, see ``epochs``.
        valid_ratio: Fraction of the data held out for validation.
        test_ratio: Fraction of the data held out for testing.
    """
    max_vocab = 20000
    max_len = 200

    try:
        dataset = CustomDataset(df, max_vocab, max_len, name)

        # First split off validation+test together, then divide that part.
        holdout_ratio = valid_ratio + test_ratio
        X_train, X_temp, y_train, y_temp = train_test_split(
            dataset.text, dataset.encoded_labels, test_size=holdout_ratio, random_state=42)
        X_valid, X_test, y_valid, y_test = train_test_split(
            X_temp, y_temp, test_size=test_ratio / holdout_ratio, random_state=42)

        # Creating a pipeline with TF-IDF vectorizer and Multinomial Naive Bayes
        pipeline = make_pipeline(TfidfVectorizer(max_features=max_vocab), MultinomialNB())

        # Fitting the model to the training data
        pipeline.fit(X_train, y_train)

        # Evaluating on validation set
        valid_preds = pipeline.predict(X_valid)
        valid_acc = accuracy_score(y_valid, valid_preds)
        print(f'Validation Accuracy: {valid_acc:.2f}')
        print(f'Validation Report:\n{classification_report(y_valid, valid_preds)}')

        # Evaluating on test set
        test_preds = pipeline.predict(X_test)
        test_acc = accuracy_score(y_test, test_preds)
        print(f'Test Accuracy: {test_acc:.2f}')
        print(f'Test Report:\n{classification_report(y_test, test_preds)}')

        # Saving the pipeline to a file
        model_path = os.path.join('api', 'models', f"{name}_pipeline.pkl")
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        with open(model_path, "wb") as model_file:
            pickle.dump(pipeline, model_file)
    finally:
        # Always clear the in-progress flag; the exception, if any, still
        # propagates to the caller after the status file is written.
        training_progress = {
            'training_progress': 0,
            'training_in_progress': False
        }
        with open('training_progress.json', 'w') as file:
            json.dump(training_progress, file)

api/Neural_Network2.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -312,8 +312,7 @@ def validate_epoch(model, valid_loader, criterion):
312312

313313
return total_loss / total
314314

315-
316-
def create_and_train_model(df, name, epochs=10, batch_size=32, learning_rate=0.001):
315+
def create_and_train_rnn_model(df, name, epochs = 10, batch_size = 32, learning_rate = 0.001):
317316
# Configurações iniciais e preparações do modelo
318317
dropout_probability = 0.2
319318
n_rnn_layers = 1

api/app.py

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
from flask import Flask, jsonify, request
22
from flask_cors import CORS
33
from DataProcesser import DataProcesser
4-
from Neural_Network2 import create_and_train_model
4+
from Neural_Network2 import create_and_train_rnn_model
5+
from Neural_Network import create_and_train_nb_model
56
from available_classifiers import get_available_classifiers
67

78
import time
@@ -63,8 +64,8 @@ def shutdown():
6364
shutdown_server()
6465
return 'Server shutting down...'
6566

66-
@app.route('/neural-network', methods=["POST"])
67-
def train_model():
67+
@app.route('/neural-network-rnn', methods=["POST"])
68+
def train_rnn_model():
6869
received_data = request.json
6970

7071
if received_data:
@@ -98,7 +99,45 @@ def train_model():
9899

99100
df = pd.DataFrame({'input_text': selected_data, 'labels': selected_label})
100101

101-
create_and_train_model(df, name, epochs, batch_size, learning_rate)
102+
create_and_train_rnn_model(df, name, epochs, batch_size, learning_rate)
103+
104+
return jsonify({"message": "Model train started successfully."}), 200
105+
106+
@app.route('/neural-network-nb', methods=["POST"])
def train_nb_model():
    """Train a Naive Bayes model from the posted JSON payload.

    Expected JSON keys: ``data`` (list of texts), ``label`` (list of labels,
    same length as ``data``), ``name``, and the optional ``epochs``,
    ``batch_size`` and ``learning_rate``, which are forwarded to
    ``create_and_train_nb_model``.

    Returns:
        ``400`` with a message when the body is empty, when ``data`` or
        ``label`` is missing, or when their lengths differ; otherwise ``200``
        after ``create_and_train_nb_model`` has returned.
    """
    received_data = request.json

    if not received_data:
        return jsonify({"message": "No data received."}), 400

    selected_data = received_data.get('data')
    selected_label = received_data.get('label')
    epochs = received_data.get('epochs')
    batch_size = received_data.get('batch_size')
    learning_rate = received_data.get('learning_rate')
    name = received_data.get('name')

    # Reject incomplete payloads up front instead of failing on len(None)
    # below and answering with a 500.
    if selected_data is None or selected_label is None:
        return jsonify({"message": "Both 'data' and 'label' are required."}), 400
    if len(selected_data) != len(selected_label):
        return jsonify({"message": "'data' and 'label' must have the same length."}), 400

    print("\n")
    print("Received data: " + str(len(selected_data)))
    print("Received label: " + str(len(selected_label)))
    print("Name: " + str(name))
    print("Epochs: " + str(epochs))
    print("Batch Size: " + str(batch_size))
    print("Learning Rate: " + str(learning_rate))
    print("\n")

    # reset status
    training_progress = {
        'training_progress': 0,
        'training_in_progress': True
    }
    with open('training_progress.json', 'w') as file:
        json.dump(training_progress, file)

    df = pd.DataFrame({'input_text': selected_data, 'labels': selected_label})

    create_and_train_nb_model(df, name, epochs, batch_size, learning_rate)

    return jsonify({"message": "Model train started successfully."}), 200
104143

src/pages/train.tsx

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ import SelectFileCard from "../components/selectFileCard/selectFileCard";
33
import axios from "axios";
44
import ResultTable from "../components/resultTable/resultTable";
55
import { Menu } from "../components/menu/menu";
6-
import ReactApexChart from "react-apexcharts";
76
import { ReactApexChartsDefaultOptions } from "../Shared/apexChartsOptions";
87
import Layout from "./layout/layout";
98
import TrainView from "./views/trainView";
@@ -13,4 +12,4 @@ export default function Train() {
1312
return (
1413
<Layout><TrainView /></Layout>
1514
);
16-
}
15+
}

src/pages/views/trainView.tsx

Lines changed: 60 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ export default function TrainView() {
3737
setIsCancelling(false);
3838
};
3939

40-
const handleSubmit = async () => {
40+
const handleRnnSubmit = async () => {
4141
setIsLoading(true);
4242
setHasTrained(true);
4343
setLoadingProgress(0);
@@ -63,25 +63,55 @@ export default function TrainView() {
6363

6464
console.log(sendData);
6565

66-
const url = "http://localhost:5000/neural-network";
67-
68-
await axios
69-
.post(url, sendData)
70-
.catch(async (error) => {
71-
await axios
72-
.post(url, sendData)
73-
.catch(async (error) => {
74-
await axios
75-
.post(url, sendData)
76-
.catch(async (error) => {
77-
await axios
78-
.post(url, sendData)
79-
.catch((error) => {
80-
throw new Error(error);
81-
})
82-
})
83-
})
66+
const url = "http://localhost:5000/neural-network-rnn";
67+
68+
await axios.post(url, sendData).catch(async (error) => {
69+
await axios.post(url, sendData).catch(async (error) => {
70+
await axios.post(url, sendData).catch(async (error) => {
71+
await axios.post(url, sendData).catch((error) => {
72+
throw new Error(error);
73+
});
74+
});
75+
});
76+
});
77+
78+
setIsLoading(false);
79+
};
80+
81+
// Sends the selected text/label columns to the Naive Bayes training endpoint.
// The request is attempted up to four times (the same budget as the previous
// nested retries). If every attempt fails, an Error wrapping the last failure
// is thrown. The loading flag is cleared in all cases so the UI cannot stay
// stuck in the loading state after a failed request.
const handleNbSubmit = async () => {
  setIsLoading(true);
  setLoadingProgress(0);

  const selectedLabels = data.map((row) => row[selectedLabel]);
  const selectedValues = data.map((row) => row[selectedColumn]);

  const sendData = {
    data: selectedValues,
    label: selectedLabels,
    batch_size: batchSize || 16,
    epochs: epochs || 50,
    learning_rate: learningRate || 0.001,
    name: modelName || "trained-model",
  };

  console.log(sendData);

  const url = "http://localhost:5000/neural-network-nb";
  const maxAttempts = 4;

  try {
    for (let attempt = 1; ; attempt++) {
      try {
        await axios.post(url, sendData);
        break;
      } catch (error) {
        // Retry silently until the attempt budget is used up.
        if (attempt >= maxAttempts) {
          throw new Error(String(error));
        }
      }
    }
  } finally {
    setIsLoading(false);
  }
};
@@ -131,9 +161,14 @@ export default function TrainView() {
131161
useEffect(() => {
132162
const fetchData = async () => {
133163
try {
134-
const response = await axios.get("http://localhost:5000/training-status");
164+
const response = await axios.get(
165+
"http://localhost:5000/training-status"
166+
);
135167
const { training_progress, training_in_progress, train_losses, valid_losses } = response.data;
136-
const newProgress: number = training_in_progress || training_progress === 100 ? training_progress : 0;
168+
const newProgress: number =
169+
training_in_progress || training_progress === 100
170+
? training_progress
171+
: 0; // Explicitly type newProgress
137172
updateLoadingProgress(newProgress);
138173

139174
setTrainLosses(train_losses);
@@ -144,14 +179,16 @@ export default function TrainView() {
144179
};
145180

146181
const updateLoadingProgress = (newProgress: number) => {
182+
// Explicitly type newProgress parameter
147183
const duration = 1000;
148184
const startTime = Date.now();
149185
const startProgress = prevLoadingProgressRef.current;
150186

151187
const updateProgress = () => {
152188
const elapsedTime = Date.now() - startTime;
153189
const progress = Math.min(1, elapsedTime / duration);
154-
const interpolatedProgress = startProgress + (newProgress - startProgress) * progress;
190+
const interpolatedProgress =
191+
startProgress + (newProgress - startProgress) * progress;
155192
setLoadingProgress(interpolatedProgress);
156193

157194
if (progress < 1) {
@@ -295,7 +332,7 @@ export default function TrainView() {
295332

296333
{!isLoading && <button
297334
className={`w-2/4 bg-blue-400 text-white py-2 px-4 hover:bg-blue-500 focus:outline-none border-2 border-blue-500 rounded-xl h-14`}
298-
onClick={handleSubmit}
335+
onClick={handleNbSubmit}
299336
disabled={isLoading}
300337
>
301338
{isLoading ? "Carregando..." : "Treinar"}

0 commit comments

Comments
 (0)