Skip to content

Commit

Permalink
Merge pull request #16 from cilab-ufersa/feature/new
Browse files Browse the repository at this point in the history
Feature/new
  • Loading branch information
roscibely committed Jun 19, 2023
2 parents 08d7885 + 690ea5a commit f1ec52d
Show file tree
Hide file tree
Showing 19 changed files with 8,904 additions and 8 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
# Scope is all repo folders.
*.json
*.ini
*.log
mlruns/
catboost_info/
*.png
*.eps


# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Period Cycle Prediction
# Predictive Modeling of Menstrual Cycle Length using Artificial Intelligence ⏰

## About

Artificial intelligence implementation for better cycle predictions.
A time series forecasting approach based on artificial intelligence for better menstrual cycle predictions.

Period timing can be uncertain when a woman has irregular cycles. Moreover, the length of the menstrual cycle varies from woman to woman, so every woman has her own particular cycle. AI can help us better understand women's cycles.

Expand Down Expand Up @@ -35,3 +35,9 @@ $ pip install -r requirements.txt
|---- |---- | ----- | ------ |
| 6 | 30|20XX | Starts |
| 7 | 1|20XX | Ends |

---

## Publications related to this project

Rosana Rego. 2023. [Predictive Modeling of Menstrual Cycle Length: A Time Series Forecasting Approach](https://doi.org/10.21203/rs.3.rs-3050181/v1), PREPRINT (Version 1) available at Research Square.
65 changes: 65 additions & 0 deletions period_cycle_prediction/models/arima.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from utils.utils import generate_synthetic_data, generate_final_features, split_dataset, convet2dataframe
from darts import TimeSeries
from darts.models import AutoARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import warnings
warnings.filterwarnings("ignore")

if __name__ == '__main__':
    # Script entry point: fit AutoARIMA models on the regular-cycle dataset,
    # print test metrics for the 'period' forecast and plot the next-cycle
    # forecast after the most recent observed cycle values.

    # load the data
    total_regular_cycle_data = pd.read_csv('dataset/total_regular_cycle_data.csv')
    features_total_regular_cycle_data, labels_total_regular_cycle_data = generate_final_features(total_regular_cycle_data)
    # Only the input windows are consumed below; the label splits are unused.
    input_train_total_regular_cycle, input_test_total_regular_cycle, _, _ = split_dataset(
        features_total_regular_cycle_data, labels_total_regular_cycle_data, reshape=False)

    # The converted frames must expose a 'time' column, which is used as the
    # time index of every series built below.
    input_train_total_regular_cycle_df = convet2dataframe(input_train_total_regular_cycle, ['period', 'cycle'])
    input_test_total_regular_cycle_df = convet2dataframe(input_test_total_regular_cycle, ['period', 'cycle'])

    # series for period prediction
    series = TimeSeries.from_dataframe(input_train_total_regular_cycle_df, time_col='time', value_cols=['period'])
    series_test = TimeSeries.from_dataframe(input_test_total_regular_cycle_df, time_col='time', value_cols=['period'])

    # series for cycle prediction
    series_cycle = TimeSeries.from_dataframe(input_train_total_regular_cycle_df, time_col='time', value_cols=['cycle'])
    series_cycle_test = TimeSeries.from_dataframe(input_test_total_regular_cycle_df, time_col='time', value_cols=['cycle'])

    # train the period model and forecast as many steps as the test series has
    model = AutoARIMA()
    model.fit(series)
    prediction_ = model.predict(len(series_test))

    # train the cycle model and forecast the next cycles
    model_cycle = AutoARIMA()
    model_cycle.fit(series_cycle)
    prediction_cycle = model_cycle.predict(3)

    # Evaluate the period forecast. Each printed label now matches the value
    # it reports (previously RMSE was printed as "MSE" and sqrt(MAE) as "RMSE").
    actual_period = series_test.values()
    predicted_period = prediction_.values()
    mse = mean_squared_error(actual_period, predicted_period)
    print('Test Score: %.2f MSE' % mse)
    mae = mean_absolute_error(actual_period, predicted_period)
    print('Test Score: %.2f MAE' % mae)
    print('Test Score: %.2f RMSE' % np.sqrt(mse))
    r2 = r2_score(actual_period, predicted_period)
    print('Test Score: %.2f R2' % r2)

    # Plot the most recent cycle values followed by the one-step forecast.
    # NOTE(review): the original plotted the 'period' test series under a
    # "Cycle serie" legend and referenced an undefined `prediction`; the cycle
    # series and cycle forecast are used here to match the title and legend.
    history_window = 15
    cycle_history = series_cycle_test.values()[-history_window:]
    # Derive the x positions from the slice so x and y always have equal length,
    # even when fewer than `history_window` points are available.
    history_x = np.arange(1, len(cycle_history) + 1)
    next_x = np.arange(len(cycle_history) + 1, len(cycle_history) + 2)

    plt.figure(figsize=(4, 3))
    plt.plot(history_x, cycle_history, '-->', linewidth=2.0)
    # Only the first forecast step is drawn, truncated to whole days.
    plt.plot(next_x, prediction_cycle.values()[0].astype(int), 'o')
    # TODO: add the LSTM and Lasso markers once their own forecasts are
    # available here; plotting the ARIMA value under those labels is misleading.
    # round the number in axis
    plt.gca().yaxis.set_major_locator(plt.MaxNLocator(integer=True))
    plt.gca().xaxis.set_major_locator(plt.MaxNLocator(integer=True))
    plt.ylabel('Days')
    plt.xlabel('Months')
    plt.legend(['Cycle serie', 'ARIMA'], loc='lower left')
    plt.title('Case 1: Predicting the next cycle duration')
    plt.grid(True)
    # NOTE(review): the figure is neither shown nor saved by this script.
55 changes: 55 additions & 0 deletions period_cycle_prediction/models/lstm_simulation_case1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.metrics import explained_variance_score
from sklearn.metrics import max_error
from utils.utils import generate_synthetic_data, generate_final_features, split_dataset
import warnings
warnings.filterwarnings("ignore")

if __name__ == '__main__':
    # Script entry point: train a single-layer LSTM on the total regular-cycle
    # dataset (case 1), plot the training/validation loss and save the loss
    # history to CSV.

    # Forward slashes work on every platform; the previous backslash path only
    # resolved on Windows and differed from the other scripts.
    total_regular_cycle_data = pd.read_csv('dataset/total_regular_cycle_data.csv')
    features_total_regular_cycle_data, labels_total_regular_cycle_data = generate_final_features(total_regular_cycle_data)
    input_train_total_regular_cycle, input_test_total_regular_cycle, output_train_total_regular_cycle, output_test_total_regular_cycle = split_dataset(features_total_regular_cycle_data, labels_total_regular_cycle_data, reshape=False)

    # create and fit the LSTM network
    # Take both the window length and the feature count from the training
    # tensor instead of hard-coding the window length.
    n_timesteps = input_train_total_regular_cycle.shape[1]
    n_features = input_train_total_regular_cycle.shape[2]
    model = Sequential()
    model.add(LSTM(64, input_shape=(n_timesteps, n_features), activation='tanh'))
    model.add(Dropout(0.05))
    # One output per input feature.
    model.add(Dense(n_features, activation='relu'))
    model.summary()

    opt = tf.keras.optimizers.Adam(learning_rate=0.01)
    model.compile(loss='mean_squared_error', optimizer=opt, run_eagerly=True)
    # Stop once validation loss has not improved for 5 epochs and roll back to
    # the best weights seen.
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    # 20% of the training data is held out for validation.
    history = model.fit(input_train_total_regular_cycle, output_train_total_regular_cycle, epochs=100, batch_size=32, validation_split=0.2, verbose=1, callbacks=[early_stopping])

    # plot the loss and val loss
    plt.figure(figsize=(4, 3))
    plt.plot(history.history['loss'], '-', linewidth=2)
    plt.plot(history.history['val_loss'], '--', linewidth=2)
    plt.grid(True)
    plt.legend(['Train', 'Validation'])
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.title('Case 1: LSTM model loss')
    # Inset axes showing the same curves restricted to epochs 70-93 and
    # loss values 0.1-3.
    ax = plt.axes([0.6, 0.4, .20, .20])
    plt.plot(history.history['loss'], '-', linewidth=2)
    plt.plot(history.history['val_loss'], '--', linewidth=2)
    plt.grid(True)
    ax.set_ylim(0.1, 3)
    ax.set_xlim(70, 93)
    # save history
    history_df = pd.DataFrame(history.history)
    history_df.to_csv('case1_history_lstm.csv', index=False)
70 changes: 70 additions & 0 deletions period_cycle_prediction/models/lstm_simulation_case2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.metrics import explained_variance_score
from sklearn.metrics import max_error
from utils.utils import generate_synthetic_data, generate_final_features, split_dataset
import warnings
warnings.filterwarnings("ignore")

if __name__ == '__main__':
    # Script entry point: train a stacked three-layer LSTM on the regular-cycle
    # dataset (case 2), plot the loss curves with a zoomed inset, and persist
    # the loss history, the figure and the trained model.

    raw_cycles = pd.read_csv('dataset/regular_cycle_data.csv')
    features, labels = generate_final_features(raw_cycles)
    x_train, x_test, y_train, y_test = split_dataset(features, labels, reshape=False)

    # Window length and feature count are read from the training tensor.
    window_len = x_train.shape[1]
    n_features = x_train.shape[2]

    # 128 -> 64 -> 32 LSTM units, each followed by 20% dropout; the dense head
    # emits one value per input feature.
    model = Sequential([
        LSTM(units=128, return_sequences=True, input_shape=(window_len, n_features)),
        Dropout(0.2),
        LSTM(units=64, return_sequences=True),
        Dropout(0.2),
        LSTM(units=32),
        Dropout(0.2),
        Dense(units=n_features, activation='relu'),
    ])
    model.compile(loss='mean_squared_error', optimizer=tf.keras.optimizers.Adam())

    # Halt after 10 epochs without validation improvement, keeping the best weights.
    stop_on_plateau = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=10, restore_best_weights=True)

    history = model.fit(
        x_train,
        y_train,
        epochs=2000,
        batch_size=32,
        validation_split=0.2,
        callbacks=[stop_on_plateau],
    )

    # (history key, line style) for the two curves drawn on both axes.
    loss_curves = (('loss', '-'), ('val_loss', '--'))

    # Main loss plot.
    fig = plt.figure(figsize=(4, 3))
    for curve_key, line_style in loss_curves:
        plt.plot(history.history[curve_key], line_style, linewidth=2)
    plt.grid(True)
    plt.legend(['Train', 'Validation'])
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.title('Case 2: LSTM model loss')

    # Inset axes repeating the curves, zoomed to epochs 1500-1650 and
    # loss values 1-6.
    inset = plt.axes([0.6, 0.4, .20, .20])
    for curve_key, line_style in loss_curves:
        plt.plot(history.history[curve_key], line_style, linewidth=2)
    plt.grid(True)
    inset.set_ylim(1, 6)
    inset.set_xlim(1500, 1650)

    # Persist the per-epoch losses, the figure and the trained model.
    pd.DataFrame(history.history).to_csv('case2_history_lstm.csv', index=False)
    fig.savefig('case2_loss_lstm.eps', dpi=300, bbox_inches='tight')
    model.save('case2_lstm_model.h5')
Loading

0 comments on commit f1ec52d

Please sign in to comment.