Skip to content

Commit

Permalink
Merge pull request #17 from cilab-ufersa/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
roscibely authored Jun 19, 2023
2 parents 4a983ae + f1ec52d commit 516a138
Show file tree
Hide file tree
Showing 20 changed files with 8,904 additions and 8 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
# Scope is all repo folders.
*.json
*.ini
*.log
mlruns/
catboost_info/
*.png
*.eps


# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Period Cycle Prediction
# Predictive Modeling of Menstrual Cycle Length using Artificial Intelligence ⏰

## About

Artificial intelligence implementation for better cycle predictions.
A time series forecasting approach, based on artificial intelligence, for better menstrual cycle predictions.

The period can be hard to predict when a woman has irregular cycles. Moreover, the length of the menstrual cycle varies from woman to woman, so every woman has her own particular cycle. AI can help us better understand women's cycles.

Expand Down Expand Up @@ -35,3 +35,9 @@ $ pip install -r requirements.txt
|---- |---- | ----- | ------ |
| 6 | 30|20XX | Starts |
| 7 | 1|20XX | Ends |

---

## Publications related to this project

Rosana Rego. 2023. [Predictive Modeling of Menstrual Cycle Length: A Time Series Forecasting Approach](https://doi.org/10.21203/rs.3.rs-3050181/v1), PREPRINT (Version 1) available at Research Square.
Binary file added icon.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
65 changes: 65 additions & 0 deletions period_cycle_prediction/models/arima.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from utils.utils import generate_synthetic_data, generate_final_features, split_dataset, convet2dataframe
from darts import TimeSeries
from darts.models import AutoARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import warnings
warnings.filterwarnings("ignore")

if __name__ == '__main__':
    # Script entry point: fit one AutoARIMA model on the 'period' column and
    # one on the 'cycle' column of the regular-cycle dataset, print error
    # metrics for the period forecast, and plot the forecast next cycle.

    # load the data
    total_regular_cycle_data = pd.read_csv('dataset/total_regular_cycle_data.csv')
    features_total_regular_cycle_data, labels_total_regular_cycle_data = generate_final_features(total_regular_cycle_data)
    (
        input_train_total_regular_cycle,
        input_test_total_regular_cycle,
        output_train_total_regular_cycle,
        output_test_total_regular_cycle,
    ) = split_dataset(
        features_total_regular_cycle_data,
        labels_total_regular_cycle_data,
        reshape=False,
    )

    # presumably convet2dataframe adds the 'time' column used as the series
    # index below -- TODO confirm against utils.utils
    input_train_total_regular_cycle_df = convet2dataframe(input_train_total_regular_cycle, ['period', 'cycle'])
    output_train_total_regular_cycle = convet2dataframe(output_train_total_regular_cycle, ['period', 'cycle'])
    input_test_total_regular_cycle_df = convet2dataframe(input_test_total_regular_cycle, ['period', 'cycle'])
    series_test = TimeSeries.from_dataframe(input_test_total_regular_cycle_df, 'time', ['period'])
    output_train_series = TimeSeries.from_dataframe(output_train_total_regular_cycle, 'time', ['period'])
    series = TimeSeries.from_dataframe(input_train_total_regular_cycle_df, time_col='time', value_cols=['period'])

    # series for cycle prediction
    series_cycle = TimeSeries.from_dataframe(input_train_total_regular_cycle_df, time_col='time', value_cols=['cycle'])
    series_cycle_test = TimeSeries.from_dataframe(input_test_total_regular_cycle_df, time_col='time', value_cols=['cycle'])

    # train the model on the period series
    model = AutoARIMA()
    model.fit(series)

    # forecast as many steps as there are test observations
    prediction_ = model.predict(len(series_test))
    #-----------------------------------#
    # ARIMA model for the cycle series
    model_cycle = AutoARIMA()
    model_cycle.fit(series_cycle)
    # forecast the next three cycles
    prediction_cycle = model_cycle.predict(3)

    # Error metrics for the period forecast. Each value is kept in its own
    # variable so that every printed label matches the quantity it reports
    # (RMSE is the square root of the MSE, not of the MAE).
    observed = series_test.values()
    forecast = prediction_.values()
    mse = mean_squared_error(observed, forecast)
    print('Test Score: %.2f MSE' % (mse))
    # calculate mean absolute error
    mae = mean_absolute_error(observed, forecast)
    print('Test Score: %.2f MAE' % (mae))
    # RMSE
    print('Test Score: %.2f RMSE' % np.sqrt(mse))
    # calculate r2 score
    r2 = r2_score(observed, forecast)
    print('Test Score: %.2f R2' % (r2))

    # Plot the most recent cycle lengths followed by the first forecast cycle.
    # The x axis is sized from the data so x and y always have equal length.
    recent_cycles = series_cycle_test.values()[-15:]
    n_recent = len(recent_cycles)
    next_step = np.arange(n_recent + 1, n_recent + 2)
    next_cycle = prediction_cycle.values()[0].astype(int)

    plt.figure(figsize=(4, 3))
    plt.plot(np.arange(1, n_recent + 1), recent_cycles, '-->', linewidth=2.0)
    # NOTE(review): all three markers show the ARIMA forecast; the 'LSTM' and
    # 'Lasso' legend entries are placeholders until those predictions are
    # available in this script.
    plt.plot(next_step, next_cycle, 'o')
    plt.plot(next_step, next_cycle, 'h')
    plt.plot(next_step, next_cycle, '*')
    # round the number in axis
    plt.gca().yaxis.set_major_locator(plt.MaxNLocator(integer=True))
    plt.gca().xaxis.set_major_locator(plt.MaxNLocator(integer=True))
    plt.ylabel('Days')
    plt.xlabel('Months')
    plt.legend(['Cycle serie', 'ARIMA', 'LSTM', 'Lasso'], loc='lower left')
    plt.title('Case 1: Predicting the next cycle duration')
    plt.grid(True)
    # NOTE(review): the figure is neither shown nor saved here -- confirm
    # whether plt.show() or savefig() is wanted.
55 changes: 55 additions & 0 deletions period_cycle_prediction/models/lstm_simulation_case1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.metrics import explained_variance_score
from sklearn.metrics import max_error
from utils.utils import generate_synthetic_data, generate_final_features, split_dataset
import warnings
warnings.filterwarnings("ignore")

if __name__ == '__main__':
    # Script entry point (case 1): train a single-layer LSTM on the
    # total-regular-cycle dataset, plot the training/validation loss and save
    # the loss history to CSV.

    # Forward slashes keep the path portable (a backslash separator only
    # resolves on Windows) and match the other model scripts.
    total_regular_cycle_data = pd.read_csv('dataset/total_regular_cycle_data.csv')
    features_total_regular_cycle_data, labels_total_regular_cycle_data = generate_final_features(total_regular_cycle_data)
    (
        input_train_total_regular_cycle,
        input_test_total_regular_cycle,
        output_train_total_regular_cycle,
        output_test_total_regular_cycle,
    ) = split_dataset(
        features_total_regular_cycle_data,
        labels_total_regular_cycle_data,
        reshape=False,
    )

    # create and fit the LSTM network
    # Read the window length from the data instead of hard-coding it, so the
    # network always matches whatever split_dataset produced.
    # assumes the training input is (samples, timesteps, features) -- TODO confirm
    n_timesteps = input_train_total_regular_cycle.shape[1]
    n_features = input_train_total_regular_cycle.shape[2]
    model = Sequential()
    model.add(LSTM(64, input_shape=(n_timesteps, n_features), activation='tanh'))
    model.add(Dropout(0.05))
    model.add(Dense(n_features, activation='relu'))
    model.summary()

    opt = tf.keras.optimizers.Adam(learning_rate=0.01)
    model.compile(loss='mean_squared_error', optimizer=opt, run_eagerly=True)
    # stop once val_loss has not improved for 5 epochs and restore the best weights
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    history = model.fit(
        input_train_total_regular_cycle,
        output_train_total_regular_cycle,
        epochs=100,
        batch_size=32,
        validation_split=0.2,
        verbose=1,
        callbacks=[early_stopping],
    )

    # plot the loss and val loss
    plt.figure(figsize=(4, 3))
    plt.plot(history.history['loss'], '-', linewidth=2)
    plt.plot(history.history['val_loss'], '--', linewidth=2)
    plt.grid(True)
    plt.legend(['Train', 'Validation'])
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.title('Case 1: LSTM model loss')
    # inset axes showing the same curves, limited to epochs 70-93
    ax = plt.axes([0.6, 0.4, .20, .20])
    plt.plot(history.history['loss'], '-', linewidth=2)
    plt.plot(history.history['val_loss'], '--', linewidth=2)
    plt.grid(True)
    ax.set_ylim(0.1, 3)
    ax.set_xlim(70, 93)
    # save history
    history_df = pd.DataFrame(history.history)
    history_df.to_csv('case1_history_lstm.csv', index=False)
70 changes: 70 additions & 0 deletions period_cycle_prediction/models/lstm_simulation_case2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.metrics import explained_variance_score
from sklearn.metrics import max_error
from utils.utils import generate_synthetic_data, generate_final_features, split_dataset
import warnings
warnings.filterwarnings("ignore")

if __name__ == '__main__':
    # Script entry point (case 2): train a three-layer stacked LSTM on the
    # regular-cycle dataset, then persist the loss history (CSV), the loss
    # figure (EPS) and the trained model (HDF5).

    raw_cycles = pd.read_csv('dataset/regular_cycle_data.csv')
    features, labels = generate_final_features(raw_cycles)
    x_train, x_test, y_train, y_test = split_dataset(features, labels, reshape=False)

    # The network's input window and output width are both taken from the
    # training array.
    window = x_train.shape[1]
    n_features = x_train.shape[2]

    model = Sequential([
        LSTM(units=128, return_sequences=True, input_shape=(window, n_features)),
        Dropout(0.2),
        LSTM(units=64, return_sequences=True),
        Dropout(0.2),
        LSTM(units=32),
        Dropout(0.2),
        Dense(units=n_features, activation='relu'),
    ])
    model.compile(loss='mean_squared_error', optimizer=tf.keras.optimizers.Adam())

    # Stop once val_loss has not improved for 10 epochs and roll back to the
    # best weights seen.
    stopper = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True,
    )
    history = model.fit(
        x_train,
        y_train,
        epochs=2000,
        batch_size=32,
        validation_split=0.2,
        callbacks=[stopper],
    )
    curves = history.history

    # Main panel: training vs. validation loss per epoch.
    fig = plt.figure(figsize=(4, 3))
    plt.plot(curves['loss'], '-', linewidth=2)
    plt.plot(curves['val_loss'], '--', linewidth=2)
    plt.grid(True)
    plt.legend(['Train', 'Validation'])
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.title('Case 2: LSTM model loss')

    # Inset panel: the same curves, limited to epochs 1500-1650.
    inset = plt.axes([0.6, 0.4, .20, .20])
    inset.plot(curves['loss'], '-', linewidth=2)
    inset.plot(curves['val_loss'], '--', linewidth=2)
    inset.grid(True)
    inset.set_ylim(1, 6)
    inset.set_xlim(1500, 1650)

    # Persist the artefacts: loss history, figure, trained model.
    pd.DataFrame(curves).to_csv('case2_history_lstm.csv', index=False)
    fig.savefig('case2_loss_lstm.eps', dpi=300, bbox_inches='tight')
    model.save('case2_lstm_model.h5')
Loading

0 comments on commit 516a138

Please sign in to comment.