Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,281 changes: 702 additions & 579 deletions poetry.lock

Large diffs are not rendered by default.

758 changes: 435 additions & 323 deletions requirements.txt

Large diffs are not rendered by default.

Empty file added tests/integration/__init__.py
Empty file.
19 changes: 19 additions & 0 deletions tests/integration/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from pathlib import Path
import pytest


@pytest.fixture
def data_folder_path():
    """Directory holding the integration-test input data files."""
    return Path(__file__).parent.resolve() / "data"


@pytest.fixture
def expected_folder_path():
    """Directory holding the pickled expected-output artefacts."""
    return Path(__file__).parent.resolve() / "expected"


@pytest.fixture
def ecb_path(data_folder_path):
    """Path to the ECB exchange-rate CSV used as test input."""
    return data_folder_path / "ecb_exchange_rate.csv"
Empty file.
101 changes: 101 additions & 0 deletions tests/integration/data/ecb_exchange_rate.csv

Large diffs are not rendered by default.

Binary file not shown.
37 changes: 37 additions & 0 deletions tests/integration/test_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from ts_diffusion.data import collate_convert_tensor, DataFrameDataset
import pandas as pd
import pickle
import pytest
from torch.testing import assert_allclose


@pytest.fixture
def input_length():
    """Length of the input window used by these tests."""
    return 3


@pytest.fixture
def horizon():
    """Number of future steps to forecast in these tests."""
    return 2


def test_dataframe_dataset(ecb_path, input_length, horizon, expected_folder_path):
    """Regression-test DataFrameDataset against a pickled artefact.

    Flip ``is_regenerate_artefact`` to True locally to rewrite the expected
    pickle, then set it back to False before committing — the guard below
    fails the test on purpose so a regeneration run is never mistaken for
    a passing one.
    """
    is_regenerate_artefact = False

    expected_data_path = expected_folder_path / "ecb_dataframe_dataset.pickle"

    # The Date column is dropped: only numeric exchange-rate columns are fed
    # into the dataset.
    df = pd.read_csv(ecb_path).drop("Date", axis=1)
    ds = DataFrameDataset(dataframe=df, input_length=input_length, horizon=horizon)

    if is_regenerate_artefact:
        with open(expected_data_path, "wb+") as fp:
            pickle.dump(ds, fp)
        raise Exception("regeneration set to True, please set it to False")

    with open(expected_data_path, "rb") as fp:
        ds_expected = pickle.load(fp)

    # Compare every (x, y) window element-wise against the stored artefact.
    for de, d in zip(list(ds_expected), list(ds)):
        for de_, d_ in zip(de, d):
            assert_allclose(de_, d_)
28 changes: 28 additions & 0 deletions tests/integration/test_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from ts_diffusion.model import Sample
from ts_diffusion.data import DataFrameDataset, collate_convert_tensor
from torch.utils.data import DataLoader
import pandas as pd

import pytest


@pytest.fixture
def input_length():
    """Length of the input window used by these tests."""
    return 3


@pytest.fixture
def horizon():
    """Number of future steps to forecast in these tests."""
    return 2


def test_sample(ecb_path, horizon, input_length):
    """Smoke-test the Sample module over DataLoader batches from the ECB csv."""
    frame = pd.read_csv(ecb_path).drop("Date", axis=1)
    dataset = DataFrameDataset(
        dataframe=frame, input_length=input_length, horizon=horizon
    )
    loader = DataLoader(dataset=dataset, batch_size=2, collate_fn=collate_convert_tensor)

    model = Sample()
    # Only checks that every batch passes through the module without error.
    for batch in loader:
        model(batch)
Empty file added tests/unit/__init__.py
Empty file.
35 changes: 35 additions & 0 deletions tests/unit/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import pytest
import pandas as pd


@pytest.fixture
def dataframe_length():
    """Row count for the toy dataframe fixture."""
    return 10


@pytest.fixture
def input_length():
    """Length of the input window used by the unit tests."""
    return 3


@pytest.fixture
def horizon():
    """Number of future steps to forecast in the unit tests."""
    return 2


def test_a(input_length):
    # NOTE(review): pytest does not collect test functions from conftest.py,
    # so this never runs — it looks like leftover scaffolding; consider
    # removing it or moving it into a test module.
    pass


@pytest.fixture
def dataframe(dataframe_length):
    """Toy frame: ordinal dates plus a monotonically increasing target."""
    dates = pd.date_range("2021-01-01", periods=dataframe_length)
    return pd.DataFrame(
        {
            "date": [day.toordinal() for day in dates],
            "target": list(range(dataframe_length)),
        }
    )
27 changes: 27 additions & 0 deletions tests/unit/test_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from ts_diffusion.data import DataFrameDataset
import pytest
from torch.utils.data import DataLoader
from ts_diffusion.data import collate_convert_tensor


@pytest.fixture
def dataframe_dataset(dataframe, input_length, horizon):
    """DataFrameDataset built from the toy dataframe fixture."""
    return DataFrameDataset(
        dataframe=dataframe,
        input_length=input_length,
        horizon=horizon,
    )


def test_dataframe_dataset_length(dataframe_dataset):

assert len(dataframe_dataset) == 6 # 10 - 3 - 2 + 1


def test_pandasdataframe_dataloader(dataframe_dataset):
    """DataLoader batches the dataset: 6 samples / batch_size 2 = 3 batches."""
    loader = DataLoader(
        dataset=dataframe_dataset,
        batch_size=2,
        collate_fn=collate_convert_tensor,
    )

    # One batch must materialise without error.
    next(iter(loader))

    assert len(loader) == 3  # len(dataframe_dataset) / 2 = 6 / 2 = 3
76 changes: 76 additions & 0 deletions ts_diffusion/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from torch.utils.data import DataLoader, Dataset
from typing import Optional

import pandas as pd
import torch
from loguru import logger


class DataFrameDataset(Dataset):
    """A map-style dataset of sliding windows over a pandas dataframe.

    Each item is a pair ``(x, y)`` of numpy arrays: ``x`` holds
    ``input_length`` consecutive rows and ``y`` the ``horizon`` rows that
    immediately follow them.

    :param dataframe: input dataframe; every column is treated as a feature.
    :param input_length: length of input in time dimension
    :param horizon: future length to be forecasted
    """

    def __init__(self, dataframe: pd.DataFrame, input_length: int, horizon: int):
        super().__init__()
        self.dataframe = dataframe
        self.input_length = input_length
        self.horizon = horizon
        self.dataframe_rows = len(self.dataframe)
        # Number of complete (input, horizon) windows; clamped at 0 so a
        # dataframe shorter than one window yields an empty dataset instead
        # of a negative __len__.
        self.length = max(
            0, self.dataframe_rows - self.input_length - self.horizon + 1
        )
        # NOTE(review): _validate_dataframe is defined but never invoked.
        # Calling it here would reject non-DatetimeIndex frames, which the
        # current unit tests rely on — confirm the intended contract before
        # wiring it in.

    def moving_slicing(self, idx):
        """Return the ``(x, y)`` window pair starting at row ``idx``."""
        x, y = (
            self.dataframe[idx : self.input_length + idx].values,
            self.dataframe[
                self.input_length + idx : self.input_length + self.horizon + idx
            ].values,
        )
        return x, y

    def _validate_dataframe(self):
        """Validate the input dataframe.

        - We require the dataframe index to be DatetimeIndex.
        - This dataset is null aversion.
        - Dataframe index should be sorted.
        """
        # pd.DatetimeIndex is the public alias of
        # pd.core.indexes.datetimes.DatetimeIndex.
        if not isinstance(self.dataframe.index, pd.DatetimeIndex):
            raise TypeError(
                f"Type of the dataframe index is not DatetimeIndex: {type(self.dataframe.index)}"
            )

        if self.dataframe.isnull().values.any():
            logger.warning("Dataframe has null")

        has_index_sorted = self.dataframe.index.equals(
            self.dataframe.index.sort_values()
        )
        if not has_index_sorted:
            logger.warning("Dataframe index is not sorted")

    def __getitem__(self, idx):
        # Guard against reading past the last complete window; negative
        # indices are intentionally delegated to pandas slicing semantics.
        if idx >= self.length:
            raise IndexError("End of dataset")
        return self.moving_slicing(idx)

    def __len__(self):
        return self.length


def collate_convert_tensor(batch):
    """batch first convention"""
    # Stack the first (input) and last (target) element of every sample
    # into one tensor each, batch dimension leading.
    stacked = [
        torch.tensor([sample[position] for sample in batch])
        for position in (0, -1)
    ]
    return stacked[0], stacked[1]
10 changes: 10 additions & 0 deletions ts_diffusion/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from torch import nn


class Sample(nn.Module):
    """A pass-through module: forward returns its input batch unchanged."""

    def __init__(self) -> None:
        # No learnable parameters; only nn.Module bookkeeping is set up.
        super().__init__()

    def forward(self, batch):
        """Identity mapping — return *batch* as-is."""
        return batch