Skip to content

Commit

Permalink
Update dependencies and pre-commit
Browse files Browse the repository at this point in the history
  • Loading branch information
cjdsellers committed Jun 5, 2024
1 parent 9bd0835 commit 7247acb
Show file tree
Hide file tree
Showing 8 changed files with 109 additions and 95 deletions.
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ repos:
# General checks
##############################################################################
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
rev: v4.6.0
hooks:
- id: fix-encoding-pragma
args: [--remove]
Expand All @@ -24,7 +24,7 @@ repos:
- id: check-yaml

- repo: https://github.com/codespell-project/codespell
rev: v2.2.6
rev: v2.3.0
hooks:
- id: codespell
description: Checks for common misspellings.
Expand All @@ -41,20 +41,20 @@ repos:
types: [python]

- repo: https://github.com/psf/black
rev: 24.2.0
rev: 24.4.2
hooks:
- id: black
types_or: [python, pyi]
entry: "black"

- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.3.0
rev: v0.4.7
hooks:
- id: ruff
args: ["--fix"]

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.8.0
rev: v1.10.0
hooks:
- id: mypy
args: [--no-strict-optional, --ignore-missing-imports, --warn-no-return, --explicit-package-bases]
Expand Down
7 changes: 6 additions & 1 deletion bench_data/check_invariant.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
import sys

import pandas as pd


def check_file(file_name):
"""Check if the 'start_ts' column is in ascending order and 'end_ts' for a row group comes before 'start_ts' of the next row group."""
"""
Check if the 'start_ts' column is in ascending order and 'end_ts' for a row
group comes before 'start_ts' of the next row group.
"""
df = pd.read_csv(file_name)

# Check if 'start_ts' is in ascending order
Expand Down
16 changes: 10 additions & 6 deletions bench_data/extract_groups.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import sys

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import sys


# Define schema for quote ticks
quote_tick_schema = pa.schema(
Expand All @@ -12,15 +14,15 @@
("ask_size", pa.uint64()),
("ts_event", pa.uint64()),
("ts_init", pa.uint64()),
]
],
)

quote_tick_schema = quote_tick_schema.with_metadata(
{
"instrument_id": "EUR/USD.SIM",
"price_precision": "0",
"size_precision": "0",
}
},
)

trade_tick_schema = pa.schema(
Expand All @@ -31,15 +33,15 @@
("trade_id", pa.string()),
("ts_event", pa.uint64()),
("ts_init", pa.uint64()),
]
],
)

trade_tick_schema = trade_tick_schema.with_metadata(
{
"instrument_id": "EUR/USD.SIM",
"price_precision": "0",
"size_precision": "0",
}
},
)


Expand All @@ -64,7 +66,9 @@ def write_parquet_with_row_group(input_file, output_file, rows_per_row_group):

if __name__ == "__main__":
if len(sys.argv) < 4:
print("Usage: python extract_ts_init.py <parquet_file> <num_rows_per_row_group>")
print(
"Usage: python extract_ts_init.py <parquet_file> <num_rows_per_row_group>",
)
sys.exit(1)

# Get command-line inputs
Expand Down
7 changes: 5 additions & 2 deletions bench_data/extract_ts_init.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import pyarrow.parquet as pq
import csv
import sys

import pyarrow.parquet as pq


def extract_ts_init_values(parquet_file, csv_file):
"""Write the first and last 'ts_init' values of each row group to a CSV file."""
Expand All @@ -11,7 +12,9 @@ def extract_ts_init_values(parquet_file, csv_file):
# Open the CSV file for writing
with open(csv_file, "w", newline="") as csvfile:
writer = csv.writer(csvfile)
writer.writerow(["index", "start_ts", "end_ts", "group_size"]) # Write the header
writer.writerow(
["index", "start_ts", "end_ts", "group_size"],
) # Write the header

# Iterate over each row group in the Parquet file
for i in range(parquet_file.num_row_groups):
Expand Down
11 changes: 7 additions & 4 deletions bench_data/gen_data_stats.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import sys
import os
import csv
import os
import sys

import pyarrow.parquet as pq


Expand All @@ -9,7 +10,7 @@ def record_data_stats(folder_path, csv_file):
with open(csv_file, "w", newline="") as csvfile:
writer = csv.writer(csvfile)
writer.writerow(
["file_name", "file_size_kb", "total_rows", "max_row_group_size"]
["file_name", "file_size_kb", "total_rows", "max_row_group_size"],
) # Write the header

# Walk the folder
Expand All @@ -34,7 +35,9 @@ def record_data_stats(folder_path, csv_file):
max_row_group_size = max(max_row_group_size, num_rows)

# Write the statistics to the CSV file
writer.writerow([file_path, file_size_kb, total_rows, max_row_group_size])
writer.writerow(
[file_path, file_size_kb, total_rows, max_row_group_size],
)


if __name__ == "__main__":
Expand Down
8 changes: 6 additions & 2 deletions nautilus_data/hist_data_to_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@

from os import PathLike
from pathlib import Path
import requests

import requests
from nautilus_trader.persistence.catalog import ParquetDataCatalog
from nautilus_trader.persistence.wranglers import QuoteTickDataWrangler
from nautilus_trader.test_kit.providers import CSVTickDataLoader
Expand All @@ -36,7 +36,11 @@ def load_fx_hist_data(
instrument = TestInstrumentProvider.default_fx_ccy(currency)
wrangler = QuoteTickDataWrangler(instrument)

df = CSVTickDataLoader.load(filename, index_col=0, datetime_format="%Y%m%d %H%M%S%f")
df = CSVTickDataLoader.load(
filename,
index_col=0,
datetime_format="%Y%m%d %H%M%S%f",
)
df.columns = ["bid_price", "ask_price", "size"]
print(df)

Expand Down
Loading

0 comments on commit 7247acb

Please sign in to comment.