Skip to content

Commit 4321082

Browse files
michaelchuclaude
and authored
Add PostgreSQL-backed data store for production deployments (#238)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 23656b5 commit 4321082

File tree

11 files changed

+452
-28
lines changed

11 files changed

+452
-28
lines changed

optopsy/data/_yf_helpers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@
99

1010
import pandas as pd
1111

12-
from optopsy.data.providers.cache import ParquetCache
12+
from optopsy.data.providers.cache import get_store
1313

1414
_log = logging.getLogger(__name__)
1515

1616
# Cache for yfinance OHLCV data (category="yf_stocks", one file per symbol).
1717
# Deliberately distinct from EODHD's "stocks" category to avoid schema collisions.
18-
_yf_cache = ParquetCache()
18+
_yf_cache = get_store()
1919
_YF_CACHE_CATEGORY = "yf_stocks"
2020
_YF_DEDUP_COLS = ["date"]
2121

optopsy/data/cli.py

Lines changed: 82 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,16 @@ def _cmd_cache_size(args: argparse.Namespace) -> None:
2222

2323
import_optional_dependency("pyarrow")
2424

25-
from optopsy.data.providers.cache import ParquetCache
25+
from optopsy.data.providers.cache import get_store
2626

27-
cache = ParquetCache()
28-
entries = cache.size()
27+
store = get_store()
28+
entries = store.size()
2929
if not entries:
3030
print("Cache is empty.")
3131
return
3232
for name, nbytes in entries.items():
3333
print(f" {name:<30s} {_format_bytes(nbytes):>10s}")
34-
print(f" {'Total':<30s} {_format_bytes(cache.total_size_bytes()):>10s}")
34+
print(f" {'Total':<30s} {_format_bytes(store.total_size_bytes()):>10s}")
3535

3636

3737
def _cmd_cache_clear(args: argparse.Namespace) -> None:
@@ -40,11 +40,11 @@ def _cmd_cache_clear(args: argparse.Namespace) -> None:
4040

4141
import_optional_dependency("pyarrow")
4242

43-
from optopsy.data.providers.cache import ParquetCache
43+
from optopsy.data.providers.cache import get_store
4444

45-
cache = ParquetCache()
45+
store = get_store()
4646
symbol = args.symbol
47-
count = cache.clear(symbol=symbol)
47+
count = store.clear(symbol=symbol)
4848
if symbol:
4949
print(f"Cleared {count} cached file(s) for {symbol.upper()}.")
5050
else:
@@ -199,10 +199,10 @@ def _download_stocks_with_rich(symbol: str) -> None:
199199
return
200200

201201
from optopsy.data._yf_helpers import _YF_CACHE_CATEGORY, _yf_fetch_and_cache
202-
from optopsy.data.providers.cache import ParquetCache
202+
from optopsy.data.providers.cache import get_store
203203

204-
cache = ParquetCache()
205-
cached = cache.read(_YF_CACHE_CATEGORY, symbol)
204+
store = get_store()
205+
cached = store.read(_YF_CACHE_CATEGORY, symbol)
206206

207207
with console.status(f"[bold green]Fetching {symbol} from yfinance…"):
208208
try:
@@ -219,7 +219,7 @@ def _download_stocks_with_rich(symbol: str) -> None:
219219
date_max = pd.to_datetime(result["date"]).dt.date.max()
220220
row_count = len(result)
221221

222-
size_bytes = cache.size().get(f"{_YF_CACHE_CATEGORY}/{symbol}", 0)
222+
size_bytes = store.size().get(f"{_YF_CACHE_CATEGORY}/{symbol}", 0)
223223
size_str = _format_bytes(size_bytes)
224224

225225
console.print(f" [bold]{symbol}[/bold] {date_min}{date_max}")
@@ -279,6 +279,58 @@ def _render() -> None:
279279
)
280280

281281

282+
def _cmd_import(args: argparse.Namespace) -> None:
    """Import local parquet files into the PostgreSQL data store.

    Each file in ``args.files`` is read with pandas and written into the
    configured store under a data category.  The category is taken from
    ``args.category`` when given, otherwise inferred from the file path
    (``.../options/...`` or ``.../yf_stocks/...``); files whose category
    cannot be determined are skipped with a warning.  Requires
    ``DATABASE_URL`` to point at a PostgreSQL database.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed CLI arguments with ``files`` (list of parquet paths) and
        ``category`` (``"options"``, ``"yf_stocks"``, or ``None``).
    """
    import os

    import pandas as pd
    from rich.console import Console

    _load_env()

    from optopsy.data.providers.cache import get_store

    console = Console()
    store = get_store()

    # Refuse to run against the file-based fallback: import only makes
    # sense when get_store() has selected the PostgreSQL backend.
    db_url = os.environ.get("DATABASE_URL", "")
    if not db_url.startswith(("postgres://", "postgresql://")):
        console.print(
            "[red]DATABASE_URL is not set or is not PostgreSQL. "
            "Import requires a PostgreSQL database.[/red]"
        )
        return

    for fpath in args.files:
        # Expand ~ so the advertised "~/.optopsy/..." paths work even when
        # the shell did not expand them (e.g. quoted arguments).
        fpath = os.path.expanduser(fpath)
        if not os.path.exists(fpath):
            console.print(f"[red]File not found: {fpath}[/red]")
            continue

        # Auto-detect category from the directory component of the path.
        category = args.category
        if not category:
            if "/options/" in fpath or "\\options\\" in fpath:
                category = "options"
            elif "/yf_stocks/" in fpath or "\\yf_stocks\\" in fpath:
                category = "yf_stocks"
            else:
                console.print(
                    f"[yellow]Cannot detect category for {fpath}. Use -c flag.[/yellow]"
                )
                continue

        # Strip only the trailing ".parquet" suffix; a plain replace() would
        # also mangle names that contain ".parquet" elsewhere.
        symbol = os.path.basename(fpath).removesuffix(".parquet").upper()

        with console.status(f"Importing {symbol} ({category})..."):
            df = pd.read_parquet(fpath)
            store.write(category, symbol, df)

        console.print(
            f" [green]Imported {symbol}: {len(df):,} rows into {category}[/green]"
        )
282334
def _build_data_subparsers(
283335
subparsers: argparse._SubParsersAction,
284336
) -> dict[str, argparse.ArgumentParser]:
@@ -340,6 +392,25 @@ def _build_data_subparsers(
340392
)
341393
clear_parser.set_defaults(func=_cmd_cache_clear)
342394

395+
# --- import ---
396+
import_parser = subparsers.add_parser(
397+
"import",
398+
help="Import local parquet files into the PostgreSQL data store",
399+
)
400+
import_parser.add_argument(
401+
"files",
402+
nargs="+",
403+
help="Parquet file paths (e.g. ~/.optopsy/cache/options/SPY.parquet)",
404+
)
405+
import_parser.add_argument(
406+
"-c",
407+
"--category",
408+
choices=["options", "yf_stocks"],
409+
default=None,
410+
help="Data category (auto-detected from path if omitted)",
411+
)
412+
import_parser.set_defaults(func=_cmd_import)
413+
343414
return {"cache": cache_parser}
344415

345416

optopsy/data/providers/cache.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import pandas as pd
1818

1919
from optopsy.data.paths import CACHE_DIR
20+
from optopsy.data.providers.store import DataStore
2021

2122
_log = logging.getLogger(__name__)
2223

@@ -98,7 +99,7 @@ def compute_date_gaps(
9899
return gaps
99100

100101

101-
class ParquetCache:
102+
class ParquetCache(DataStore):
102103
"""Simple parquet-based cache for immutable historical data.
103104
104105
Each (category, symbol) pair maps to a single parquet file at:
@@ -212,3 +213,17 @@ def size(self) -> dict[str, int]:
212213
def total_size_bytes(self) -> int:
213214
"""Return total cache size in bytes."""
214215
return sum(self.size().values())
216+
217+
218+
def get_store() -> DataStore:
    """Select the data store backend for this process.

    The PostgreSQL-backed ``PostgresStore`` is used when the
    ``DATABASE_URL`` environment variable contains a PostgreSQL URL;
    in every other case the file-based ``ParquetCache`` is returned.
    """
    url = os.environ.get("DATABASE_URL", "")
    uses_postgres = url.startswith("postgres://") or url.startswith("postgresql://")
    if not uses_postgres:
        return ParquetCache()

    # Imported lazily so the PostgreSQL backend module is only loaded
    # when a PostgreSQL URL is actually configured.
    from optopsy.data.providers.pg_store import PostgresStore

    return PostgresStore(url)

optopsy/data/providers/eodhd.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
import requests
2929

3030
from .base import DataProvider
31-
from .cache import ParquetCache
31+
from .cache import get_store
3232

3333
_log = logging.getLogger(__name__)
3434

@@ -147,7 +147,7 @@ def _safe_raise_for_status(resp: requests.Response) -> None:
147147

148148
class EODHDProvider(DataProvider):
149149
def __init__(self) -> None:
150-
self._cache = ParquetCache()
150+
self._cache = get_store()
151151
self._session = requests.Session()
152152
self._last_request_time: float = 0.0
153153
self._request_count: int = 0

0 commit comments

Comments
 (0)