Skip to content

Commit

Permalink
Add nycflights dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
rich-iannone committed Feb 11, 2025
1 parent 03502b1 commit 39333ba
Show file tree
Hide file tree
Showing 6 changed files with 336,787 additions and 1 deletion.
336,777 changes: 336,777 additions & 0 deletions data_raw/nycflights.csv

Large diffs are not rendered by default.

Binary file added data_raw/nycflights.ddb
Binary file not shown.
8 changes: 8 additions & 0 deletions data_raw/x-02-duckdb.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ tbl_dates_times_text = pl.DataFrame(
)
small_table = pb.load_dataset(dataset="small_table", tbl_type="polars")
game_revenue = pb.load_dataset(dataset="game_revenue", tbl_type="polars")
nycflights = pb.load_dataset(dataset="nycflights", tbl_type="polars")
```


Expand Down Expand Up @@ -59,3 +60,10 @@ with duckdb.connect(database="game_revenue.ddb", read_only=False) as con:
CREATE TABLE IF NOT EXISTS 'game_revenue' AS SELECT * FROM game_revenue;
""")
```

```{python}
# Write the `nycflights` table into the DuckDB database file `nycflights.ddb`.
# The connection is opened with `read_only=False` and is managed by the `with` block.
# `CREATE TABLE IF NOT EXISTS` leaves an already-present `nycflights` table untouched,
# so re-running this chunk does not overwrite it.
# NOTE(review): the bare `nycflights` in the `FROM` clause presumably resolves to the
# in-scope Python DataFrame of the same name (DuckDB replacement scan) -- confirm
# before renaming that variable or moving this statement into a helper function.
# NOTE(review): the `f` prefix on the SQL string has no placeholders to interpolate.
with duckdb.connect(database="nycflights.ddb", read_only=False) as con:
con.execute(f"""
CREATE TABLE IF NOT EXISTS 'nycflights' AS SELECT * FROM nycflights;
""")
```
Binary file added pointblank/data/nycflights-duckdb.zip
Binary file not shown.
Binary file added pointblank/data/nycflights.zip
Binary file not shown.
3 changes: 2 additions & 1 deletion pointblank/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def load_dataset(
"""

# Raise an error if the dataset is not in the list of provided datasets
if dataset not in ["small_table", "game_revenue"]:
if dataset not in ["small_table", "game_revenue", "nycflights"]:
raise ValueError(
f"The dataset name `{dataset}` is not valid. Choose one of the following:\n"
"- `small_table`\n"
Expand Down Expand Up @@ -245,6 +245,7 @@ def load_dataset(
parse_date_columns = {
"small_table": ["date_time", "date"],
"game_revenue": ["session_start", "time", "start_day"],
"nycflights": [],
}

dataset = pd.read_csv(data_path, parse_dates=parse_date_columns[dataset])
Expand Down

0 comments on commit 39333ba

Please sign in to comment.