diff --git a/04-dask.ipynb b/04-dask.ipynb index 8394aa2..06e29c2 100644 --- a/04-dask.ipynb +++ b/04-dask.ipynb @@ -74,55 +74,45 @@ "metadata": {}, "outputs": [], "source": [ - "ddf = dd.read_parquet(\"gcs://quansight-datasets/airline-ontime-performance/sorted/parquet_by_year\")" + "ddf = dd.read_parquet(\"gcs://quansight-datasets/airline-ontime-performance/sorted/full_dataset.parquet\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "9da49eb2-ee57-409a-a3bf-73e2c6cc196f", + "id": "01d5d2d8-54c9-4f47-9b94-49f8f0605572", "metadata": {}, "outputs": [], "source": [ - "ddf.head()" + "fligths_per_day = ddf.groupby(\"FL_DATE\", sort=True)[\"FLIGHTS\"].count().persist()" ] }, { "cell_type": "code", "execution_count": null, "id": "68434394-929a-4503-a149-6bd78bb96ab1", - "metadata": {}, - "outputs": [], - "source": [ - "hvplot.extension('bokeh')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0f88c057-e9cb-45b8-8a13-d316ede919a0", "metadata": { "scrolled": true }, "outputs": [], "source": [ - "list(ddf.columns)" + "hvplot.extension('bokeh')" ] }, { "cell_type": "code", "execution_count": null, - "id": "1324da8a-5ac8-4639-85ee-1bc393f27e4a", + "id": "8b90d2f8-5d26-4953-98b4-7213a9d36d97", "metadata": {}, "outputs": [], "source": [ - "ddf.groupby(\"FL_DATE\")[\"FLIGHTS\"].count().hvplot()" + "fligths_per_day.hvplot('FL_DATE', 'FLIGHTS')" ] }, { "cell_type": "code", "execution_count": null, - "id": "151ae525-6cf2-4260-8a00-e3eadff81306", + "id": "e4668ffa-de80-482d-9867-82731835eeb6", "metadata": {}, "outputs": [], "source": [