docs(python): Document read_().lazy() antipattern (#21623)
ritchie46 authored Mar 6, 2025
1 parent aa8e47f commit 5bb3675
Showing 4 changed files with 34 additions and 5 deletions.
10 changes: 6 additions & 4 deletions py-polars/polars/io/csv/functions.py
@@ -228,17 +228,19 @@ def read_csv(
--------
scan_csv : Lazily read from a CSV file or multiple files via glob patterns.
Warnings
--------
Calling `read_csv().lazy()` is an antipattern as this forces Polars to materialize
a full CSV file and therefore cannot push any optimizations into the reader.
Therefore always prefer `scan_csv` if you want to work with `LazyFrame`s.
Notes
-----
If the schema is inferred incorrectly (e.g. as `pl.Int64` instead of `pl.Float64`),
try to increase the number of lines used to infer the schema with
`infer_schema_length` or override the inferred dtype for those columns with
`schema_overrides`.
This operation defaults to a `rechunk` operation at the end, meaning that all data
will be stored contiguously in memory. Set `rechunk=False` if you are benchmarking
the CSV reader. A `rechunk` is an expensive operation.
Examples
--------
>>> pl.read_csv("data.csv", separator="|") # doctest: +SKIP
8 changes: 8 additions & 0 deletions py-polars/polars/io/ipc/functions.py
@@ -94,8 +94,16 @@ def read_ipc(
-------
DataFrame
See Also
--------
scan_ipc : Lazily read from an IPC file or multiple files via glob patterns.
Warnings
--------
Calling `read_ipc().lazy()` is an antipattern as this forces Polars to materialize
a full IPC file and therefore cannot push any optimizations into the reader.
Therefore always prefer `scan_ipc` if you want to work with `LazyFrame`s.
If `memory_map` is set, the bytes on disk are mapped 1:1 to memory.
That means that you cannot write to the same filename.
E.g. `pl.read_ipc("my_file.arrow").write_ipc("my_file.arrow")` will fail.
11 changes: 11 additions & 0 deletions py-polars/polars/io/ndjson.py
@@ -117,6 +117,17 @@ def read_ndjson(
include_file_paths
Include the path of the source file(s) as a column with this name.
See Also
--------
scan_ndjson : Lazily read from an NDJSON file or multiple files via glob patterns.
Warnings
--------
Calling `read_ndjson().lazy()` is an antipattern as this forces Polars to
materialize a full NDJSON file and therefore cannot push any optimizations into
the reader. Therefore always prefer `scan_ndjson` if you want to work with
`LazyFrame`s.
Examples
--------
>>> from io import StringIO
10 changes: 9 additions & 1 deletion py-polars/polars/io/parquet/functions.py
@@ -175,8 +175,16 @@ def read_parquet(
See Also
--------
scan_parquet
scan_parquet: Lazily read from a parquet file or multiple files via glob patterns.
scan_pyarrow_dataset
Warnings
--------
Calling `read_parquet().lazy()` is an antipattern as this forces Polars to
materialize a full Parquet file and therefore cannot push any optimizations
into the reader. Therefore always prefer `scan_parquet` if you want to work
with `LazyFrame`s.
"""
if schema is not None:
msg = "The `schema` parameter of `read_parquet` is considered unstable."