Skip to content

Commit

Permalink
Merge pull request #909 from alphagov/1894-streaming-csv
Browse files Browse the repository at this point in the history
1894: Export as CSV with chunked streaming response
  • Loading branch information
ahernp authored Feb 21, 2025
2 parents a6d9e7d + 4ac1fab commit fddc1ac
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 16 deletions.
42 changes: 29 additions & 13 deletions accessibility_monitoring_platform/apps/exports/csv_export_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@
from typing import Any, Literal

from django.db.models import QuerySet
from django.http import HttpResponse
from django.http import HttpResponse, StreamingHttpResponse
from django.urls import reverse

from ..audits.models import Audit
from ..cases.models import Case, CaseCompliance, CaseStatus, Contact
from ..reports.models import Report

DOWNLOAD_CASES_CHUNK_SIZE: int = 500


@dataclass
class CSVColumn:
Expand Down Expand Up @@ -887,21 +889,35 @@ def populate_csv_columns(

def download_cases(
    cases: QuerySet[Case], filename: str = "cases.csv"
) -> StreamingHttpResponse:
    """Stream the given Case queryset to the client as a CSV attachment.

    The CSV text is produced lazily by a generator and handed to a
    StreamingHttpResponse, so the complete export is never held in memory
    at once.

    Args:
        cases: Cases to export, one CSV row per case.
        filename: Name offered to the browser for the downloaded file.

    Returns:
        A streaming response with content type text/csv whose body is the
        header row (taken from CASE_COLUMNS_FOR_EXPORT) followed by the
        case rows.
    """
    # Local import keeps this function self-contained; Iterator is only
    # needed for the generator's annotation below.
    from collections.abc import Iterator

    class EchoBuffer:
        """File-like object whose write() returns the text instead of storing it."""

        def write(self, value_to_write: str) -> str:
            return value_to_write

    def get_csv_output(cases: QuerySet[Case]) -> Iterator[str]:
        """Yield the CSV text in chunks of DOWNLOAD_CASES_CHUNK_SIZE case rows."""
        # csv.writer.writerow() returns whatever the underlying write()
        # returns, so with EchoBuffer each call hands back the formatted line.
        writer: Any = csv.writer(EchoBuffer())
        column_row: list[str] = [
            column.column_header for column in CASE_COLUMNS_FOR_EXPORT
        ]
        pending: list[str] = [writer.writerow(column_row)]

        # Count from 1 so a chunk is flushed after every full
        # DOWNLOAD_CASES_CHUNK_SIZE rows rather than right after the first row.
        for counter, case in enumerate(cases, start=1):
            case_columns: list[CSVColumn] = populate_csv_columns(
                case=case, column_definitions=CASE_COLUMNS_FOR_EXPORT
            )
            row = [column.formatted_data for column in case_columns]
            pending.append(writer.writerow(row))
            if counter % DOWNLOAD_CASES_CHUNK_SIZE == 0:
                yield "".join(pending)
                pending = []

        # Flush the header (when there are no cases) or the final partial chunk.
        if pending:
            yield "".join(pending)

    response = StreamingHttpResponse(
        get_csv_output(cases=cases), content_type="text/csv"
    )
    response["Content-Disposition"] = f"attachment; filename={filename}"
    return response


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from typing import Any

import pytest
from django.http import HttpResponse
from django.http import HttpResponse, StreamingHttpResponse

from ...audits.models import Audit
from ...cases.models import Case, Contact
Expand Down Expand Up @@ -54,9 +54,17 @@
CONTACT_EMAIL: str = "[email protected]"


def decode_csv_response(response: HttpResponse) -> tuple[list[str], list[list[str]]]:
def decode_csv_response(
response: HttpResponse | StreamingHttpResponse,
) -> tuple[list[str], list[list[str]]]:
"""Decode CSV HTTP response and break into column names and data"""
content: str = response.content.decode("utf-8")
if isinstance(response, StreamingHttpResponse):
content_chunks: list[str] = [
chunk.decode("utf-8") for chunk in response.streaming_content
]
content: str = "".join(content_chunks)
else:
content: str = response.content.decode("utf-8")
csv_reader: Any = csv.reader(io.StringIO(content))
csv_body: list[list[str]] = list(csv_reader)
csv_header: list[str] = csv_body.pop(0)
Expand Down

0 comments on commit fddc1ac

Please sign in to comment.