Skip to content

Commit

Permalink
Merge pull request #912 from alphagov/1899-streaming-csvs
Browse files Browse the repository at this point in the history
1899: Turn all CSV exports into streaming downloads
  • Loading branch information
ahernp authored Feb 25, 2025
2 parents 1d60f0b + bc531fa commit 0730ba4
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 89 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,6 @@ class MockForm:
cleaned_data: dict[str, str]


def decode_csv_response(response: HttpResponse) -> tuple[list[str], list[list[str]]]:
    """Split a CSV HTTP response into its header row and its remaining data rows"""
    decoded_text: str = response.content.decode("utf-8")
    rows: list[list[str]] = list(csv.reader(io.StringIO(decoded_text)))
    # Index (rather than unpack) so an empty response still raises IndexError
    return rows[0], rows[1:]


def validate_csv_response(
csv_header: list[str],
csv_body: list[list[str]],
Expand Down
119 changes: 58 additions & 61 deletions accessibility_monitoring_platform/apps/exports/csv_export_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
import csv
from dataclasses import dataclass
from datetime import date, datetime
from typing import Any, Literal
from typing import Any, Generator, Literal

from django.db.models import QuerySet
from django.http import HttpResponse, StreamingHttpResponse
from django.http import StreamingHttpResponse
from django.urls import reverse

from ..audits.models import Audit
Expand Down Expand Up @@ -839,31 +839,6 @@ def populate_equality_body_columns(
return columns


def download_equality_body_cases(
    cases: QuerySet[Case],
    filename: str = "enforcement_body_cases.csv",
) -> HttpResponse:
    """
    Build a CSV attachment of equality body data for the given Cases.

    The first row holds the column headers taken from
    EQUALITY_BODY_COLUMNS_FOR_EXPORT; each following row holds the formatted
    data of one Case, in queryset order.
    """
    header_row: list[str] = [
        column.column_header for column in EQUALITY_BODY_COLUMNS_FOR_EXPORT
    ]
    data_rows: list[list[str]] = [
        [
            column.formatted_data
            for column in populate_equality_body_columns(case=case)
        ]
        for case in cases
    ]

    response: HttpResponse = HttpResponse(content_type="text/csv")
    response["Content-Disposition"] = f"attachment; filename={filename}"

    # The response object is file-like, so the csv writer writes straight into it
    csv_writer: Any = csv.writer(response)
    csv_writer.writerow(header_row)
    csv_writer.writerows(data_rows)
    return response


def populate_csv_columns(
case: Case, column_definitions: list[CSVColumn]
) -> list[CSVColumn]:
Expand All @@ -887,59 +862,81 @@ def populate_csv_columns(
return columns


def download_cases(cases: QuerySet[Case], filename: str = "cases.csv") -> HttpResponse:
"""Given a Case queryset, download the data in csv format"""
def csv_output_generator(
cases: QuerySet[Case],
columns_for_export: list[CSVColumn],
equality_body_csv: bool = False,
) -> Generator[str, None, None]:
"""
Generate a series of strings containing the content for a CSV streaming response
"""

class DummyFile:
def write(self, value_to_write):
return value_to_write

def get_csv_output(cases: QuerySet[Case]) -> list[Any]:
writer: Any = csv.writer(DummyFile())
column_row: list[str] = [
column.column_header for column in CASE_COLUMNS_FOR_EXPORT
]
writer: Any = csv.writer(DummyFile())
column_row: list[str] = [column.column_header for column in columns_for_export]

output: str = writer.writerow(column_row)
output: str = writer.writerow(column_row)

for counter, case in enumerate(cases):
for counter, case in enumerate(cases):
if equality_body_csv is True:
case_columns: list[EqualityBodyCSVColumn] = populate_equality_body_columns(
case=case
)
else:
case_columns: list[CSVColumn] = populate_csv_columns(
case=case, column_definitions=CASE_COLUMNS_FOR_EXPORT
case=case, column_definitions=columns_for_export
)
row = [column.formatted_data for column in case_columns]
output += writer.writerow(row)
if counter % DOWNLOAD_CASES_CHUNK_SIZE == 0:
yield output
output = ""
if output:
row = [column.formatted_data for column in case_columns]
output += writer.writerow(row)
if counter % DOWNLOAD_CASES_CHUNK_SIZE == 0:
yield output
output = ""
if output:
yield output


def download_equality_body_cases(
cases: QuerySet[Case],
filename: str = "enforcement_body_cases.csv",
) -> StreamingHttpResponse:
"""Given a Case queryset, download the data in csv format for equality body"""
response = StreamingHttpResponse(
get_csv_output(cases=cases), content_type="text/csv"
csv_output_generator(
cases=cases,
columns_for_export=EQUALITY_BODY_COLUMNS_FOR_EXPORT,
equality_body_csv=True,
),
content_type="text/csv",
)
response["Content-Disposition"] = f"attachment; filename={filename}"
return response


def download_feedback_survey_cases(
cases: QuerySet[Case], filename: str = "feedback_survey_cases.csv"
) -> HttpResponse:
"""Given a Case queryset, download the feedback survey data in csv format"""
response: HttpResponse = HttpResponse(content_type="text/csv")
response["Content-Disposition"] = f"attachment; filename={filename}"
def download_cases(
cases: QuerySet[Case], filename: str = "cases.csv"
) -> StreamingHttpResponse:
"""Given a Case queryset, download the data in csv format"""

writer: Any = csv.writer(response)
writer.writerow(
[column.column_header for column in FEEDBACK_SURVEY_COLUMNS_FOR_EXPORT]
response = StreamingHttpResponse(
csv_output_generator(cases=cases, columns_for_export=CASE_COLUMNS_FOR_EXPORT),
content_type="text/csv",
)
response["Content-Disposition"] = f"attachment; filename={filename}"
return response

output: list[list[str]] = []
for case in cases:
case_columns: list[CSVColumn] = populate_csv_columns(
case=case, column_definitions=FEEDBACK_SURVEY_COLUMNS_FOR_EXPORT
)
row = [column.formatted_data for column in case_columns]
output.append(row)
writer.writerows(output)

def download_feedback_survey_cases(
    cases: QuerySet[Case], filename: str = "feedback_survey_cases.csv"
) -> StreamingHttpResponse:
    """Stream the feedback survey data for the given Cases as a CSV attachment"""
    csv_chunks: Generator[str, None, None] = csv_output_generator(
        cases=cases, columns_for_export=FEEDBACK_SURVEY_COLUMNS_FOR_EXPORT
    )
    streaming_response = StreamingHttpResponse(csv_chunks, content_type="text/csv")
    streaming_response["Content-Disposition"] = f"attachment; filename={filename}"
    return streaming_response
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import csv
import io
from datetime import date, datetime, timezone
from typing import Any
from typing import Any, Generator

import pytest
from django.http import HttpResponse, StreamingHttpResponse
Expand All @@ -18,6 +18,7 @@
FEEDBACK_SURVEY_COLUMNS_FOR_EXPORT,
CSVColumn,
EqualityBodyCSVColumn,
csv_output_generator,
download_cases,
download_equality_body_cases,
download_feedback_survey_cases,
Expand Down Expand Up @@ -55,16 +56,13 @@


def decode_csv_response(
response: HttpResponse | StreamingHttpResponse,
response: StreamingHttpResponse,
) -> tuple[list[str], list[list[str]]]:
"""Decode CSV HTTP response and break into column names and data"""
if isinstance(response, StreamingHttpResponse):
content_chunks: list[str] = [
chunk.decode("utf-8") for chunk in response.streaming_content
]
content: str = "".join(content_chunks)
else:
content: str = response.content.decode("utf-8")
content_chunks: list[str] = [
chunk.decode("utf-8") for chunk in response.streaming_content
]
content: str = "".join(content_chunks)
csv_reader: Any = csv.reader(io.StringIO(content))
csv_body: list[list[str]] = list(csv_reader)
csv_header: list[str] = csv_body.pop(0)
Expand Down Expand Up @@ -489,3 +487,31 @@ def test_populate_feedback_survey_columns():
)

assert len(row) == 8


@pytest.mark.django_db
def test_csv_output_generator():
    """
    Test CSV output generator yields its content in chunks:
    1. First chunk holds the column headers and the first Case
    2. Second chunk holds the next 500 Cases (current DOWNLOAD_CASES_CHUNK_SIZE)
    3. Generator is exhausted once all Cases have been returned
    """
    single_case: Case = Case.objects.create()
    # The same Case repeated 501 times is enough to exercise the chunking
    repeated_cases: list[Case] = [single_case] * 501

    chunks: Generator[str, None, None] = csv_output_generator(
        cases=repeated_cases, columns_for_export=CASE_COLUMNS_FOR_EXPORT
    )

    header_and_first_case: str = next(chunks)
    assert header_and_first_case.count("\n") == 2

    next_500_cases: str = next(chunks)
    assert next_500_cases.count("\n") == 500

    with pytest.raises(StopIteration):
        next(chunks)
40 changes: 30 additions & 10 deletions accessibility_monitoring_platform/apps/exports/tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from django.contrib.auth.models import User
from django.db import connection
from django.db.models.query import QuerySet
from django.http import HttpResponse
from django.http import HttpResponse, StreamingHttpResponse
from django.urls import reverse
from pytest_django.asserts import assertContains, assertNotContains

Expand All @@ -22,6 +22,17 @@
EXPORT_CSV_COLUMNS: str = "Equality body,Test type,Case number,Organisation"


def get_csv_streaming_content(
    response: StreamingHttpResponse,
) -> str:
    """
    Return the full body of a streaming CSV HTTP response as one decoded string.

    The chunks of response.streaming_content are joined as bytes and decoded
    once, so a multi-byte UTF-8 character split across two chunks still
    decodes correctly. No CSV parsing is done; callers get the raw CSV text.
    """
    return b"".join(response.streaming_content).decode("utf-8")


def create_cases_and_export(
enforcement_body: Case.EnforcementBody = Case.EnforcementBody.EHRC,
) -> Export:
Expand Down Expand Up @@ -144,14 +155,17 @@ def test_draft_export_csv_returned(admin_client):
"""Test that draft csv returned"""
export: Export = create_cases_and_export()

response: HttpResponse = admin_client.get(
response: StreamingHttpResponse = admin_client.get(
reverse("exports:export-all-cases", kwargs={"pk": export.id})
)

assert response.status_code == 200
assert response.headers["Content-Type"] == "text/csv"
assertContains(response, EXPORT_CSV_COLUMNS)
assertContains(response, ORGANISATION_NAME)

csv_response: str = get_csv_streaming_content(response=response)

assert EXPORT_CSV_COLUMNS in csv_response
assert ORGANISATION_NAME in csv_response


@pytest.mark.parametrize(
Expand Down Expand Up @@ -222,27 +236,33 @@ def test_ready_export_csv_returned(admin_client):
"""
export: Export = create_cases_and_export()

response: HttpResponse = admin_client.get(
response: StreamingHttpResponse = admin_client.get(
reverse("exports:export-ready-cases", kwargs={"pk": export.id})
)

assert response.status_code == 200
assert response.headers["Content-Type"] == "text/csv"
assertContains(response, EXPORT_CSV_COLUMNS)
assertNotContains(response, ORGANISATION_NAME)

csv_response: str = get_csv_streaming_content(response=response)

assert EXPORT_CSV_COLUMNS in csv_response
assert ORGANISATION_NAME not in csv_response

export_case: ExportCase = export.exportcase_set.first()
export_case.status = ExportCase.Status.READY
export_case.save()

response: HttpResponse = admin_client.get(
response: StreamingHttpResponse = admin_client.get(
reverse("exports:export-ready-cases", kwargs={"pk": export.id})
)

assert response.status_code == 200
assert response.headers["Content-Type"] == "text/csv"
assertContains(response, EXPORT_CSV_COLUMNS)
assertContains(response, ORGANISATION_NAME)

csv_response: str = get_csv_streaming_content(response=response)

assert EXPORT_CSV_COLUMNS in csv_response
assert ORGANISATION_NAME in csv_response


def test_create_export(admin_client, admin_user):
Expand Down

0 comments on commit 0730ba4

Please sign in to comment.