Skip to content

Commit

Permalink
Improve migration performance (#169)
Browse files Browse the repository at this point in the history
* perform delete IO async
* create only newly added sizes
* delete only obsolete file sizes
* create sizes from an in-memory copy of the file
  • Loading branch information
codingjoe authored May 25, 2024
1 parent f361b9c commit ec91252
Show file tree
Hide file tree
Showing 8 changed files with 640 additions and 107 deletions.
16 changes: 6 additions & 10 deletions pictures/migrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from django.db.migrations import AlterField
from django.db.models import Q

from pictures.models import PictureField, PictureFieldFile
from pictures.models import PictureField

__all__ = ["AlterPictureField"]

Expand Down Expand Up @@ -47,19 +47,15 @@ def alter_picture_field(
self.update_pictures(from_field, to_model)

def update_pictures(self, from_field: PictureField, to_model: Type[models.Model]):
"""Remove obsolete pictures and create new ones."""
for obj in to_model._default_manager.exclude(
Q(**{self.name: ""}) | Q(**{self.name: None})
).iterator():
field_file = getattr(obj, self.name)
field_file.update_all(
from_aspect_ratios=PictureFieldFile.get_picture_files(
file_name=field_file.name,
img_width=field_file.width,
img_height=field_file.height,
storage=field_file.storage,
field=from_field,
)
new_field_file = getattr(obj, self.name)
old_field_file = from_field.attr_class(
instance=obj, field=from_field, name=new_field_file.name
)
new_field_file.update_all(old_field_file)

def from_picture_field(self, from_model: Type[models.Model]):
for obj in from_model._default_manager.all().iterator():
Expand Down
78 changes: 64 additions & 14 deletions pictures/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@ class SimplePicture:
def __post_init__(self):
self.aspect_ratio = Fraction(self.aspect_ratio) if self.aspect_ratio else None

def __hash__(self):
return hash(self.name)

def __eq__(self, other):
if not isinstance(other, type(self)):
return NotImplemented
return self.deconstruct() == other.deconstruct()

@property
def url(self) -> str:
if conf.get_settings().USE_PLACEHOLDERS:
Expand Down Expand Up @@ -93,30 +101,64 @@ def save(self, image):
def delete(self):
self.storage.delete(self.name)

def deconstruct(self):
return (
f"{self.__class__.__module__}.{self.__class__.__qualname__}",
(
self.parent_name,
self.file_type,
str(self.aspect_ratio) if self.aspect_ratio else None,
self.storage.deconstruct(),
self.width,
),
{},
)


class PictureFieldFile(ImageFieldFile):

def __xor__(self, other) -> tuple[set[SimplePicture], set[SimplePicture]]:
"""Return the new and obsolete :class:`SimpleFile` instances."""
if not isinstance(other, PictureFieldFile):
return NotImplemented
new = self.get_picture_files_list() - other.get_picture_files_list()
obsolete = other.get_picture_files_list() - self.get_picture_files_list()

return new, obsolete

def save(self, name, content, save=True):
super().save(name, content, save)
self.save_all()

def save_all(self):
if self:
import_string(conf.get_settings().PROCESSOR)(self)
self.update_all()

def delete(self, save=True):
self.delete_all()
super().delete(save=save)

def delete_all(self, aspect_ratios=None):
aspect_ratios = aspect_ratios or self.aspect_ratios
for sources in aspect_ratios.values():
for srcset in sources.values():
for picture in srcset.values():
picture.delete()
def delete_all(self):
if self:
import_string(conf.get_settings().PROCESSOR)(
self.storage.deconstruct(),
self.name,
[],
[i.deconstruct() for i in self.get_picture_files_list()],
)

def update_all(self, from_aspect_ratios):
self.delete_all(from_aspect_ratios)
self.save_all()
def update_all(self, other: PictureFieldFile | None = None):
if self:
if not other:
new = self.get_picture_files_list()
old = []
else:
new, old = self ^ other
import_string(conf.get_settings().PROCESSOR)(
self.storage.deconstruct(),
self.name,
[i.deconstruct() for i in new],
[i.deconstruct() for i in old],
)

@property
def width(self):
Expand All @@ -137,7 +179,7 @@ def height(self):
return self._get_image_dimensions()[1]

@property
def aspect_ratios(self):
def aspect_ratios(self) -> {Fraction | None: {str: {int: SimplePicture}}}:
self._require_file()
return self.get_picture_files(
file_name=self.name,
Expand All @@ -155,7 +197,7 @@ def get_picture_files(
img_height: int,
storage: Storage,
field: PictureField,
):
) -> {Fraction | None: {str: {int: SimplePicture}}}:
return {
ratio: {
file_type: {
Expand All @@ -172,6 +214,14 @@ def get_picture_files(
for ratio in field.aspect_ratios
}

def get_picture_files_list(self) -> set[SimplePicture]:
return {
picture
for sources in self.aspect_ratios.values()
for srcset in sources.values()
for picture in srcset.values()
}


class PictureField(ImageField):
attr_class = PictureFieldFile
Expand All @@ -180,7 +230,7 @@ def __init__(
self,
verbose_name=None,
name=None,
aspect_ratios: [str] = None,
aspect_ratios: [str | Fraction | None] = None,
container_width: int = None,
file_types: [str] = None,
pixel_densities: [int] = None,
Expand Down
147 changes: 74 additions & 73 deletions pictures/tasks.py
Original file line number Diff line number Diff line change
@@ -1,57 +1,46 @@
from __future__ import annotations

import importlib
from typing import Protocol

from django.apps import apps
from django.core.files.storage import Storage
from django.db import transaction
from PIL import Image

from pictures import conf
from pictures.models import PictureFieldFile
from pictures import conf, utils


def _process_picture(field_file: PictureFieldFile) -> None:
# field_file.file may already be closed and can't be reopened.
# Therefore, we always open it from storage.
with field_file.storage.open(field_file.name) as fs:
with Image.open(fs) as img:
for ratio, sources in field_file.aspect_ratios.items():
for file_type, srcset in sources.items():
for width, picture in srcset.items():
picture.save(img)
class PictureProcessor(Protocol):

def __call__(
self,
storage: tuple[str, list, dict],
file_name: str,
new: list[tuple[str, list, dict]] | None = None,
old: list[tuple[str, list, dict]] | None = None,
) -> None: ...

process_picture = _process_picture

def _process_picture(
storage: tuple[str, list, dict],
file_name: str,
new: list[tuple[str, list, dict]] | None = None,
old: list[tuple[str, list, dict]] | None = None,
) -> None:
new = new or []
old = old or []
storage = utils.reconstruct(*storage)
if new:
with storage.open(file_name) as fs:
with Image.open(fs) as img:
for picture in new:
picture = utils.reconstruct(*picture)
picture.save(img)

def construct_storage(
storage_cls: str, storage_args: tuple, storage_kwargs: dict
) -> Storage:
storage_module, storage_class = storage_cls.rsplit(".", 1)
storage_cls = getattr(importlib.import_module(storage_module), storage_class)
return storage_cls(*storage_args, **storage_kwargs)

for picture in old:
picture = utils.reconstruct(*picture)
picture.delete()

def process_picture_async(
app_name: str, model_name: str, field_name: str, file_name: str, storage_construct
) -> None:
model = apps.get_model(f"{app_name}.{model_name}")
field = model._meta.get_field(field_name)
storage = construct_storage(*storage_construct)

with storage.open(file_name) as file:
with Image.open(file) as img:
for ratio, sources in PictureFieldFile.get_picture_files(
file_name=file_name,
img_width=img.width,
img_height=img.height,
storage=storage,
field=field,
).items():
for file_type, srcset in sources.items():
for width, picture in srcset.items():
picture.save(img)
process_picture: PictureProcessor = _process_picture


try:
Expand All @@ -62,21 +51,25 @@ def process_picture_async(

@dramatiq.actor(queue_name=conf.get_settings().QUEUE_NAME)
def process_picture_with_dramatiq(
app_name, model_name, field_name, file_name, storage_construct
storage: tuple[str, list, dict],
file_name: str,
new: list[tuple[str, list, dict]] | None = None,
old: list[tuple[str, list, dict]] | None = None,
) -> None:
process_picture_async(
app_name, model_name, field_name, file_name, storage_construct
)
_process_picture(storage, file_name, new, old)

def process_picture(field_file: PictureFieldFile) -> None: # noqa: F811
opts = field_file.instance._meta
def process_picture( # noqa: F811
storage: tuple[str, list, dict],
file_name: str,
new: list[tuple[str, list, dict]] | None = None,
old: list[tuple[str, list, dict]] | None = None,
) -> None:
transaction.on_commit(
lambda: process_picture_with_dramatiq.send(
app_name=opts.app_label,
model_name=opts.model_name,
field_name=field_file.field.name,
file_name=field_file.name,
storage_construct=field_file.storage.deconstruct(),
storage=storage,
file_name=file_name,
new=new,
old=old,
)
)

Expand All @@ -92,22 +85,26 @@ def process_picture(field_file: PictureFieldFile) -> None: # noqa: F811
retry_backoff=True,
)
def process_picture_with_celery(
app_name, model_name, field_name, file_name, storage_construct
storage: tuple[str, list, dict],
file_name: str,
new: list[tuple[str, list, dict]] | None = None,
old: list[tuple[str, list, dict]] | None = None,
) -> None:
process_picture_async(
app_name, model_name, field_name, file_name, storage_construct
)
_process_picture(storage, file_name, new, old)

def process_picture(field_file: PictureFieldFile) -> None: # noqa: F811
opts = field_file.instance._meta
def process_picture( # noqa: F811
storage: tuple[str, list, dict],
file_name: str,
new: list[tuple[str, list, dict]] | None = None,
old: list[tuple[str, list, dict]] | None = None,
) -> None:
transaction.on_commit(
lambda: process_picture_with_celery.apply_async(
kwargs=dict(
app_name=opts.app_label,
model_name=opts.model_name,
field_name=field_file.field.name,
file_name=field_file.name,
storage_construct=field_file.storage.deconstruct(),
storage=storage,
file_name=file_name,
new=new,
old=old,
),
queue=conf.get_settings().QUEUE_NAME,
)
Expand All @@ -122,20 +119,24 @@ def process_picture(field_file: PictureFieldFile) -> None: # noqa: F811

@job(conf.get_settings().QUEUE_NAME)
def process_picture_with_django_rq(
app_name, model_name, field_name, file_name, storage_construct
storage: tuple[str, list, dict],
file_name: str,
new: list[tuple[str, list, dict]] | None = None,
old: list[tuple[str, list, dict]] | None = None,
) -> None:
process_picture_async(
app_name, model_name, field_name, file_name, storage_construct
)
_process_picture(storage, file_name, new, old)

def process_picture(field_file: PictureFieldFile) -> None: # noqa: F811
opts = field_file.instance._meta
def process_picture( # noqa: F811
storage: tuple[str, list, dict],
file_name: str,
new: list[tuple[str, list, dict]] | None = None,
old: list[tuple[str, list, dict]] | None = None,
) -> None:
transaction.on_commit(
lambda: process_picture_with_django_rq.delay(
app_name=opts.app_label,
model_name=opts.model_name,
field_name=field_file.field.name,
file_name=field_file.name,
storage_construct=field_file.storage.deconstruct(),
storage=storage,
file_name=file_name,
new=new,
old=old,
)
)
20 changes: 20 additions & 0 deletions pictures/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,23 @@ def placeholder(width: int, height: int, alt):
anchor="mm",
)
return img


def reconstruct(path: str, args: list, kwargs: dict):
"""Reconstruct a class instance from its deconstructed state."""
module_name, _, name = path.rpartition(".")
module = __import__(module_name, fromlist=[name])
klass = getattr(module, name)
_args = []
_kwargs = {}
for arg in args:
try:
_args.append(reconstruct(*arg))
except (TypeError, ValueError, ImportError):
_args.append(arg)
for key, value in kwargs.items():
try:
_kwargs[key] = reconstruct(*value)
except (TypeError, ValueError, ImportError):
_kwargs[key] = value
return klass(*_args, **_kwargs)
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[flake8]
max-line-length=88
select = C,E,F,W,B,B950
ignore = E203, E501, W503, E731
ignore = E203, E501, W503, E704, E731
Loading

0 comments on commit ec91252

Please sign in to comment.