Skip to content

Commit

Permalink
extended json serialization
Browse files Browse the repository at this point in the history
  • Loading branch information
antonkulaga committed Feb 3, 2025
1 parent 3e996e7 commit 2d962e4
Show file tree
Hide file tree
Showing 3 changed files with 216 additions and 20 deletions.
80 changes: 79 additions & 1 deletion eliot/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
from typing import Callable
import json
import sys
from pathlib import Path
from collections import defaultdict, OrderedDict, Counter
from datetime import datetime, date, time
from uuid import UUID
from enum import Enum


class EliotJSONEncoder(json.JSONEncoder):
Expand All @@ -19,7 +24,7 @@ def default(self, o):
def json_default(o: object) -> object:
"""
JSON object encoder for non-standard types. In particular, supports NumPy
types. If you are wrappnig it, call it last, as it will raise a
types, Path objects, Pydantic models, dataclasses, Pandas and Polars objects. If you are wrapping it, call it last, as it will raise a
``TypeError`` on unsupported types.
"""
numpy = sys.modules.get("numpy", None)
Expand All @@ -39,6 +44,79 @@ def json_default(o: object) -> object:
}
else:
return o.tolist()

# Add Pydantic support
pydantic = sys.modules.get("pydantic", None)
if pydantic is not None and isinstance(o, pydantic.BaseModel):
return o.model_dump()

# Add dataclass support
if hasattr(o, '__dataclass_fields__'):
return {field: getattr(o, field) for field in o.__dataclass_fields__}

if isinstance(o, Path):
return str(o)

if isinstance(o, datetime):
return o.isoformat()

if isinstance(o, date):
return o.isoformat()

if isinstance(o, time):
return o.isoformat()

if isinstance(o, UUID):
return str(o)

if isinstance(o, set):
return list(o)

if isinstance(o, defaultdict):
return dict(o)

if isinstance(o, OrderedDict):
return dict(o)

if isinstance(o, Counter):
return dict(o)

if isinstance(o, complex):
return {"real": o.real, "imag": o.imag}

# Add Enum support
if isinstance(o, Enum):
return {
"__enum__": True,
"name": o.name,
"value": o.value,
"class": o.__class__.__name__
}

# Add Pandas support
pandas = sys.modules.get("pandas", None)
if pandas is not None:
if isinstance(o, pandas.Timestamp):
return o.isoformat()
if isinstance(o, pandas.Series):
return o.to_list()
if isinstance(o, pandas.DataFrame):
return o.to_dict(orient='records')
if isinstance(o, pandas.Interval):
return {'left': o.left, 'right': o.right, 'closed': o.closed}
if isinstance(o, pandas.Period):
return str(o)

# Add Polars support
polars = sys.modules.get("polars", None)
if polars is not None:
if isinstance(o, polars.Series):
return o.to_list()
if isinstance(o, polars.DataFrame):
return o.to_dicts()
if isinstance(o, polars.Datetime):
return o.isoformat()

raise TypeError("Unsupported type")


Expand Down
119 changes: 119 additions & 0 deletions eliot/tests/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from unittest import TestCase, skipUnless, skipIf
from json import loads
import sys

try:
import numpy as np
Expand Down Expand Up @@ -83,3 +84,121 @@ def test_large_numpy_array(self):
loads(dumps(a1002, default=json_default)),
{"array_start": a1002.flat[:10000].tolist(), "original_shape": [2, 5001]},
)

def test_basic_types(self):
    """
    Standard-library types are serialized to plain JSON values.

    Covers ``Path``, ``datetime``/``date``/``time``, ``UUID``, ``set``,
    the ``collections`` dict subclasses, ``complex`` and ``Enum``.
    """
    from pathlib import Path
    from datetime import datetime, date, time
    from uuid import UUID
    from collections import defaultdict, OrderedDict, Counter
    from enum import Enum

    class TestEnum(Enum):
        A = 1
        B = "test"

    path = Path("/tmp/test")
    test_data = {
        "path": path,
        "datetime": datetime(2024, 1, 1, 12, 0),
        "date": date(2024, 1, 1),
        "time": time(12, 0),
        "uuid": UUID("12345678-1234-5678-1234-567812345678"),
        "set": {1, 2, 3},
        "defaultdict": defaultdict(list, {"a": [1, 2]}),
        "ordered_dict": OrderedDict([("a", 1), ("b", 2)]),
        "counter": Counter(["a", "a", "b"]),
        "complex": 1 + 2j,
        "enum": TestEnum.A,
    }

    serialized = loads(dumps(test_data, default=json_default))

    # Compare against str(path) rather than a literal: the separator that
    # ``str(Path(...))`` produces is platform dependent.
    self.assertEqual(serialized["path"], str(path))
    self.assertEqual(serialized["datetime"], "2024-01-01T12:00:00")
    self.assertEqual(serialized["date"], "2024-01-01")
    self.assertEqual(serialized["time"], "12:00:00")
    self.assertEqual(serialized["uuid"], "12345678-1234-5678-1234-567812345678")
    # Set iteration order is not guaranteed, so compare order-insensitively.
    self.assertEqual(sorted(serialized["set"]), [1, 2, 3])
    self.assertEqual(serialized["defaultdict"], {"a": [1, 2]})
    self.assertEqual(serialized["ordered_dict"], {"a": 1, "b": 2})
    self.assertEqual(serialized["counter"], {"a": 2, "b": 1})
    self.assertEqual(serialized["complex"], {"real": 1.0, "imag": 2.0})
    self.assertEqual(
        serialized["enum"],
        {
            "__enum__": True,
            "name": "A",
            "value": 1,
            "class": "TestEnum",
        },
    )

    # NOTE(review): the standard-library JSON encoder serializes dict
    # subclasses natively, so the entries above probably never reach
    # ``json_default`` -- confirm against ``dumps``. Call it directly so
    # those branches are exercised either way.
    self.assertEqual(
        json_default(defaultdict(list, {"a": [1, 2]})), {"a": [1, 2]}
    )
    self.assertEqual(
        json_default(OrderedDict([("a", 1), ("b", 2)])), {"a": 1, "b": 2}
    )
    self.assertEqual(json_default(Counter(["a", "a", "b"])), {"a": 2, "b": 1})

def test_pydantic(self):
    """A Pydantic model is serialized as a JSON object of its fields."""
    # Import here and skip on failure. A ``skipUnless(sys.modules.get(...))``
    # decorator is evaluated when the class body runs; unless some other
    # module happened to import pydantic first, it would skip this test
    # even when pydantic is installed.
    try:
        from pydantic import BaseModel
    except ImportError:
        self.skipTest("Pydantic not installed.")

    class TestModel(BaseModel):
        name: str
        value: int

    model = TestModel(name="test", value=42)
    serialized = loads(dumps(model, default=json_default))
    self.assertEqual(serialized, {"name": "test", "value": 42})

def test_pandas(self):
    """Pandas scalars and containers are serialized to plain JSON values."""
    # Import here and skip on failure. A ``skipUnless(sys.modules.get(...))``
    # decorator is evaluated when the class body runs; unless some other
    # module happened to import pandas first, it would skip this test even
    # when pandas is installed.
    try:
        import pandas as pd
    except ImportError:
        self.skipTest("Pandas not installed.")

    # Timestamp -> ISO 8601 string
    ts = pd.Timestamp("2024-01-01 12:00:00")
    self.assertEqual(
        loads(dumps(ts, default=json_default)), "2024-01-01T12:00:00"
    )

    # Series -> list of values
    series = pd.Series([1, 2, 3])
    self.assertEqual(loads(dumps(series, default=json_default)), [1, 2, 3])

    # DataFrame -> list of row objects
    df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
    self.assertEqual(
        loads(dumps(df, default=json_default)),
        [{"a": 1, "b": 3}, {"a": 2, "b": 4}],
    )

    # Interval -> object with its bounds and closedness
    interval = pd.Interval(0, 1, closed="both")
    self.assertEqual(
        loads(dumps(interval, default=json_default)),
        {"left": 0, "right": 1, "closed": "both"},
    )

    # Period -> its string form
    period = pd.Period("2024-01")
    self.assertEqual(loads(dumps(period, default=json_default)), "2024-01")

def test_polars(self):
    """Polars Series and DataFrame are serialized to plain JSON values."""
    # Import here and skip on failure. A ``skipUnless(sys.modules.get(...))``
    # decorator is evaluated when the class body runs; unless some other
    # module happened to import polars first, it would skip this test even
    # when polars is installed.
    try:
        import polars as pl
    except ImportError:
        self.skipTest("Polars not installed.")

    # Series -> list of values
    series = pl.Series("a", [1, 2, 3])
    self.assertEqual(loads(dumps(series, default=json_default)), [1, 2, 3])

    # DataFrame -> list of row objects
    df = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
    self.assertEqual(
        loads(dumps(df, default=json_default)),
        [{"a": 1, "b": 3}, {"a": 2, "b": 4}],
    )

def test_dataclass(self):
    """A dataclass instance is encoded as a JSON object keyed by field name."""
    import dataclasses

    @dataclasses.dataclass
    class TestDataClass:
        name: str
        value: int

    expected = {"name": "test", "value": 42}
    # Build the instance from the same mapping we expect to get back.
    encoded = dumps(TestDataClass(**expected), default=json_default)
    self.assertEqual(loads(encoded), expected)
37 changes: 18 additions & 19 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -1,26 +1,25 @@
[tox]
envlist = py38-numpy, pypy39, pypy310, py38, py39, py310, py311, py312, py313,
py38-twisted-latest,
linters3, sphinx

[gh-actions]
python =
3.8: py38, py38-numpy, py38-twisted-latest
3.9: py39, linters3, sphinx
3.10: py310
3.11: py311
3.12: py312
3.13: py313
3.13-dev: py313
pypy3.9: pypy39
pypy3.10: pypy310
envlist = py38,py39,py310,py311,py312,pypy3
isolated_build = True

[testenv]
deps =
pytest
boltons>=23.0.0
twisted>=22.0.0
dask>=2023.0.0
pyrsistent>=0.19.0
cffi
pyinstaller
dask[bag]
dask[distributed]
dask[pandas]
pandas
hypothesis>=6.0.0
testtools
commands =
{envpython} setup.py --version
pip install .[test]
pip list
{envpython} -m pytest -n 2
pip install -e .
pytest {posargs:tests}

## No Twisted ##
[testenv:py38-numpy]
Expand Down

0 comments on commit 2d962e4

Please sign in to comment.