From 36cbb60a8bf78527a33d87dc07a501fdc29faf51 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Sat, 11 Jan 2025 16:15:51 -0500 Subject: [PATCH 1/3] make HPF use config --- lenskit-hpf/lenskit/hpf.py | 19 ++++++++----------- lenskit-hpf/tests/test_hpf.py | 4 ++-- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/lenskit-hpf/lenskit/hpf.py b/lenskit-hpf/lenskit/hpf.py index 5d71095bc..4ca516d64 100644 --- a/lenskit-hpf/lenskit/hpf.py +++ b/lenskit-hpf/lenskit/hpf.py @@ -8,7 +8,8 @@ import hpfrec import numpy as np -from typing_extensions import Any, override +from pydantic import BaseModel +from typing_extensions import override from lenskit.data import Dataset, ItemList, QueryInput, RecQuery, Vocabulary from lenskit.pipeline import Component, Trainable @@ -16,6 +17,10 @@ _logger = logging.getLogger(__name__) +class HPFConfig(BaseModel, extra="allow"): + features: int = 50 + + class HPFScorer(Component[ItemList], Trainable): """ Hierarchical Poisson factorization, provided by @@ -34,21 +39,13 @@ class HPFScorer(Component[ItemList], Trainable): additional arguments to pass to :class:`hpfrec.HPF`. """ - features: int - _kwargs: dict[str, Any] + config: HPFConfig users_: Vocabulary user_features_: np.ndarray[tuple[int, int], np.dtype[np.float64]] items_: Vocabulary item_features_: np.ndarray[tuple[int, int], np.dtype[np.float64]] - def __init__(self, features: int = 50, **kwargs): - self.features = features - self._kwargs = kwargs - - def get_config(self): - return {"features": self.features} | self._kwargs - @property def is_trained(self) -> bool: return hasattr(self, "item_features_") @@ -64,7 +61,7 @@ def train(self, data: Dataset): } ) - hpf = hpfrec.HPF(self.features, reindex=False, **self._kwargs) + hpf = hpfrec.HPF(self.config.features, reindex=False, **self.config.__pydantic_extra__) _logger.info("fitting HPF model with %d features", self.features) hpf.fit(log) diff --git a/lenskit-hpf/tests/test_hpf.py b/lenskit-hpf/tests/test_hpf.py index 3af2366ec..43af6dd68 100644 --- a/lenskit-hpf/tests/test_hpf.py +++ b/lenskit-hpf/tests/test_hpf.py @@ -28,7 +28,7 @@ class TestHPF(BasicComponentTests, ScorerTests): @mark.slow def test_hpf_train_large(tmp_path, ml_ratings): - algo = hpf.HPFScorer(20) + algo = hpf.HPFScorer(features=20) ratings = ml_ratings.assign(rating=ml_ratings.rating + 0.5) ds = from_interactions_df(ratings) algo.train(ds) @@ -57,7 +57,7 @@ def test_hpf_train_large(tmp_path, ml_ratings): @mark.slow def test_hpf_train_binary(tmp_path, ml_ratings): - algo = hpf.HPFScorer(20) + algo = hpf.HPFScorer(features=20) ratings = ml_ratings.drop(columns=["timestamp", "rating"]) ds = from_interactions_df(ratings) algo.train(ds) From d7316c9f3e012a486213b978db37e3a7f748c0cf Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Sat, 11 Jan 2025 16:18:29 -0500 Subject: [PATCH 2/3] sort entities when serializing BiasConfig --- lenskit/lenskit/basic/bias.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lenskit/lenskit/basic/bias.py b/lenskit/lenskit/basic/bias.py index fea77fd71..8af2ca1c0 100644 --- a/lenskit/lenskit/basic/bias.py +++ b/lenskit/lenskit/basic/bias.py @@ -12,11 +12,11 @@ import logging from collections.abc import Container from dataclasses import dataclass -from typing import Literal +from typing import Annotated, Literal import numpy as np import torch -from pydantic import BaseModel, NonNegativeFloat +from pydantic import BaseModel, NonNegativeFloat, PlainSerializer from typing_extensions import Self, TypeAlias, overload from lenskit.data import ID, Dataset, ItemList, QueryInput, RecQuery, Vocabulary @@ -256,7 +256,9 @@ class BiasConfig(BaseModel, extra="forbid"): Configuration for :class:`BiasScorer`. """ - entities: set[Literal["user", "item"]] = {"user", "item"} + entities: Annotated[ + set[Literal["user", "item"]], PlainSerializer(lambda s: sorted(s), return_type=list[str]) + ] = {"user", "item"} """ The entities to compute biases for, in addition to global bais. Defaults to users and items. From 09e05f714f096115e174c7df41d4a33140ebb8e7 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Sat, 11 Jan 2025 16:29:57 -0500 Subject: [PATCH 3/3] fix hpf --- lenskit-hpf/lenskit/hpf.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lenskit-hpf/lenskit/hpf.py b/lenskit-hpf/lenskit/hpf.py index 4ca516d64..b84bb6809 100644 --- a/lenskit-hpf/lenskit/hpf.py +++ b/lenskit-hpf/lenskit/hpf.py @@ -8,7 +8,7 @@ import hpfrec import numpy as np -from pydantic import BaseModel +from pydantic import BaseModel, JsonValue from typing_extensions import override from lenskit.data import Dataset, ItemList, QueryInput, RecQuery, Vocabulary @@ -18,6 +18,7 @@ class HPFConfig(BaseModel, extra="allow"): + __pydantic_extra__: dict[str, JsonValue] features: int = 50 @@ -61,9 +62,9 @@ def train(self, data: Dataset): } ) - hpf = hpfrec.HPF(self.config.features, reindex=False, **self.config.__pydantic_extra__) + hpf = hpfrec.HPF(self.config.features, reindex=False, **self.config.__pydantic_extra__) # type: ignore - _logger.info("fitting HPF model with %d features", self.features) + _logger.info("fitting HPF model with %d features", self.config.features) hpf.fit(log) self.users_ = data.users