diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3539b4851..896dbb8ac 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -611,9 +611,12 @@ jobs: - name: Download ML data run: | python -m lenskit.data.fetch ml-100k ml-1m ml-10m ml-20m - - name: "📕 Validate documentation examples" + - name: "📕 Validate code examples" run: | - pytest --cov=lenskit/lenskit --cov=lenskit-funksvd/lenskit --cov=lenskit-implicit/lenskit --cov=lenskit-hpf/lenskit --nbval-lax --doctest-glob='*.rst' --ignore='docs/_ext' --log-file test-docs.log docs */lenskit + sphinx-build -b doctest docs build/doc + - name: "📕 Validate example notebooks" + run: | + pytest --cov=lenskit/lenskit --cov=lenskit-funksvd/lenskit --cov=lenskit-implicit/lenskit --cov=lenskit-hpf/lenskit --nbval-lax --log-file test-notebooks.log docs - name: "📐 Coverage results" if: '${{ !cancelled() }}' run: | diff --git a/.vscode/ltex.dictionary.en-US.txt b/.vscode/ltex.dictionary.en-US.txt index 4ad9b9375..f3bda7f53 100644 --- a/.vscode/ltex.dictionary.en-US.txt +++ b/.vscode/ltex.dictionary.en-US.txt @@ -22,3 +22,6 @@ RecSys PyArrow Numba DuckDB +ItemList +Pydantic +dataclass diff --git a/conftest.py b/conftest.py index 45e48f959..100f6abc6 100644 --- a/conftest.py +++ b/conftest.py @@ -16,10 +16,10 @@ from pytest import fixture, skip from lenskit.parallel import ensure_parallel_init +from lenskit.random import set_global_rng # bring common fixtures into scope from lenskit.testing import ml_100k, ml_ds, ml_ds_unchecked, ml_ratings # noqa: F401 -from lenskit.util.random import set_global_rng logging.getLogger("numba").setLevel(logging.INFO) diff --git a/docs/api/data-types.rst b/docs/api/data-types.rst index 3abeed62e..8e40f464f 100644 --- a/docs/api/data-types.rst +++ b/docs/api/data-types.rst @@ -17,4 +17,4 @@ Entity Identifiers Containers ~~~~~~~~~~ -.. autoclass:: UITuple +.. 
autoclass:: UIPair diff --git a/docs/api/index.rst b/docs/api/index.rst index bfded543d..bde3dcc24 100644 --- a/docs/api/index.rst +++ b/docs/api/index.rst @@ -12,7 +12,6 @@ Core Abstractions lenskit.pipeline lenskit.diagnostics lenskit.operations - lenskit.types .. toctree:: :caption: Core @@ -81,3 +80,4 @@ and may be useful in building new models and components for LensKit. lenskit.parallel lenskit.testing lenskit.util + lenskit.random diff --git a/docs/api/pipeline.rst b/docs/api/pipeline.rst index 45f46c76e..c18e5530b 100644 --- a/docs/api/pipeline.rst +++ b/docs/api/pipeline.rst @@ -31,7 +31,6 @@ LensKit components. ~lenskit.pipeline.Component ~lenskit.pipeline.Trainable - ~lenskit.pipeline.Configurable Standard Pipelines ------------------ diff --git a/docs/conf.py b/docs/conf.py index b30fe3dbf..e54ca70f3 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -4,8 +4,10 @@ # Licensed under the MIT license, see LICENSE.md for details. # SPDX-License-Identifier: MIT +import doctest import sys from importlib.metadata import version +from os import fspath from pathlib import Path from packaging.version import Version @@ -25,6 +27,7 @@ "sphinx.ext.napoleon", "sphinx.ext.autodoc", "sphinx.ext.autosummary", + "sphinx.ext.doctest", "sphinx.ext.intersphinx", "sphinx.ext.mathjax", "sphinx.ext.extlinks", @@ -102,9 +105,9 @@ autodoc_typehints = "description" autodoc_type_aliases = { "ArrayLike": "numpy.typing.ArrayLike", - "SeedLike": "lenskit.types.SeedLike", - "RNGLike": "lenskit.types.RNGLike", - "RNGInput": "lenskit.types.RNGInput", + "SeedLike": "lenskit.random.SeedLike", + "RNGLike": "lenskit.random.RNGLike", + "RNGInput": "lenskit.random.RNGInput", "IDSequence": "lenskit.data.types.IDSequence", } # autosummary_generate_overwrite = False @@ -133,6 +136,10 @@ bibtex_bibfiles = ["lenskit.bib"] nb_execution_mode = "off" +doctest_path = [fspath((Path(__file__).parent / "guide" / "examples").resolve())] +doctest_default_flags = ( + doctest.ELLIPSIS | 
doctest.IGNORE_EXCEPTION_DETAIL | doctest.NORMALIZE_WHITESPACE +) mermaid_d3_zoom = True diff --git a/docs/guide/GettingStarted.ipynb b/docs/guide/GettingStarted.ipynb index 2644c28d4..0bfc360fb 100644 --- a/docs/guide/GettingStarted.ipynb +++ b/docs/guide/GettingStarted.ipynb @@ -179,12 +179,12 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "model_ii = ItemKNNScorer(20)\n", - "model_als = BiasedMFScorer(50)" + "model_ii = ItemKNNScorer(k=20)\n", + "model_als = BiasedMFScorer(features=50)" ] }, { @@ -231,7 +231,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/michael/Documents/LensKit/lkpy/lenskit/lenskit/als/_explicit.py:94: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /Users/runner/miniforge3/conda-bld/libtorch_1733624403138/work/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)\n", + "/Users/mde48/LensKit/lkpy/lenskit/lenskit/als/_explicit.py:59: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. 
(Triggered internally at /Users/runner/miniforge3/conda-bld/libtorch_1733624403138/work/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)\n", " rmat = rmat.to_sparse_csr()\n" ] } @@ -329,15 +329,15 @@ " \n", " \n", " ALS\n", - " 0.129831\n", - " 0.096835\n", - " 0.208196\n", + " 0.125716\n", + " 0.092391\n", + " 0.199641\n", " \n", " \n", " II\n", - " 0.096751\n", - " 0.035333\n", - " 0.104951\n", + " 0.092792\n", + " 0.033473\n", + " 0.102041\n", " \n", " \n", "\n", @@ -346,8 +346,8 @@ "text/plain": [ " NDCG RBP RecipRank\n", "model \n", - "ALS 0.129831 0.096835 0.208196\n", - "II 0.096751 0.035333 0.104951" + "ALS 0.125716 0.092391 0.199641\n", + "II 0.092792 0.033473 0.102041" ] }, "execution_count": 9, @@ -366,7 +366,7 @@ "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfsAAAHqCAYAAAADAefsAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8ekN5oAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAq/UlEQVR4nO3df1DU94H/8dcKAjYoophFHSRoqsIQfy2JBwnRNh7+uDN6Z1ISI05ymhtsJgpUq4g2qWllEq11PAVPgyZOE+Xm1DP2SCNJo2OF1krAS1Ji2hPF49gixLJqvgHBz/cPx71uQCO//Kxvn4+ZnWE/+/589v2Zyebp58Pnszgsy7IEAACM1cvuCQAAgJ5F7AEAMByxBwDAcMQeAADDEXsAAAxH7AEAMByxBwDAcMQeAADDEft2WJYlj8cjvm8IAGACYt+OixcvKiwsTBcvXrR7KgAAdBmxBwDAcMQeAADDEXsAAAxH7AEAMByxBwDAcMQeAADDEXsAAAxne+zz8vIUExOjkJAQuVwuHT169IZja2trNXfuXI0aNUq9evVSRkbGTbe9Z88eORwOzZ49u3snDQDAHcTW2BcWFiojI0M5OTkqLy9XcnKypk+frurq6nbHNzU1adCgQcrJydHYsWNvuu2zZ89q6dKlSk5O7ompAwBwx3BYNn4n7MSJEzVhwgTl5+d7l8XGxmr27NnKzc296bqTJ0/WuHHjtHHjxjavtba2atKkSXruued09OhR/eUvf9F//Md/3PK8PB6PwsLC1NjYqH79+t3yegAA+CPbjuybm5tVVlamlJQUn+UpKSkqKSnp0rbXrFmjQYMGacGCBbc0vqmpSR6Px+cBAIApbIt9fX29Wltb5XQ6fZY7nU653e5Ob/fYsWMqKCjQ9u3bb3md3NxchYWFeR9RUVGdfn8AAPyN7RfoORwOn+eWZbVZdqsuXryoefPmafv27YqIiLjl9bKzs9XY2Oh9nDt3rlPvDwCAPwq0640jIiIUEBDQ5ii+rq6uzdH+rfrv//5vnTlzRjNnzvQuu3r1qiQpMDBQp06d0ogRI9qsFxwcrODg4E69JwAA/s62I/ugoCC5XC4VFxf7LC8uLlZSUlKntjl69Gh9/PHHqqio8D4ef/xxfec731FFRQWn5wEAdyXbjuwlKSsrS2lpaUpISFBiYqK2bdum6upqpaenS7p2er2mpka7du3yrlNRUSFJunTpks6fP6+Kig
oFBQUpLi5OISEhio+P93mP/v37S1Kb5QAA3C1sjX1qaqoaGhq0Zs0a1dbWKj4+XkVFRYqOjpZ07Ut0vn7P/fjx470/l5WV6e2331Z0dLTOnDlzO6cOAMAdw9b77P0V99n7N8uydPnyZe/ze+65p9MXdQLA3cDWI3ugMy5fvqxZs2Z5nx84cEChoaE2zggA/Jvtt94BAICeRewBADAcsQcAwHDEHgAAwxF7AAAMR+wBADAcsQcAwHDEHgAAwxF7AAAMR+wBADAcsQcAwHDEHgAAwxF7AAAMR+wBADAcsQcAwHDEHgAAwxF7AAAMR+wBADAcsQcAwHDEHgAAwxF7AAAMR+wBADBcoN0TuJu4lu2yewpGcLQ0K+yvnk9evUdWYJBt8zFB2br5dk8BQA/iyB4AAMMRewAADEfsAQAwHLEHAMBwxB4AAMMRewAADEfsAQAwHLEHAMBwxB4AAMMRewAADEfsAQAwHLEHAMBwxB4AAMMRewAADEfsAQAwHLEHAMBwxB4AAMMRewAADEfsAQAwHLEHAMBwxB4AAMMRewAADBdo9wSAjrICeqtxzNM+zwEAN0bscedxOGQFBtk9CwC4Y9h+Gj8vL08xMTEKCQmRy+XS0aNHbzi2trZWc+fO1ahRo9SrVy9lZGS0GbN9+3YlJycrPDxc4eHhmjJlio4fP96DewAAgH+zNfaFhYXKyMhQTk6OysvLlZycrOnTp6u6urrd8U1NTRo0aJBycnI0duzYdsccPnxYTz/9tD788EOVlpZq2LBhSklJUU1NTU/uCgAAfsthWZZl15tPnDhREyZMUH5+vndZbGysZs+erdzc3JuuO3nyZI0bN04bN2686bjW1laFh4dr8+bNmj9//i3Ny+PxKCwsTI2NjerXr98trXMrXMt2ddu2gO5Utu7WPhsA7ky2Hdk3NzerrKxMKSkpPstTUlJUUlLSbe/z5Zdf6sqVKxowYMANxzQ1Ncnj8fg8AAAwhW2xr6+vV2trq5xOp89yp9Mpt9vdbe+zYsUKDR06VFOmTLnhmNzcXIWFhXkfUVFR3fb+AADYzfYL9BwOh89zy7LaLOus1157Tbt379a+ffsUEhJyw3HZ2dlqbGz0Ps6dO9ct7w8AgD+w7da7iIgIBQQEtDmKr6ura3O03xnr16/X2rVr9f7772vMmDE3HRscHKzg4OAuvycAAP7ItiP7oKAguVwuFRcX+ywvLi5WUlJSl7a9bt06vfLKK/rVr36lhISELm0LAIA7na1fqpOVlaW0tDQlJCQoMTFR27ZtU3V1tdLT0yVdO71eU1OjXbv+7yr2iooKSdKlS5d0/vx5VVRUKCgoSHFxcZKunbpfvXq13n77bd13333eMwehoaEKDQ29vTsIAIAfsDX2qampamho0Jo1a1RbW6v4+HgVFRUpOjpa0rUv0fn6Pffjx4/3/lxWVqa3335b0dHROnPmjKRrX9LT3NysJ554wme9l156SS+//HKP7g8AAP7I1vvs/RX32eNuw332gNlsvxofAAD0LGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhAu2eAADg9rIsS5cvX/Y+v+eee+RwOGycEXoasQeAu8zly5c1a9Ys7/MDBw4oNDTUxhmhp3EaHwAAwxF7AAAMR+wBADAcsQcAwHC2xz4vL08xMTEKCQmRy+XS0aNHbzi2trZWc+fO1ahRo9SrVy9lZGS0O27v3r2Ki4tTcHCw4uLitH///h6aPQAA/s
/W2BcWFiojI0M5OTkqLy9XcnKypk+frurq6nbHNzU1adCgQcrJydHYsWPbHVNaWqrU1FSlpaXp5MmTSktL0/e+9z397ne/68ldAQDAbzksy7LsevOJEydqwoQJys/P9y6LjY3V7NmzlZube9N1J0+erHHjxmnjxo0+y1NTU+XxePTuu+96l02bNk3h4eHavXv3Lc3L4/EoLCxMjY2N6tev363v0DdwLdvVbdsCulPZuvl2TwG30aVLl7j17i5j25F9c3OzysrKlJKS4rM8JSVFJSUlnd5uaWlpm21OnTr1pttsamqSx+PxeQAAYArbYl9fX6/W1lY5nU6f5U6nU263u9PbdbvdHd5mbm6uwsLCvI+oqKhOvz8AAP7G9gv0vv4VjZZldflrGzu6zezsbDU2Nnof586d69L7AwDgT2z7utyIiAgFBAS0OeKuq6trc2TeEZGRkR3eZnBwsIKDgzv9ngAA+DPbjuyDgoLkcrlUXFzss7y4uFhJSUmd3m5iYmKbbR46dKhL2wQA4E5m6x/CycrKUlpamhISEpSYmKht27apurpa6enpkq6dXq+pqdGuXf93FXtFRYWka1eTnj9/XhUVFQoKClJcXJwkacmSJXr00Uf16quvatasWTpw4IDef/99/eY3v7nt+wcAgD+wNfapqalqaGjQmjVrVFtbq/j4eBUVFSk6OlrStS/R+fo99+PHj/f+XFZWprffflvR0dE6c+aMJCkpKUl79uzRqlWrtHr1ao0YMUKFhYWaOHHibdsvAAD8ia332fsr7rPH3Yb77O8u3Gd/97H9anwAANCziD0AAIYj9gAAGM7WC/QAoCO47qV7OFqaFfZXzyev3iMrMMi2+ZjA36974cgeAADDEXsAAAxH7AEAMByxBwDAcMQeAADDEXsAAAxH7AEAMByxBwDAcMQeAADDEXsAAAxH7AEAMByxBwDAcMQeAADDEXsAAAxH7AEAMByxBwDAcMQeAADDEXsAAAxH7AEAMFyg3RMAANxeVkBvNY552uc5zEbsAeBu43DICgyyexa4jTiNDwCA4Yg9AACGI/YAABiO2AMAYDhiDwCA4Yg9AACGI/YAABiO2AMAYDhiDwCA4Yg9AACGI/YAABiO2AMAYDhiDwCA4Yg9AACGI/YAABiO2AMAYDhiDwCA4Yg9AACGI/YAABiO2AMAYDhiDwCA4Yg9AACGI/YAABjO9tjn5eUpJiZGISEhcrlcOnr06E3HHzlyRC6XSyEhIRo+fLi2bt3aZszGjRs1atQo9enTR1FRUcrMzNRXX33VU7sAAIBfszX2hYWFysjIUE5OjsrLy5WcnKzp06erurq63fFVVVWaMWOGkpOTVV5erpUrV2rx4sXau3evd8xbb72lFStW6KWXXlJlZaUKCgpUWFio7Ozs27VbAAD4lUA733zDhg1asGCBFi5cKOnaEfl7772n/Px85ebmthm/detWDRs2TBs3bpQkxcbG6sSJE1q/fr3mzJkjSSotLdXDDz+suXPnSpLuu+8+Pf300zp+/Pjt2SkAAPyMbUf2zc3NKisrU0pKis/ylJQUlZSUtLtOaWlpm/FTp07ViRMndOXKFUnSI488orKyMm/cT58+raKiIv3d3/1dD+wFAAD+z7Yj+/r6erW2tsrpdPosdzqdcrvd7a7jdrvbHd/S0qL6+noNHjxYTz31lM6fP69HHnlElmWppaVFixYt0ooVK244l6amJjU1NXmfezyeLuwZAAD+xfYL9BwOh89zy7LaLPum8X+9/PDhw/rpT3+qvLw8ffTRR9q3b59++ctf6pVXXrnhNnNzcxUWFuZ9REVFdXZ3AADwO7Yd2UdERCggIKDNUXxdXV2bo/frIiMj2x0fGBiogQMHSpJWr16ttLQ073UADzzwgC5fvqx//ud/Vk5Ojnr1avvvm+zsbGVlZXmfezwegg8AMIZtR/ZBQUFyuVwqLi72WV5cXKykpKR210lMTGwz/tChQ0pISFDv3r0lSV
9++WWboAcEBMiyLO9ZgK8LDg5Wv379fB4AAJjC1tP4WVlZev3117Vjxw5VVlYqMzNT1dXVSk9Pl3TtiHv+/Pne8enp6Tp79qyysrJUWVmpHTt2qKCgQEuXLvWOmTlzpvLz87Vnzx5VVVWpuLhYq1ev1uOPP66AgIDbvo8AANjN1lvvUlNT1dDQoDVr1qi2tlbx8fEqKipSdHS0JKm2ttbnnvuYmBgVFRUpMzNTW7Zs0ZAhQ7Rp0ybvbXeStGrVKjkcDq1atUo1NTUaNGiQZs6cqZ/+9Ke3ff8AAPAHDutG57bvYh6PR2FhYWpsbOzWU/quZbu6bVtAdypbN/+bB/kBPkPwV/7+GerQkf3Vq1f16aef6oEHHpB07Utumpubva8HBARo0aJF7V4EBwAA7NGh2O/Zs0f/+q//qiNHjkiSli1bpv79+ysw8Npm6uvrFRISogULFnT/TAEAQKd06BB8586d3ovnrjty5IiqqqpUVVWldevW6Re/+EW3ThAAAHRNh2JfWVmpuLi4G74+adIknTx5ssuTAgAA3adDp/Hr6+sVGhrqfX769Gnvl9lIUu/evXX58uXumx0AAOiyDh3ZO51OnTp1yvt80KBBPhfjVVZWKjIysvtmBwAAuqxDsX/sscdueL+6ZVnKzc3VY4891i0TAwAA3aNDp/FzcnI0YcIETZw4UUuXLtXIkSPlcDj02Wefaf369Tp16pR27eI+WAAA/EmHYj9ixAgVFxfr2WefVWpqqvcvzVmWpdGjR+vQoUO6//77e2SiAACgczr8dbkPPfSQ/vCHP6iiokKff/65JOnb3/62xo8f3+2TAwAAXdfh2Hs8HoWGhmrcuHEaN26cd/nVq1d16dIl/mIcAAB+pkMX6O3fv18JCQn66quv2rz21Vdf6cEHH9TBgwe7bXIAAKDrOhT7/Px8/fCHP9S3vvWtNq9961vf0vLly7V58+ZumxwAAOi6DsX+k08+0eTJk2/4+qOPPqqPP/64q3MCAADdqEOxv3DhglpaWm74+pUrV3ThwoUuTwoAAHSfDsX+vvvu04kTJ274+okTJxQdHd3lSQEAgO7Todj/4z/+o3JycvTnP/+5zWtut1urVq3SnDlzum1yAACg6zp0692KFSt04MABffvb39a8efM0atQoORwOVVZW6q233lJUVJRWrFjRU3MFAACd0KHY9+3bV8eOHVN2drYKCwu9v58PDw/XvHnztHbtWvXt27dHJgoAADqnw1+qExYWpry8PG3ZskX19fWyLEuDBg3yfnUuAADwLx2O/XUNDQ06e/asHA6HAgICfP6uPQAA8B8dukBPkj799FM9+uijcjqdmjhxoh566CHde++9+u53v+vzt+4BAIB/6NCRvdvt1qRJkzRo0CBt2LBBo0ePlmVZ+sMf/qDt27crOTlZn3zyie69996emi8AAOigDsX+5z//uaKjo3Xs2DGFhIR4l0+bNk2LFi3SI488op///OfKzc3t9okCAIDO6dBp/OLiYi1fvtwn9Nf16dNHy5Yt03vvvddtkwMAAF3XodifPn1aEyZMuOHrCQkJOn36dJcnBQAAuk+HYn/x4sWb/r36vn376tKlS12eFAAA6D4dvvXu4sWL7Z7GlySPxyPLsro8KQAA0H06FHvLsjRy5Mibvs6X6wAA4F86FPsPP/ywp+YBAAB6SIdiP2nSpJ6aBwAA6CEdin2vXr2+8TS9w+FQS0tLlyYFAAC6T4div3///hu+VlJSon/5l3/hAj0AAPxMh2I/a9asNss+++wzZWdn6+DBg3rmmWf0yiuvdNvkAABA13X4D+Fc97//+796/vnnNWbMGLW0tKiiokJvvvmmhg0b1p3zAwAAXdTh2Dc2Nmr58uW6//779emnn+qDDz7QwYMHFR8f3xPzAwAAXdSh0/ivvfaaXn31VUVGRmr37t3tntYHAAD+pUOxX7Fihfr06aP7779fb775pt588812x+3bt69bJgcAALquQ7GfP3
8+35AHAMAdpkOxf+ONN3poGgAAoKd0+mp8AABwZyD2AAAYjtgDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGA4Yg8AgOFsj31eXp5iYmIUEhIil8ulo0eP3nT8kSNH5HK5FBISouHDh2vr1q1txvzlL3/RCy+8oMGDByskJESxsbEqKirqqV0AAMCv2Rr7wsJCZWRkKCcnR+Xl5UpOTtb06dNVXV3d7viqqirNmDFDycnJKi8v18qVK7V48WLt3bvXO6a5uVl/+7d/qzNnzujf//3fderUKW3fvl1Dhw69XbsFAIBf6dDX5Xa3DRs2aMGCBVq4cKEkaePGjXrvvfeUn5+v3NzcNuO3bt2qYcOGaePGjZKk2NhYnThxQuvXr9ecOXMkSTt27NAXX3yhkpIS9e7dW5IUHR19e3YIAAA/ZNuRfXNzs8rKypSSkuKzPCUlRSUlJe2uU1pa2mb81KlTdeLECV25ckWS9M477ygxMVEvvPCCnE6n4uPjtXbtWrW2tvbMjgAA4OdsO7Kvr69Xa2urnE6nz3Kn0ym3293uOm63u93xLS0tqq+v1+DBg3X69Gn9+te/1jPPPKOioiL98Y9/1AsvvKCWlhb96Ec/ane7TU1Nampq8j73eDxd3DsAAPyH7Rfoff1P5lqWddM/o9ve+L9efvXqVd17773atm2bXC6XnnrqKeXk5Cg/P/+G28zNzVVYWJj3ERUV1dndAQDA79gW+4iICAUEBLQ5iq+rq2tz9H5dZGRku+MDAwM1cOBASdLgwYM1cuRIBQQEeMfExsbK7Xarubm53e1mZ2ersbHR+zh37lxXdg0AAL9iW+yDgoLkcrlUXFzss7y4uFhJSUntrpOYmNhm/KFDh5SQkOC9GO/hhx/Wn/70J129etU75vPPP9fgwYMVFBTU7naDg4PVr18/nwcAAKaw9TR+VlaWXn/9de3YsUOVlZXKzMxUdXW10tPTJV074p4/f753fHp6us6ePausrCxVVlZqx44dKigo0NKlS71jFi1apIaGBi1ZskSff/65/vM//1Nr167VCy+8cNv3DwAAf2DrrXepqalqaGjQmjVrVFtbq/j4eBUVFXlvlautrfW55z4mJkZFRUXKzMzUli1bNGTIEG3atMl7250kRUVF6dChQ8rMzNSYMWM0dOhQLVmyRMuXL7/t+wcAgD9wWNevcIOXx+NRWFiYGhsbu/WUvmvZrm7bFtCdytbN/+ZBfoDPEPyVv3+GbL8aHwAA9CxiDwCA4Yg9AACGI/YAABiO2AMAYDhiDwCA4Yg9AACGI/YAABiO2AMAYDhiDwCA4Yg9AACGI/YAABiO2AMAYDhiDwCA4Yg9AACGI/YAABiO2AMAYDhiDwCA4Yg9AACGI/YAABiO2AMAYDhiDwCA4Yg9AACGI/YAABiO2AMAYDhiDwCA4Yg9AACGI/YAABiO2AMAYDhiDwCA4Yg9AACGI/YAABiO2AMAYDhiDwCA4Yg9AACGI/YAABiO2AMAYDhiDwCA4Yg9AACGI/YAABiO2AMAYDhiDwCA4Yg9AACGI/YAABiO2AMAYDhiDwCA4Yg9AACGI/YAABiO2AMAYDjbY5+Xl6eYmBiFhITI5XLp6NGjNx1/5MgRuVwuhYSEaPjw4dq6desNx+7Zs0cOh0OzZ8/u5lkDAHDnsDX2hYWFysjIUE5OjsrLy5WcnKzp06erurq63fFVVVWaMWOGkpOTVV5erpUrV2rx4sXau3dvm7Fnz57V0qVLlZyc3NO7AQCAX7M19hs2bNCCBQu0cOFCxcbGauPGjYqKilJ+fn6747du3aphw4Zp48aNio2N1cKFC/VP//RPWr9+vc+41tZWPfPMM/rxj3+s4cOH345dAQDAb9kW++bmZpWVlSklJcVneUpKikpKStpdp7S0tM34qVOn6sSJE7py5Yp32Zo1azRo0CAtWLDglubS1NQkj8fj8wAAwBS2xb6+vl6tra1yOp0+y51Op9xud7vruN3udse3tL
Sovr5eknTs2DEVFBRo+/bttzyX3NxchYWFeR9RUVEd3BsAAPyX7RfoORwOn+eWZbVZ9k3jry+/ePGi5s2bp+3btysiIuKW55Cdna3Gxkbv49y5cx3YAwAA/FugXW8cERGhgICANkfxdXV1bY7er4uMjGx3fGBgoAYOHKhPP/1UZ86c0cyZM72vX716VZIUGBioU6dOacSIEW22GxwcrODg4K7uEgAAfsm2I/ugoCC5XC4VFxf7LC8uLlZSUlK76yQmJrYZf+jQISUkJKh3794aPXq0Pv74Y1VUVHgfjz/+uL7zne+ooqKC0/MAgLuSbUf2kpSVlaW0tDQlJCQoMTFR27ZtU3V1tdLT0yVdO71eU1OjXbt2SZLS09O1efNmZWVl6fnnn1dpaakKCgq0e/duSVJISIji4+N93qN///6S1GY5AAB3C1tjn5qaqoaGBq1Zs0a1tbWKj49XUVGRoqOjJUm1tbU+99zHxMSoqKhImZmZ2rJli4YMGaJNmzZpzpw5du0CAAB+z2Fdv8INXh6PR2FhYWpsbFS/fv26bbuuZbu6bVtAdypbN9/uKdwSPkPwV/7+GbL9anwAANCziD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgONtjn5eXp5iYGIWEhMjlcuno0aM3HX/kyBG5XC6FhIRo+PDh2rp1q8/r27dvV3JyssLDwxUeHq4pU6bo+PHjPbkLAAD4NVtjX1hYqIyMDOXk5Ki8vFzJycmaPn26qqur2x1fVVWlGTNmKDk5WeXl5Vq5cqUWL16svXv3esccPnxYTz/9tD788EOVlpZq2LBhSklJUU1Nze3aLQAA/IrDsizLrjefOHGiJkyYoPz8fO+y2NhYzZ49W7m5uW3GL1++XO+8844qKyu9y9LT03Xy5EmVlpa2+x6tra0KDw/X5s2bNX/+/Fual8fjUVhYmBobG9WvX78O7tWNuZbt6rZtAd2pbN2tfTbsxmcI/srfP0O2Hdk3NzerrKxMKSkpPstTUlJUUlLS7jqlpaVtxk+dOlUnTpzQlStX2l3nyy+/1JUrVzRgwIAbzqWpqUkej8fnAQCAKWyLfX19vVpbW+V0On2WO51Oud3udtdxu93tjm9paVF9fX2766xYsUJDhw7VlClTbjiX3NxchYWFeR9RUVEd3BsAAPyX7RfoORwOn+eWZbVZ9k3j21suSa+99pp2796tffv2KSQk5IbbzM7OVmNjo/dx7ty5juwCAAB+LdCuN46IiFBAQECbo/i6uro2R+/XRUZGtjs+MDBQAwcO9Fm+fv16rV27Vu+//77GjBlz07kEBwcrODi4E3sBAID/s+3IPigoSC6XS8XFxT7Li4uLlZSU1O46iYmJbcYfOnRICQkJ6t27t3fZunXr9Morr+hXv/qVEhISun/yAADcQWw9jZ+VlaXXX39dO3bsUGVlpTIzM1VdXa309HRJ106v//UV9Onp6Tp79qyysrJUWVmpHTt2qKCgQEuXLvWOee2117Rq1Srt2LFD9913n9xut9xuty5dunTb9w8AAH9g22l8SUpNTVVDQ4PWrFmj2tpaxcfHq6ioSNHR0ZKk2tpan3vuY2JiVFRUpMzMTG3ZskVDhgzRpk2bNGfOHO+YvLw8NTc364knnvB5r5deekkvv/zybdkvAAD8ia332fsr7rPH3cbf7xG+js8Q/JW/f4ZsvxofAAD0LG
IPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGM722Ofl5SkmJkYhISFyuVw6evToTccfOXJELpdLISEhGj58uLZu3dpmzN69exUXF6fg4GDFxcVp//79PTV9AAD8nq2xLywsVEZGhnJyclReXq7k5GRNnz5d1dXV7Y6vqqrSjBkzlJycrPLycq1cuVKLFy/W3r17vWNKS0uVmpqqtLQ0nTx5Umlpafre976n3/3ud7drtwAA8CsOy7Isu9584sSJmjBhgvLz873LYmNjNXv2bOXm5rYZv3z5cr3zzjuqrKz0LktPT9fJkydVWloqSUpNTZXH49G7777rHTNt2jSFh4dr9+7dtzQvj8ejsLAwNTY2ql+/fp3dvTZcy3Z127aA7lS2br7dU7glfIbgr/z9M2TbkX1zc7PKysqUkpLiszwlJUUlJSXtrlNaWtpm/NSpU3XixAlduXLlpmNutE0AAEwXaNcb19fXq7W1VU6n02e50+mU2+1udx23293u+JaWFtXX12vw4ME3HHOjbUpSU1OTmpqavM8bGxslXTvC706tTf+vW7cHdJfu/m+9p/AZgr/qqc9Q37595XA4urwd22J/3dd3wrKsm+5Ye+O/vryj28zNzdWPf/zjNsujoqJuPHHAIGH/km73FIA7Wk99hrrr18m2xT4iIkIBAQFtjrjr6uraHJlfFxkZ2e74wMBADRw48KZjbrRNScrOzlZWVpb3+dWrV/XFF19o4MCB3fIvKnQ/j8ejqKgonTt3rluvqwDuFnyG7gx9+/btlu3YFvugoCC5XC4VFxfrH/7hH7zLi4uLNWvWrHbXSUxM1MGDB32WHTp0SAkJCerdu7d3THFxsTIzM33GJCUl3XAuwcHBCg4O9lnWv3//ju4SbNCvXz/+RwV0AZ+hu4Otp/GzsrKUlpamhIQEJSYmatu2baqurlZ6+rXTIdnZ2aqpqdGuXdeuwE1PT9fmzZuVlZWl559/XqWlpSooKPC5yn7JkiV69NFH9eqrr2rWrFk6cOCA3n//ff3mN7+xZR8BALCbrbFPTU1VQ0OD1qxZo9raWsXHx6uoqEjR0dGSpNraWp977mNiYlRUVKTMzExt2bJFQ4YM0aZNmzRnzhzvmKSkJO3Zs0erVq3S6tWrNWLECBUWFmrixIm3ff8AAPAHtt5nD3RWU1OTcnNzlZ2d3eZXMAC+GZ+huwuxBwDAcLZ/Nz4AAOhZxB4AAMMRewAADEfs4XdKSkoUEBCgadOm+Sw/c+aMHA6HKioq2l2vtbVVubm5Gj16tPr06aMBAwbob/7mb7Rz587bMGvgzvDss89q9uzZbX6G2Wz/ulzg63bs2KEXX3xRr7/+uqqrqzVs2LBbWu/ll1/Wtm3btHnzZiUkJMjj8ejEiRO6cOFCD88YAPwbsYdfuXz5sv7t3/5Nv//97+V2u/XGG2/oRz/60S2te/DgQX3/+9/Xk08+6V02duzYnpoqANwxOI0Pv1JYWKhRo0Zp1KhRmjdvnnbu3KlbvTs0MjJSv/71r3X+/PkeniUA3FmIPfxKQUGB5s2bJ0maNm2aLl26pA8++OCW1t2wYYPOnz+vyMhIjRkzRunp6Xr33Xd7croAcEcg9vAbp06d0vHjx/XUU09JkgIDA5WamqodO3bc0vpxcXH65JNP9Nvf/lbPPfec/vznP2vmzJlauHBhT04bAPwev7OH3ygoKFBLS4
uGDh3qXWZZlnr37n3LF9n16tVLDz74oB588EFlZmbqF7/4hdLS0pSTk6OYmJiemjoA+DWO7OEXWlpatGvXLv3sZz9TRUWF93Hy5ElFR0frrbfe6tR24+LiJF278A8A7lYc2cMv/PKXv9SFCxe0YMEChYWF+bz2xBNPqKCgQH//938v6drp/q+Li4vT3Llz9fDDDyspKUmRkZGqqqpSdna2Ro4cqdGjR9+W/QAAf0Ts4RcKCgo0ZcqUNqGXpDlz5mjt2rX64osvJMn7O/2/VlVVpalTp2r37t3Kzc1VY2OjIiMj9d3vflcvv/yyAgP5Tx3A3Yu/egcAgOH4nT0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD2A22ry5MnKyMi45fFvvPGG+vfv32PzAe4GxB4AAMMRewAADEfsAUi6dnr9xRdfVEZGhsLDw+V0OrVt2zZdvnxZzz33nPr27asRI0bo3Xff9a5z5MgRPfTQQwoODtbgwYO1YsUKtbS0eF+/fPmy5s+fr9DQUA0ePFg/+9nP2rxvc3OzfvjDH2ro0KG65557NHHiRB0+fPh27DJw1yD2ALzefPNNRURE6Pjx43rxxRe1aNEiPfnkk0pKStJHH32kqVOnKi0tTV9++aVqamo0Y8YMPfjggzp58qTy8/NVUFCgn/zkJ97tLVu2TB9++KH279+vQ4cO6fDhwyorK/N5z+eee07Hjh3Tnj179F//9V968sknNW3aNP3xj3+83bsPmMsCAMuyJk2aZD3yyCPe5y0tLdY999xjpaWleZfV1tZakqzS0lJr5cqV1qhRo6yrV696X9+yZYsVGhpqtba2WhcvXrSCgoKsPXv2eF9vaGiw+vTpYy1ZssSyLMv605/+ZDkcDqumpsZnLo899piVnZ1tWZZl7dy50woLC+uBPQbuHvyRbwBeY8aM8f4cEBCggQMH6oEHHvAuczqdkqS6ujpVVlYqMTFRDofD+/rDDz+sS5cu6X/+53904cIFNTc3KzEx0fv6gAEDNGrUKO/zjz76SJZlaeTIkT7zaGpq0sCBA7t9/4C7FbEH4NW7d2+f5w6Hw2fZ9bBfvXpVlmX5hF6SLMvyjrv+881cvXpVAQEBKisrU0BAgM9roaGhndoHAG0RewCdEhcXp7179/pEv6SkRH379tXQoUMVHh6u3r1767e//a2GDRsmSbpw4YI+//xzTZo0SZI0fvx4tba2qq6uTsnJybbtC2A6LtAD0Cnf//73de7cOb344ov67LPPdODAAb300kvKyspSr169FBoaqgULFmjZsmX64IMP9Mknn+jZZ59Vr17/97+dkSNH6plnntH8+fO1b98+VVVV6fe//71effVVFRUV2bh3gFk4sgfQKUOHDlVRUZGWLVumsWPHasCAAVqwYIFWrVrlHbNu3TpdunRJjz/+uPr27asf/OAHamxs9NnOzp079ZOf/EQ/+MEPVFNTo4EDByoxMVEzZsy43bsEGMth3cov1gAAwB2L0/gAABiO2AMAYDhiDwCA4Yg9AACGI/YAABiO2AMAYDhiDwCA4Yg9AACGI/YAABiO2AMAYDhiDwCA4Yg9AACG+/8bDdKpZQN+HgAAAABJRU5ErkJggg==", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAfsAAAHpCAYAAACFlZVCAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8ekN5oAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAoyUlEQVR4nO3df3DU9YH/8deSn9RADAE3wIQYsEAyEZWNcolGuMqFH3cKc2hTlTB64E2oIyQpFEKgKFYyClKGQsKBCcpUITdFBu3FSvSEoSYtJSa02ojXIRAuzRYSuSzg14SEz/cPhq3bDUjIhs/y5vmY2Znse9/72fdnxvXJ55PPbhyWZVkCAADG6mf3AgAAQN8i9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGLfDcuy5PF4xFcQAABMQOy7cebMGUVHR+vMmTN2LwUAgF4j9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGC7U7gUAPWVZls6dO+e9f8stt8jhcNi4IgAIbsQeN5xz585pxowZ3vt79uxRVFSUjSsCgODGaXwAAAxH7AEAMByxBwDAcMQeAADDEXsAAAxH7AEAMByxBwDAcMQeAADDEXsAAAxH7AEAMByxBwDAcMQeAADDEXsAAAxH7AEAMByxBwDAcMQeAADDEXsAAAxH7AEAMByxBwDAcMQeAADDEXsAAAwXavcCbiauxdvtXoIRHJ0div7G/UkrdsoKDbdtPSaoWTPH7iUA6EMc2QMAYDhiDwCA4Yg9AACGI/YAABiO2AMAYDhiDwCA4Yg9AACGI/YAABiO2AMAYDhiDwCA4Yg9AACGI/YAABiO2AMAYDjbY19cXKzExERFRkbK5XLpwIEDl53b3NysJ554QmPGjFG/fv2Um5vrN2fr1q3KyMhQTEyMYmJiNHnyZB08eLAP9wAAgOBma+zLy8uVm5urwsJC1dbWKiMjQ9OmTVNjY2O389vb2zVkyBAVFhbqrrvu6nbOvn379Pjjj+ujjz5SdXW1RowYoczMTDU1NfXlrgAAELQclmVZdr34hAkTNH78eJWUlHjHkpKSNHPmTBUVFV3xuZMmTdLdd9+t9evXX3FeV1eXYmJitHHjRs2Z0/3f7G5vb1d7e7v3vsfjUXx8vNra2jRw4MCr36Fvwd+zDwxHZ4ei/7DDe79t3OP8Pfte4u/ZA2az7ci+o6NDNTU1yszM9BnPzMxUVVVVwF7nq6++0vnz5zVo0KDLzikqKlJ0dLT3Fh8fH7DXBwDAbrbFvqWlRV1dXXI6nT7jTqdTbrc7YK+zdOlSDR8+XJMnT77snIKCArW1tXlvJ06cCNjrAwBgt1C7F+BwOHzuW5blN3atXnnlFe3YsUP79u1TZGTkZedFREQoIiIiIK8JAECwsS32gwcPVkhIiN9R/MmTJ/2O9q/F2rVrtXr1an3wwQcaN25cr7cHAMCNyrbT+OHh4XK5XKqsrPQZr6ysVHp6eq+2vWbNGr344ov69a9/rdTU1F5tCwCAG52tp/Hz8/OVnZ2t1NRUpaWlacuWLWpsbFROTo6ki79Lb2pq0vbtf7uKva6uTpJ09uxZnTp1SnV1dQoPD1dycrKki6fuV6xYobfeeku3336798xBVFSUoqKiru8OAgAQBGyNfVZWllpbW7Vq1So1NzcrJSVFFRUVSkhIkHTxS3T+/jP399xzj/fnmpoavfXWW0pISNCxY8ckXfySno6ODj366KM+z1u5cqWef/75Pt0fAACCka2fsw9WHo9H0dHRfM4+WFmWHF3n/3Y3JEwK0EWdNys+Zw+Yzfar8YEeczj4Eh0A6AHbvxsfAAD0LWIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwB
gOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgONtjX1xcrMTEREVGRsrlcunAgQOXndvc3KwnnnhCY8aMUb9+/ZSbm9vtvF27dik5OVkRERFKTk7W7t27+2j1AAAEP1tjX15ertzcXBUWFqq2tlYZGRmaNm2aGhsbu53f3t6uIUOGqLCwUHfddVe3c6qrq5WVlaXs7GwdPnxY2dnZ+v73v6/f/e53fbkrAAAELYdlWZZdLz5hwgSNHz9eJSUl3rGkpCTNnDlTRUVFV3zupEmTdPfdd2v9+vU+41lZWfJ4PHrvvfe8Y1OnTlVMTIx27NhxVevyeDyKjo5WW1ubBg4cePU79C1ci7cHbFtAINWsmWP3EgD0IduO7Ds6OlRTU6PMzEyf8czMTFVVVV3zdqurq/22OWXKlCtus729XR6Px+cGAIApbIt9S0uLurq65HQ6fcadTqfcbvc1b9ftdvd4m0VFRYqOjvbe4uPjr/n1AQAINrZfoOdwOHzuW5blN9bX2ywoKFBbW5v3duLEiV69PgAAwSTUrhcePHiwQkJC/I64T5486Xdk3hNxcXE93mZERIQiIiKu+TUB4EZiWZbOnTvnvX/LLbf0+iALwc22I/vw8HC5XC5VVlb6jFdWVio9Pf2at5uWlua3zb179/ZqmwBgknPnzmnGjBne2zfDDzPZdmQvSfn5+crOzlZqaqrS0tK0ZcsWNTY2KicnR9LF0+tNTU3avv1vV7HX1dVJks6ePatTp06prq5O4eHhSk5OliQtXLhQDz74oF5++WXNmDFDe/bs0QcffKDf/OY3133/AAAIBrbGPisrS62trVq1apWam5uVkpKiiooKJSQkSLr4JTp//5n7e+65x/tzTU2N3nrrLSUkJOjYsWOSpPT0dO3cuVPLly/XihUrNGrUKJWXl2vChAnXbb8AAAgmtn7OPljxOXvcbPic/c3l7NmzmjFjhvf+nj17FBUVZeOK0NdsvxofAAD0LWIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYLtXsBAHC1XIu3270EIzg6OxT9jfuTVuyUFRpu23pMULNmjt1LuCKO7AEAMByxBwDAcMQeAADDEXsAAAxH7AEAMByxBwDAcMQeAADDEXsAAAxH7AEAMByxBwDAcMQeAADDEXsAAAxH7AEAMByxBwDAcMQeAADDEXsAAAxH7AEAMByxBwDAcMQeAADDhdq9AADA9WWFhKlt3OM+92E2Yg8ANxuHQ1ZouN2rwHXEaXwAAAxH7AEAMByxBwDAcMQeAADDEXsAAAxH7AEAMByxBwDAcMQeAADDEXsAAAxH7AEAMByxBwDAcMQeAADDEXsAAAxne+yLi4uVmJioyMhIuVwuHThw4Irz9+/fL5fLpcjISI0cOVKbN2/2m7N+/XqNGTNG/fv3V3x8vPLy8vT111/31S4AABDUbI19eXm5cnNzVVhYqNraWmVkZGjatGlqbGzsdn5DQ4OmT5+ujIwM1dbWatmyZVqwYIF27drlnfPmm29q6dKlWrlyperr61VaWqry8nIVFBRcr90CACCo2Pr37NetW6e5c+dq3rx5ki4ekb///vsqKSlRUVGR3/zNmzdrxIgRWr9+vSQpKSlJhw4d0tq1azVr1ixJUnV1te6//3498cQTkqTbb79djz/+uA4ePHh9dgoAgCBj25F9R0eHampqlJmZ6TOemZmpqqqqbp9TXV3tN3/
KlCk6dOiQzp8/L0l64IEHVFNT44370aNHVVFRoX/+53++7Fra29vl8Xh8bgAAmMK2I/uWlhZ1dXXJ6XT6jDudTrnd7m6f43a7u53f2dmplpYWDR06VD/4wQ906tQpPfDAA7IsS52dnZo/f76WLl162bUUFRXphRde6P1OAQAQhGy/QM/hcPjctyzLb+zb5n9zfN++fXrppZdUXFysTz75RG+//bZ+9atf6cUXX7zsNgsKCtTW1ua9nThx4lp3BwCAoGPbkf3gwYMVEhLidxR/8uRJv6P3S+Li4rqdHxoaqtjYWEnSihUrlJ2d7b0O4M4779S5c+f07//+7yosLFS/fv7/vomIiFBEREQgdgsAgKBj25F9eHi4XC6XKisrfcYrKyuVnp7e7XPS0tL85u/du1epqakKCwuTJH311Vd+QQ8JCZFlWd6zAAAA3ExsPY2fn5+v1157TWVlZaqvr1deXp4aGxuVk5Mj6eLp9Tlz5njn5+Tk6Pjx48rPz1d9fb3KyspUWlqqRYsWeec8/PDDKikp0c6dO9XQ0KDKykqtWLFCjzzyiEJCQq77PgIAYDdbP3qXlZWl1tZWrVq1Ss3NzUpJSVFFRYUSEhIkSc3NzT6fuU9MTFRFRYXy8vK0adMmDRs2TBs2bPB+7E6Sli9fLofDoeXLl6upqUlDhgzRww8/rJdeeum67x8AAMHAYXFu24/H41F0dLTa2to0cODAgG3XtXh7wLYFBFLNmjnfPikI8B5CsAr295DtV+MDAIC+RewBADAcsQcAwHDEHgAAwxF7AAAMR+wBADAcsQcAwHDEHgAAwxF7AAAM16Ovy71w4YI+++wz3XnnnZKkzZs3q6Ojw/t4SEiI5s+f3+1flgMAAPboUex37typ//iP/9D+/fslSYsXL9att96q0NCLm2lpaVFkZKTmzp0b+JUCAIBr0qND8G3btnn/It0l+/fvV0NDgxoaGrRmzRr94he/COgCAQBA7/Qo9vX19UpOTr7s4xMnTtThw4d7vSgAABA4PTqN39LSoqioKO/9o0ePKjY21ns/LCxM586dC9zqAABAr/XoyN7pdOrIkSPe+0OGDPG5GK++vl5xcXGBWx0AAOi1HsX+oYce0ksvvdTtY5ZlqaioSA899FBAFgYAAAKjR6fxCwsLNX78eE2YMEGLFi3S6NGj5XA49Pnnn2vt2rU6cuSItm/f3ldrBQAA16BHsR81apQqKyv11FNPKSsrSw6HQ9LFo/qxY8dq7969uuOOO/pkoQAA4Nr0KPaSdN999+lPf/qT6urq9MUXX0iSvvvd7+qee+4J+OIAAEDv9Tj2Ho9HUVFRuvvuu3X33Xd7xy9cuKCzZ89q4MCBgVwfAADopR5doLd7926lpqbq66+/9nvs66+/1r333qt33303YIsDAAC916PYl5SU6Mc//rG+853v+D32ne98R0uWLNHGjRsDtjgAANB7PYr9p59+qkmTJl328QcffFB//OMfe7smAAAQQD2K/enTp9XZ2XnZx8+fP6/Tp0/3elEAACBwehT722+/XYcOHbrs44cOHVJCQkKvFwUAAAKnR7H/13/9VxUWFuqvf/2r32Nut1vLly/XrFmzArY4AADQez366N3SpUu1Z88effe739Xs2bM1ZswYORwO1dfX680331R8fLyWLl3aV2sFAADXoEexHzBggD7++GMVFBSovLzc+/v5mJgYzZ49W6tXr9aAAQP6ZKEAAODa9PhLdaKjo1VcXKxNmzappaVFlmVpyJAh3q/OBQAAwaXHsb+ktbVVx48fl8PhUEhIiM/ftQcAAMGjRxfoSdJnn32mBx98UE6nUxMmTNB9992n2267Td/73vd8/tY9AAAIDj06sne73Zo4caKGDBmidevWaezYsbIsS3/605+0detWZWRk6NNPP9Vtt93WV+sFAAA91KPY/+xnP1NCQoI+/vhjRUZGesenTp2q+fPn64EHHtDPfvYzFRUVBXyhAADg2vToNH5
lZaWWLFniE/pL+vfvr8WLF+v9998P2OIAAEDv9Sj2R48e1fjx4y/7eGpqqo4ePdrrRQEAgMDpUezPnDlzxb9XP2DAAJ09e7bXiwIAAIHT44/enTlzptvT+JLk8XhkWVavFwUAAAKnR7G3LEujR4++4uN8uQ4AAMGlR7H/6KOP+modAACgj/Qo9hMnTuyrdQAAgD7So9j369fvW0/TOxwOdXZ29mpRAAAgcHoU+927d1/2saqqKv385z/nAj0AAIJMj2I/Y8YMv7HPP/9cBQUFevfdd/Xkk0/qxRdfDNjiAABA7/X4D+Fc8pe//EXPPPOMxo0bp87OTtXV1emNN97QiBEjArk+AADQSz2OfVtbm5YsWaI77rhDn332mT788EO9++67SklJ6Yv1AQCAXurRafxXXnlFL7/8suLi4rRjx45uT+sDAIDg0qPYL126VP3799cdd9yhN954Q2+88Ua3895+++2ALA4AAPRej2I/Z84cviEPAIAbTI9i//rrr/fRMgAAQF+55qvxAQDAjYHYAwBgONtjX1xcrMTEREVGRsrlcunAgQNXnL9//365XC5FRkZq5MiR2rx5s9+c//u//9Ozzz6roUOHKjIyUklJSaqoqOirXQAAIKjZGvvy8nLl5uaqsLBQtbW1ysjI0LRp09TY2Njt/IaGBk2fPl0ZGRmqra3VsmXLtGDBAu3atcs7p6OjQ//0T/+kY8eO6Ze//KWOHDmirVu3avjw4ddrtwAACCo9ukAv0NatW6e5c+dq3rx5kqT169fr/fffV0lJiYqKivzmb968WSNGjND69eslSUlJSTp06JDWrl2rWbNmSZLKysr05ZdfqqqqSmFhYZKkhISEK66jvb1d7e3t3vsejycQuwcAQFCw7ci+o6NDNTU1yszM9BnPzMxUVVVVt8+prq72mz9lyhQdOnRI58+flyS98847SktL07PPPiun06mUlBStXr1aXV1dl11LUVGRoqOjvbf4+Phe7h0AAMHDtti3tLSoq6tLTqfTZ9zpdMrtdnf7HLfb3e38zs5OtbS0SJKOHj2qX/7yl+rq6lJFRYWWL1+uV199VS+99NJl11JQUKC2tjbv7cSJE73cOwAAgoetp/El+X1Jj2VZV/zinu7mf3P8woULuu2227RlyxaFhITI5XLpL3/5i9asWaOf/OQn3W4zIiJCERERvdkNAACClm2xHzx4sEJCQvyO4k+ePOl39H5JXFxct/NDQ0MVGxsrSRo6dKjCwsIUEhLinZOUlCS3262Ojg6Fh4cHeE8AAAhutp3GDw8Pl8vlUmVlpc94ZWWl0tPTu31OWlqa3/y9e/cqNTXVezHe/fffrz//+c+6cOGCd84XX3yhoUOHEnoAwE3J1o/e5efn67XXXlNZWZnq6+uVl5enxsZG5eTkSLr4u/Q5c+Z45+fk5Oj48ePKz89XfX29ysrKVFpaqkWLFnnnzJ8/X62trVq4cKG++OIL/dd//ZdWr16tZ5999rrvHwAAwcDW39lnZWWptbVVq1atUnNzs1JSUlRRUeH9qFxzc7PPZ+4TExNVUVGhvLw8bdq0ScOGDdOGDRu8H7uTpPj4eO3du1d5eXkaN26chg8froULF2rJkiXXff8AAAgGDuvSFW7w8ng8io6OVltbmwYOHBiw7boWbw/YtoBAqlkz59snBQHeQwhWwf4esv3rcgEAQN8i9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGI7YAwBgOGIPAID
hiD0AAIYj9gAAGI7YAwBgOGIPAIDhiD0AAIYj9gAAGM722BcXFysxMVGRkZFyuVw6cODAFefv379fLpdLkZGRGjlypDZv3nzZuTt37pTD4dDMmTMDvGoAAG4ctsa+vLxcubm5KiwsVG1trTIyMjRt2jQ1NjZ2O7+hoUHTp09XRkaGamtrtWzZMi1YsEC7du3ym3v8+HEtWrRIGRkZfb0bAAAENVtjv27dOs2dO1fz5s1TUlKS1q9fr/j4eJWUlHQ7f/PmzRoxYoTWr1+vpKQkzZs3T//2b/+mtWvX+szr6urSk08+qRdeeEEjR468HrsCAEDQsi32HR0dqqmpUWZmps94Zmamqqqqun1OdXW13/wpU6bo0KFDOn/+vHds1apVGjJkiObOnXtVa2lvb5fH4/G5AQBgCtti39LSoq6uLjmdTp9xp9Mpt9vd7XPcbne38zs7O9XS0iJJ+vjjj1VaWqqtW7de9VqKiooUHR3tvcXHx/dwbwAACF62X6DncDh87luW5Tf2bfMvjZ85c0azZ8/W1q1bNXjw4KteQ0FBgdra2ry3EydO9GAPAAAIbqF2vfDgwYMVEhLidxR/8uRJv6P3S+Li4rqdHxoaqtjYWH322Wc6duyYHn74Ye/jFy5ckCSFhobqyJEjGjVqlN92IyIiFBER0dtdAgAgKNl2ZB8eHi6Xy6XKykqf8crKSqWnp3f7nLS0NL/5e/fuVWpqqsLCwjR27Fj98Y9/VF1dnff2yCOP6B//8R9VV1fH6XkAwE3JtiN7ScrPz1d2drZSU1OVlpamLVu2qLGxUTk5OZIunl5vamrS9u3bJUk5OTnauHGj8vPz9cwzz6i6ulqlpaXasWOHJCkyMlIpKSk+r3HrrbdKkt84AAA3C1tjn5WVpdbWVq1atUrNzc1KSUlRRUWFEhISJEnNzc0+n7lPTExURUWF8vLytGnTJg0bNkwbNmzQrFmz7NoFAACCnsO6dIUbvDwej6Kjo9XW1qaBAwcGbLuuxdsDti0gkGrWzLF7CVeF9xCCVbC/h2y/Gh8AAPQtYg8AgOGIPQAAhiP2AAAYjtgDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGA4Yg8AgOGIPQAAhrM99sXFxUpMTFRkZKRcLpcOHDhwxfn79++Xy+VSZGSkRo4cqc2bN/s8vnXrVmVkZCgmJkYxMTGaPHmyDh482Je7AABAULM19uXl5crNzVVhYaFqa2uVkZGhadOmqbGxsdv5DQ0Nmj59ujIyMlRbW6tly5ZpwYIF2rVrl3fOvn379Pjjj+ujjz5SdXW1RowYoczMTDU1NV2v3QIAIKg4LMuy7HrxCRMmaPz48SopKfGOJSUlaebMmSoqKvKbv2TJEr3zzjuqr6/3juXk5Ojw4cOqrq7u9jW6uroUExOjjRs3as6cOd3OaW9vV3t7u/e+x+NRfHy82traNHDgwGvdPT+uxdsDti0gkGrWdP/eCDa8hxCsgv09ZNuRfUdHh2pqapSZmekznpmZqaqqqm6fU11d7Td/ypQpOnTokM6fP9/tc7766iudP39egwYNuuxaioqKFB0d7b3Fx8f3cG8AAAhetsW+paVFXV1dcjqdPuNOp1Nut7vb57jd7m7nd3Z2qqWlpdvnLF26VMOHD9fkyZMvu5aCggK1tbV5bydOnOjh3gAAELxC7V6Aw+HwuW9Zlt/Yt83vblySXnnlFe3YsUP79u1
TZGTkZbcZERGhiIiIniwbAIAbhm2xHzx4sEJCQvyO4k+ePOl39H5JXFxct/NDQ0MVGxvrM7527VqtXr1aH3zwgcaNGxfYxQMAcAOx7TR+eHi4XC6XKisrfcYrKyuVnp7e7XPS0tL85u/du1epqakKCwvzjq1Zs0Yvvviifv3rXys1NTXwiwcA4AZi60fv8vPz9dprr6msrEz19fXKy8tTY2OjcnJyJF38Xfo3r6DPycnR8ePHlZ+fr/r6epWVlam0tFSLFi3yznnllVe0fPlylZWV6fbbb5fb7Zbb7dbZs2ev+/4BABAMbP2dfVZWllpbW7Vq1So1NzcrJSVFFRUVSkhIkCQ1Nzf7fOY+MTFRFRUVysvL06ZNmzRs2DBt2LBBs2bN8s4pLi5WR0eHHn30UZ/XWrlypZ5//vnrsl8AAAQTWz9nH6w8Ho+io6P5nD1uGsH+GeFLeA8hWAX7e8j2r8sFAAB9i9gDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGA422NfXFysxMRERUZGyuVy6cCBA1ecv3//frlcLkVGRmrkyJHavHmz35xdu3YpOTlZERERSk5O1u7du/tq+QAABD1bY19eXq7c3FwVFhaqtrZWGRkZmjZtmhobG7ud39DQoOnTpysjI0O1tbVatmyZFixYoF27dnnnVFdXKysrS9nZ2Tp8+LCys7P1/e9/X7/73e+u124BABBUHJZlWXa9+IQJEzR+/HiVlJR4x5KSkjRz5kwVFRX5zV+yZIneeecd1dfXe8dycnJ0+PBhVVdXS5KysrLk8Xj03nvveedMnTpVMTEx2rFjx1Wty+PxKDo6Wm1tbRo4cOC17p4f1+LtAdsWEEg1a+bYvYSrwnsIwSrY30Ohdr1wR0eHampqtHTpUp/xzMxMVVVVdfuc6upqZWZm+oxNmTJFpaWlOn/+vMLCwlRdXa28vDy/OevXr7/sWtrb29Xe3u6939bWJuli9AOpq/3/BXR7QKAE+r/1vsJ7CMGqr95DAwYMkMPh6PV2bIt9S0uLurq65HQ6fcadTqfcbne3z3G73d3O7+zsVEtLi4YOHXrZOZfbpiQVFRXphRde8BuPj4+/2t0BbmjRP8+xewnADa2v3kOBOsNsW+wv+ft/sViWdcV/xXQ3/+/He7rNgoIC5efne+9fuHBBX375pWJjYwPyLyoEnsfjUXx8vE6cOBHQX7UANwveQzeGAQMGBGQ7tsV+8ODBCgkJ8TviPnnypN+R+SVxcXHdzg8NDVVsbOwV51xum5IUERGhiIgIn7Fbb731ancFNho4cCD/owJ6gffQzcG2q/HDw8PlcrlUWVnpM15ZWan09PRun5OWluY3f+/evUpNTVVYWNgV51xumwAAmM7W0/j5+fnKzs5Wamqq0tLStGXLFjU2Nion5+LvPgoKCtTU1KTt2y9egZuTk6ONGzcqPz9fzzzzjKqrq1VaWupzlf3ChQv14IMP6uWXX9aMGTO0Z88effDBB/rNb35jyz4CAGA3W2OflZWl1tZWrVq1Ss3NzUpJSVFFRYUSEhIkSc3NzT6fuU9MTFRFRYXy8vK0adMmDRs2TBs2bNCsWbO8c9LT07Vz504tX75cK1as0KhRo1ReXq4JEyZc9/1D34mIiNDKlSv9fv0C4OrwHrq52Po5ewAA0Pds/7pcAADQt4g9AACGI/YAABi
O2AMAYDhij6BTVVWlkJAQTZ061Wf82LFjcjgcqqur6/Z5XV1dKioq0tixY9W/f38NGjRI//AP/6Bt27Zdh1UDN4annnpKM2fO9PsZZrP963KBv1dWVqbnnntOr732mhobGzVixIiret7zzz+vLVu2aOPGjUpNTZXH49GhQ4d0+vTpPl4xAAQ3Yo+gcu7cOf3nf/6nfv/738vtduv111/XT37yk6t67rvvvqsf/vCHeuyxx7xjd911V18tFQBuGJzGR1ApLy/XmDFjNGbMGM2ePVvbtm3T1X4VRFxcnP77v/9bp06d6uNVAsCNhdgjqJSWlmr27NmSpKlTp+rs2bP68MMPr+q569at06lTpxQXF6dx48YpJydH7733Xl8uFwBuCMQeQePIkSM6ePCgfvCDH0iSQkNDlZWVpbKysqt6fnJysj799FP99re/1dNPP62//vWvevjhhzVv3ry+XDYABD1+Z4+gUVpaqs7OTg0fPtw7ZlmWwsLCrvoiu379+unee+/Vvffeq7y8PP3iF79Qdna2CgsLlZiY2FdLB4CgxpE9gkJnZ6e2b9+uV199VXV1dd7b4cOHlZCQoDfffPOatpucnCzp4oV/AHCz4sgeQeFXv/qVTp8+rblz5yo6OtrnsUcffVSlpaX6l3/5F0kXT/f/veTkZD3xxBO6//77lZ6erri4ODU0NKigoECjR4/W2LFjr8t+AEAwIvYICqWlpZo8ebJf6CVp1qxZWr16tb788ktJ8v5O/5saGho0ZcoU7dixQ0VFRWpra1NcXJy+973v6fnnn1doKP+pA7h58SduAQAwHL+zBwDAcMQeAADDEXsAAAxH7AEAMByxBwDAcMQeAADDEXsAAAxH7AEAMByxB3BdTZo0Sbm5uVc9//XXX9ett97aZ+sBbgbEHgAAwxF7AAAMR+wBSLp4ev25555Tbm6uYmJi5HQ6tWXLFp07d05PP/20BgwYoFGjRum9997zPmf//v267777FBERoaFDh2rp0qXq7Oz0Pn7u3DnNmTNHUVFRGjp0qF599VW/1+3o6NCPf/xjDR8+XLfccosmTJigffv2XY9dBm4axB6A1xtvvKHBgwfr4MGDeu655zR//nw99thjSk9P1yeffKIpU6YoOztbX331lZqamjR9+nTde++9Onz4sEpKSlRaWqqf/vSn3u0tXrxYH330kXbv3q29e/dq3759qqmp8XnNp59+Wh9//LF27typP/zhD3rsscc0depU/c///M/13n3AXBYAWJY1ceJE64EHHvDe7+zstG655RYrOzvbO9bc3GxJsqqrq61ly5ZZY8aMsS5cuOB9fNOmTVZUVJTV1dVlnTlzxgoPD7d27tzpfby1tdXq37+/tXDhQsuyLOvPf/6z5XA4rKamJp+1PPTQQ1ZBQYFlWZa1bds2Kzo6ug/2GLh58Ee+AXiNGzfO+3NISIhiY2N15513esecTqck6eTJk6qvr1daWpocDof38fvvv19nz57V//7v/+r06dPq6OhQWlqa9/FBgwZpzJgx3vuffPKJLMvS6NGjfdbR3t6u2NjYgO8fcLMi9gC8wsLCfO47HA6fsUthv3DhgizL8gm9JFmW5Z136ecruXDhgkJCQlRTU6OQkBCfx6Kioq5pHwD4I/YArklycrJ27drlE/2qqioNGDBAw4cPV0xMjMLCwvTb3/5WI0aMkCSdPn1aX3zxhSZOnChJuueee9TV1aWTJ08qIyPDtn0BTMcFegCuyQ9/+EOdOHFCzz33nD7//HPt2bNHK1euVH5+vvr166eoqCjNnTtXixcv1ocffqhPP/1UTz31lPr1+9v/dkaPHq0nn3xSc+bM0dtvv62Ghgb9/ve/18svv6yKigob9w4wC0f2AK7J8OHDVVFRocWLF+uuu+7SoEGDNHfuXC1fvtw7Z82aNTp79qweeeQRDRgwQD/60Y/U1tbms51t27bppz/9qX70ox+pqalJsbGxSktL0/Tp06/3LgHGclh
X84s1AABww+I0PgAAhiP2AAAYjtgDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGA4Yg8AgOGIPQAAhiP2AAAYjtgDAGC4/w+4RNonQGoFYgAAAABJRU5ErkJggg==", "text/plain": [ "
" ] @@ -383,7 +383,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "dev-full", "language": "python", "name": "python3" }, diff --git a/docs/guide/batch.rst b/docs/guide/batch.rst index a1e219935..73e4c17a0 100644 --- a/docs/guide/batch.rst +++ b/docs/guide/batch.rst @@ -39,7 +39,7 @@ For an example, let's start with importing things to run a quick batch: Load and split some data: >>> data = load_movielens('data/ml-100k.zip') - >>> split = sample_users(data, 150, SampleN(5)) + >>> split = sample_users(data, 150, SampleN(5, rng=1024), rng=42) Configure and train the model: @@ -62,9 +62,9 @@ And measure their results: >>> measure.add_metric(RBP()) >>> scores = measure.compute(recs, split.test) >>> scores.list_summary() # doctest: +ELLIPSIS - mean median std + mean median std metric - RBP 0.07... 0.0... 0.1... + RBP 0.09... 0.0... 0.1... The :py:func:`predict` function works similarly, but for rating predictions; diff --git a/docs/guide/conventions.rst b/docs/guide/conventions.rst index 1e557417f..fa02eedfa 100644 --- a/docs/guide/conventions.rst +++ b/docs/guide/conventions.rst @@ -42,11 +42,11 @@ splitting support <./splitting>`_. Now that `SPEC 7`_ has standardized RNG seeding across the scientific Python ecosystem, we use that with some lightweight helpers in the - :mod:`lenskit.util.random` module instead of using SeedBank. + :mod:`lenskit.random` module instead of using SeedBank. LensKit extends SPEC 7 with a global RNG that components can use as a fallback, to make it easier to configure system-wide generation for things like tests. -This is configured with :func:`~lenskit.util.random.set_global_rng`. +This is configured with :func:`~lenskit.random.set_global_rng`. When implementing a component that uses randomness in its training, we recommend deferring conversion of the provided RNG into an actual generator until @@ -56,7 +56,7 @@ When using the RNG to create initial state for e.g. 
training a model with PyTorch, it can be useful to create that state in NumPy and then convert to a tensor, so that components are consistent in their random number generation behavior instead of having variation between NumPy and other backends. -Components can use the :func:`~lenskit.util.random_generator` function to +Components can use the :func:`~lenskit.random_generator` function to convert seed material or a generator into a NumPy generator, falling back to the global RNG if one is specified. diff --git a/docs/guide/examples/blendcomp.py b/docs/guide/examples/blendcomp.py new file mode 100644 index 000000000..1b832fb75 --- /dev/null +++ b/docs/guide/examples/blendcomp.py @@ -0,0 +1,43 @@ +from pydantic import BaseModel + +from lenskit.data import ItemList +from lenskit.pipeline import Component + + +class LinearBlendConfig(BaseModel): + "Configuration for :class:`LinearBlendScorer`." + + # define the parameter with a type, default value, and docstring. + mix_weight: float = 0.5 + r""" + Linear blending mixture weight :math:`\alpha`. + """ + + +class LinearBlendScorer(Component): + r""" + Score items with a linear blend of two other scores. + + Given a mixture weight :math:`\alpha` and two scores + :math:`s_i^{\mathrm{left}}` and :math:`s_i^{\mathrm{right}}`, this + computes :math:`s_i = \alpha s_i^{\mathrm{left}} + (1 - \alpha) + s_i^{\mathrm{right}}`. Missing values propagate, so only items + scored in both inputs have scores in the output. + """ + + # define the configuration attribute, with a docstring to make sure + # it shows up in component docs. + config: LinearBlendConfig + "Configuration parameters for the linear blend." + + # the __call__ method defines the component's operation + def __call__(self, left: ItemList, right: ItemList) -> ItemList: + """ + Blend the scores of two item lists. 
+ """ + ls = left.scores("pandas", index="ids") + rs = right.scores("pandas", index="ids") + ls, rs = ls.align(rs) + alpha = self.config.mix_weight + combined = ls * alpha + rs * (1 - alpha) + return ItemList(item_ids=combined.index, scores=combined.values) diff --git a/docs/guide/migrating.rst b/docs/guide/migrating.rst index 171dc2396..ccc04957b 100644 --- a/docs/guide/migrating.rst +++ b/docs/guide/migrating.rst @@ -71,6 +71,11 @@ New code should use :py:func:`lenskit.data.from_interactions_df` to convert a Pa data frame into a :py:func:`~lenskit.data.Dataset`, or one of the standard loaders such as :py:func:`lenskit.data.load_movielens`. +While most LensKit data frame code still recognizes the legacy ``user`` and +``item`` columns from LensKit 0.14 and earlier, data frames of LensKit data +should use the column names ``user_id`` and ``item_id`` instead, to +unambiguously distinguish them from user and item numbers. + Additional dataset construction support and possible implementations (e.g. database-backed datasets) are coming, but this is the migration path for the typical code patterns used in LensKit 0.14 and earlier. @@ -180,10 +185,18 @@ them for very different ways of turning scoring models into full recommenders. .. note:: Since 2025, we no longer use the term “algorithm” in LensKit, as it is - ambiguous and promotes confusion about very different things. Instead we + ambiguous and promotes confusion about very different things. Instead, we have “pipelines” consisting of ”components”, some of which may be ”models” (for scoring, ranking, etc.). +Configuration Components +........................ + +Individual components now use Pydantic_ models to represent their configuration +(e.g. hyperparameters). This is to reduce redundancy, improve documentation, +enable consistent serialization, and validate parameter values in a consistent +and automated fashion. See :ref:`component-config` for details. 
+ Obtaining Recommendations ------------------------- diff --git a/docs/guide/pipeline.rst b/docs/guide/pipeline.rst index 585889d6e..984a380ba 100644 --- a/docs/guide/pipeline.rst +++ b/docs/guide/pipeline.rst @@ -23,20 +23,6 @@ some more flexibility in configuring a recommendation pipeline with a standard design, and you can always fully configure the pipeline yourself for maximum flexibility. -.. todo:: - Redo some of those types with user & item data, etc. - -.. todo:: - Provide utility functions to make more common wiring operations easy so there - is middle ground between “give me a standard pipeline” and “make me do everything - myself”. - -.. todo:: - Rethink the “keyword inputs only” constraint in view of the limitation it - places on fallback or other compositional components — it's hard to specify - a component that implements fallback logic for an arbitrary number of - inputs. - Pipeline components are not limited to looking things up from training data — they can query databases, load files, and any other operations. A runtime pipeline can use some components (especially the scorer) trained from training @@ -107,7 +93,9 @@ A pipeline has a couple key concepts: types, and it is an error to provide an input value of an unexpected type. * A **component** processes input data and produces an output. It can be either a Python function or object (anything that implements the :class:`Component` - protocol) that takes inputs as keyword arguments and returns an output. + protocol) that takes zero or more inputs as keyword arguments and returns an + output. The pipeline will supply these inputs either from pipeline inputs + or from the outputs of other components. These are arranged in a directed acyclic graph, consisting of: @@ -135,9 +123,9 @@ value for that parameter when running the pipeline. 
Inputs can be connected to the following types: * A :class:`Node`, in which case the input will be provided from the - corresponding pipeline input or component return value. Nodes are - returned by :meth:`create_input` or :meth:`add_component`, and can be - looked up after creation with :meth:`node`. + corresponding pipeline input or component return value. Nodes are returned by + :meth:`~Pipeline.create_input` or :meth:`~Pipeline.add_component`, and can be + looked up after creation with :meth:`~Pipeline.node`. * A Python object, in which case that value will be provided directly to the component input argument. @@ -146,35 +134,34 @@ These input connections are specified via keyword arguments to the component's input name(s) and the node or data to which each input should be wired. -You can also use :meth:`Pipeline.add_default` to specify default connections. For example, -you can specify a default for ``user``:: +.. + You can also use :meth:`Pipeline.add_default` to specify default connections. For example, + you can specify a default for ``user``:: - pipe.add_default('user', user_history) + pipe.add_default('user', user_history) -With this default in place, if a component has an input named ``user`` and that -input is not explicitly connected to a node, then the ``user_history`` node will -be used to supply its value. Judicious use of defaults can reduce the amount of -code overhead needed to wire common pipelines. + With this default in place, if a component has an input named ``user`` and that + input is not explicitly connected to a node, then the ``user_history`` node will + be used to supply its value. Judicious use of defaults can reduce the amount of + code overhead needed to wire common pipelines. .. note:: You cannot directly wire an input another component using only that - component's name; if you only have a name, pass it to :meth:`node` - to obtain the node. 
This is because it would be impossible to - distinguish between a string component name and a string data value. - -.. note:: - - You do not usually need to call this method directly; when possible, - provide the wirings when calling :meth:`add_component`. + component's name; if you only have a name, pass it to :meth:`Pipeline.node` + to obtain the node. This is because it would be impossible to distinguish + between a string component name and a string data value. .. _pipeline-execution: Execution --------- -Once configured, a pipeline can be run with :meth:`Pipeline.run`. This -method takes two types of inputs: +Once configured, a pipeline can be run with :meth:`Pipeline.run`, or with one of +the operation functions (see :ref:`recommender-ops`; these functions call +:meth:`~Pipeline.run` under the hood). + +The :meth:`~Pipeline.run` method takes two types of inputs: * Positional arguments specifying the node(s) to run and whose results should be returned. This is to allow partial runs of pipelines (e.g. to only score @@ -186,7 +173,7 @@ method takes two types of inputs: component that was added to the pipeline. * Keyword arguments specifying the values for the pipeline's inputs, as defined by - calls to :meth:`create_input`. + calls to :meth:`Pipeline.create_input`. Pipeline execution logically proceeds in the following steps: @@ -248,7 +235,7 @@ Pipelines are defined by the following: * The components and inputs (nodes) * The component input connections (edges) -* The component configurations (see :class:`Configurable` and :class:`Component`) +* The component configurations (see :class:`Component`) * The components' learned parameters (see :class:`Trainable`) LensKit supports serializing both pipeline descriptions (components, @@ -259,13 +246,12 @@ two ways to save a pipeline or part thereof: pipeline; it has the usual downsides of pickling (arbitrary code execution, etc.). 
LensKit uses pickling to share pipelines with worker processes for parallel batch operations. -2. Save the pipeline configuration with :meth:`Pipeline.save_config`. This saves +2. Save the pipeline configuration with :meth:`Pipeline.get_config`. This saves the components, their configurations, and their connections, but **not** any learned parameter data. A new pipeline can be constructed from such a configuration can be reloaded with :meth:`Pipeline.from_config`. .. - 3. Save the pipeline parameters with :meth:`Pipeline.save_params`. This saves the learned parameters but **not** the configuration or connections. The parameters can be reloaded into a compatible pipeline with @@ -353,9 +339,105 @@ Component Interface Pipeline components are callable objects that can optionally provide configuration, training, and serialization capabilities. In the simplest case, a component that requires no training or configuration can simply be a Python -function; most components will extend the :class:`Component` base class to -expose configuration capabilities, and implement the :class:`Trainable` protocol -if they contain a model that needs to be trained. +function. + +Most components will extend the :class:`Component` base class to expose +configuration capabilities, and implement the :class:`Trainable` protocol if +they contain a model that needs to be trained. Components also must be pickleable, as LensKit uses pickling for shared memory parallelism in its batch-inference code. + +.. _component-config: + +Configuring Components +---------------------- + +Unlike components in some other machine learning packages, LensKit components +carry their configuration in a separate *configuration object* that can be +serialized to and from JSON-like data structures. + +To support configuration, all a component needs to do is (1) extend +:class:`Component`, and (2) declare an instance variable whose type is the +configuration object type. 
This configuration object's class can be either a +Python dataclass (see :mod:`dataclasses`) or a Pydantic model class (see +:class:`pydantic.BaseModel`); in both cases, they are serialized and validated +with Pydantic. :class:`Component.__init__` will take care of storing the +configuration object if one is provided, or instantiating the configuration +class with defaults or from keyword arguments. In most cases, you don't need +to define a constructor for a component. + +.. admonition:: Motivation + :class: note + + Splitting configuration off into a separate configuration model class, + instead of making them attributes and constructor parameters for the + component class itself, is for a few reasons: + + - Pydantic validation ensures that hyperparameters are of correct types + (and ranges, if you use more sophisticated Pydantic validations), + without needing to write as much manual input validation code in each + component. + - Declaring parameters as attributes, as keyword parameters to the + constructor, and saving them in the attributes is a lot of duplication + that increases opportunity for errors. + - It's slightly easier to document configuration parameters, and keep them + separate from other potential inputs, when they are in a configuration + class. + - Using Pydantic models provides consistent serialization of component + configurations to and from configuration files. + - The base class can provide well-defined and complete string + representations for free to all component implementations. + +.. _component-impl: + +Implementing Components +----------------------- + +Implementing a component therefore consists of a few steps: + +1. Defining the configuration class. +2. Defining the component class, with its `config` attribute declaration. +3. Defining a `__call__` method for the component class that performs the + component's actual computation. +4. 
If the component supports training, implementing the :class:`Trainable` + protocol by defining a :meth:`Trainable.train` method. + +A simple example component that computes a linear weighted blend of the scores +from two other components could look like this: + +.. literalinclude:: examples/blendcomp.py + +This component can be instantiated with its defaults: + +.. testsetup:: + + from blendcomp import LinearBlendScorer, LinearBlendConfig + + +.. doctest:: + + >>> LinearBlendScorer() + + +You can instantiate it with its configuration class: + +.. doctest:: + + >>> LinearBlendScorer(LinearBlendConfig(mix_weight=0.2)) + + +Finally, you can directly pass configuration parameters to the component constructor: + +.. doctest:: + + >>> LinearBlendScorer(mix_weight=0.7) + + +See :ref:`conventions` for more conventions for component design. diff --git a/docs/guide/queries.rst b/docs/guide/queries.rst index 5237c459a..038c157c3 100644 --- a/docs/guide/queries.rst +++ b/docs/guide/queries.rst @@ -39,6 +39,8 @@ You can also pass a raw user identifier or item list to the pipeline, as the key recommendation operations and most components accept a :class:`QueryInput` and pass it to :meth:`~RecQuery.create` to upgrade to a query. +.. 
_recommender-ops: + Invoking Recommenders ~~~~~~~~~~~~~~~~~~~~~ diff --git a/lenskit-funksvd/lenskit/funksvd.py b/lenskit-funksvd/lenskit/funksvd.py index 742f85ebf..171d48bc0 100644 --- a/lenskit-funksvd/lenskit/funksvd.py +++ b/lenskit-funksvd/lenskit/funksvd.py @@ -14,18 +14,51 @@ import numba as n import numpy as np from numba.experimental import jitclass +from pydantic import BaseModel from typing_extensions import override from lenskit import util -from lenskit.basic import BiasModel -from lenskit.data import Dataset, ItemList, QueryInput, RecQuery, UITuple, Vocabulary +from lenskit.basic import BiasModel, Damping +from lenskit.data import Dataset, ItemList, QueryInput, RecQuery, Vocabulary from lenskit.pipeline import Component, Trainable -from lenskit.types import RNGInput -from lenskit.util.random import random_generator +from lenskit.random import ConfiguredSeed, random_generator _logger = logging.getLogger(__name__) +class FunkSVDConfig(BaseModel): + "Configuration for :class:`FunkSVDScorer`." + + features: int = 50 + """ + Number of latent features. + """ + epochs: int = 100 + """ + Number of training epochs (per feature). + """ + learning_rate: float = 0.001 + """ + Gradient descent learning rate. + """ + regularization: float = 0.015 + """ + Parameter regularization. + """ + damping: Damping = 5.0 + """ + Bias damping term. + """ + range: tuple[float, float] | None = None + """ + Min/max range of ratings to clamp output. + """ + rng: ConfiguredSeed = None + """ + RNG seed. + """ + + @jitclass( [ ("user_features", n.float64[:, :]), @@ -198,7 +231,7 @@ def _align_add_bias(bias, index, keys, series): return bias, series -class FunkSVDScorer(Component, Trainable): +class FunkSVDScorer(Trainable, Component): """ FunkSVD explicit-feedback matrix factoriation. 
FunkSVD is a regularized biased matrix factorization technique trained with featurewise stochastic @@ -214,35 +247,9 @@ class FunkSVDScorer(Component, Trainable): Stability: Caller - - Args: - features: - the number of features to train - iterations: - the number of iterations to train each feature - lrate: - the learning rate - reg: - the regularization factor - damping: - damping factor for the underlying mean - bias: - the underlying bias model to fit. If ``True``, then a - :py:class:`lenskit.basic.BiasModel` model is fit with ``damping``. - range: - the ``(min, max)`` rating values to clamp ratings, or ``None`` to - leave predictions unclamped. - rng: - The random seed for shuffling the input data (see :ref:`rng`). """ - features: int - iterations: int - lrate: float - reg: float - damping: UITuple[float] - range: tuple[float, float] | None - rng: RNGInput + config: FunkSVDConfig bias_: BiasModel users_: Vocabulary @@ -250,25 +257,6 @@ class FunkSVDScorer(Component, Trainable): items_: Vocabulary item_features_: np.ndarray[tuple[int, int], np.dtype[np.float64]] - def __init__( - self, - features: int = 50, - iterations: int = 100, - *, - lrate: float = 0.001, - reg: float = 0.015, - damping: UITuple[float] | float | tuple[float, float] = 5.0, - range: tuple[float, float] | None = None, - rng: RNGInput = None, - ): - self.features = features - self.iterations = iterations - self.lrate = lrate - self.reg = reg - self.damping = UITuple.create(damping) - self.range = range - self.rng = rng - @property def is_trained(self) -> bool: return hasattr(self, "item_features_") @@ -285,12 +273,12 @@ def train(self, data: Dataset): rate_df = data.interaction_matrix(format="pandas", layout="coo", field="rating") _logger.info("[%s] fitting bias model", timer) - self.bias_ = BiasModel.learn(data, damping=self.damping) + self.bias_ = BiasModel.learn(data, damping=self.config.damping) _logger.info("[%s] preparing rating data for %d samples", timer, len(rate_df)) 
_logger.debug("shuffling rating data") shuf = np.arange(len(rate_df), dtype=np.int_) - rng = random_generator(self.rng) + rng = random_generator(self.config.rng) rng.shuffle(shuf) rate_df = rate_df.iloc[shuf, :] @@ -310,11 +298,16 @@ def train(self, data: Dataset): _logger.debug("[%s] initializing data structures", timer) context = Context(users, items, ratings, initial) - params = make_params(self.iterations, self.lrate, self.reg, self.range) + params = make_params( + self.config.epochs, + self.config.learning_rate, + self.config.regularization, + self.config.range, + ) - model = _fresh_model(self.features, data.users.size, data.items.size) + model = _fresh_model(self.config.features, data.users.size, data.items.size) - _logger.info("[%s] training biased MF model with %d features", timer, self.features) + _logger.info("[%s] training biased MF model with %d features", timer, self.config.features) train(context, params, model, timer) _logger.info("finished model training in %s", timer) @@ -347,6 +340,3 @@ def __call__(self, query: QueryInput, items: ItemList) -> ItemList: scores += biases return ItemList(items, scores=scores) - - def __str__(self): - return "FunkSVD(features={}, reg={})".format(self.features, self.reg) diff --git a/lenskit-funksvd/tests/test_funksvd.py b/lenskit-funksvd/tests/test_funksvd.py index 49bdb5f40..9b2babba4 100644 --- a/lenskit-funksvd/tests/test_funksvd.py +++ b/lenskit-funksvd/tests/test_funksvd.py @@ -16,7 +16,7 @@ from lenskit import batch from lenskit.data import Dataset, ItemList, ItemListCollection, UserIDKey, from_interactions_df from lenskit.funksvd import FunkSVDScorer -from lenskit.metrics import call_metric, quick_measure_model +from lenskit.metrics import quick_measure_model from lenskit.pipeline.common import predict_pipeline from lenskit.testing import BasicComponentTests, ScorerTests, wantjit @@ -33,7 +33,9 @@ class TestFunkSVD(BasicComponentTests, ScorerTests): def test_fsvd_basic_build(): - algo = FunkSVDScorer(20, 
iterations=20) + algo = FunkSVDScorer(features=20, epochs=20) + assert algo.config is not None + assert algo.config.features == 20 algo.train(simple_ds) assert algo.bias_ is not None @@ -43,7 +45,7 @@ def test_fsvd_basic_build(): def test_fsvd_clamp_build(): - algo = FunkSVDScorer(20, iterations=20, range=(1, 5)) + algo = FunkSVDScorer(features=20, epochs=20, range=(1, 5)) algo.train(simple_ds) assert algo.bias_ is not None @@ -53,7 +55,7 @@ def test_fsvd_clamp_build(): def test_fsvd_predict_basic(): - algo = FunkSVDScorer(20, iterations=20) + algo = FunkSVDScorer(features=20, epochs=20) algo.train(simple_ds) assert algo.bias_ is not None @@ -71,7 +73,7 @@ def test_fsvd_predict_basic(): def test_fsvd_predict_clamp(): - algo = FunkSVDScorer(20, iterations=20, range=(1, 5)) + algo = FunkSVDScorer(features=20, epochs=20, range=(1, 5)) algo.train(simple_ds) assert algo.bias_ is not None @@ -89,7 +91,7 @@ def test_fsvd_predict_clamp(): def test_fsvd_predict_bad_item(): - algo = FunkSVDScorer(20, iterations=20) + algo = FunkSVDScorer(features=20, epochs=20) algo.train(simple_ds) assert algo.bias_ is not None @@ -106,7 +108,7 @@ def test_fsvd_predict_bad_item(): def test_fsvd_predict_bad_item_clamp(): - algo = FunkSVDScorer(20, iterations=20, range=(1, 5)) + algo = FunkSVDScorer(features=20, epochs=20, range=(1, 5)) algo.train(simple_ds) assert algo.bias_ is not None @@ -123,7 +125,7 @@ def test_fsvd_predict_bad_item_clamp(): def test_fsvd_predict_bad_user(): - algo = FunkSVDScorer(20, iterations=20) + algo = FunkSVDScorer(features=20, epochs=20) algo.train(simple_ds) assert algo.bias_ is not None @@ -142,7 +144,7 @@ def test_fsvd_predict_bad_user(): @wantjit @mark.slow def test_fsvd_save_load(ml_ds: Dataset): - original = FunkSVDScorer(20, iterations=20) + original = FunkSVDScorer(features=20, epochs=20) original.train(ml_ds) assert original.bias_ is not None @@ -168,7 +170,7 @@ def test_fsvd_save_load(ml_ds: Dataset): @wantjit @mark.slow def test_fsvd_known_preds(ml_ds: 
Dataset): - algo = FunkSVDScorer(15, iterations=125, lrate=0.001) + algo = FunkSVDScorer(features=15, epochs=125, learning_rate=0.001) _log.info("training %s on ml data", algo) pipe = predict_pipeline(algo, fallback=False) pipe.train(ml_ds) @@ -202,7 +204,9 @@ def test_fsvd_known_preds(ml_ds: Dataset): @mark.eval def test_fsvd_batch_accuracy(ml_100k: pd.DataFrame): ds = from_interactions_df(ml_100k) - results = quick_measure_model(FunkSVDScorer(25, 125, damping=10), ds, predicts_ratings=True) + results = quick_measure_model( + FunkSVDScorer(features=25, epochs=125, damping=10), ds, predicts_ratings=True + ) assert results.global_metrics().loc["MAE"] == approx(0.74, abs=0.025) assert results.list_summary().loc["RMSE", "mean"] == approx(0.92, abs=0.05) diff --git a/lenskit-implicit/lenskit/implicit.py b/lenskit-implicit/lenskit/implicit.py index 57395c185..0ea573df7 100644 --- a/lenskit-implicit/lenskit/implicit.py +++ b/lenskit-implicit/lenskit/implicit.py @@ -4,13 +4,13 @@ # Licensed under the MIT license, see LICENSE.md for details. # SPDX-License-Identifier: MIT -import inspect import logging import numpy as np from implicit.als import AlternatingLeastSquares from implicit.bpr import BayesianPersonalizedRanking from implicit.recommender_base import RecommenderBase +from pydantic import BaseModel, JsonValue from scipy.sparse import csr_matrix from typing_extensions import override @@ -26,26 +26,28 @@ ] + +class ImplicitConfig(BaseModel, extra="allow"): + __pydantic_extra__: dict[str, JsonValue] + + +class ImplicitALSConfig(ImplicitConfig, extra="allow"): + weight: float = 40.0 + + class BaseRec(Component, Trainable): """ Base class for Implicit-backed recommenders. Stability: Caller - - Args: - delegate: - The delegate algorithm. """ + config: ImplicitConfig delegate: RecommenderBase """ The delegate algorithm from :mod:`implicit`. """ - weight: float - """ - The weight for positive examples (only used by some algorithms). 
- """ + weight: float = 1.0 matrix_: csr_matrix """ @@ -60,10 +62,6 @@ class BaseRec(Component, Trainable): The item ID mapping from training. """ - def __init__(self, delegate: RecommenderBase): - self.delegate = delegate - self.weight = 1.0 - @property def is_trained(self): return hasattr(self, "matrix_") @@ -72,9 +70,8 @@ def is_trained(self): def train(self, data: Dataset): matrix = data.interaction_matrix("scipy", layout="csr", legacy=True) uir = matrix * self.weight - if getattr(self.delegate, "item_factors", None) is not None: # pragma: no cover - _logger.warning("implicit algorithm already trained, re-fit is usually a bug") + self.delegate = self._construct() _logger.info("training %s on %s matrix (%d nnz)", self.delegate, uir.shape, uir.nnz) self.delegate.fit(uir) @@ -85,6 +82,9 @@ def train(self, data: Dataset): return self + def _construct(self) -> RecommenderBase: + raise NotImplementedError("implicit constructor not implemented") + @override def __call__(self, query: QueryInput, items: ItemList) -> ItemList: query = RecQuery.create(query) @@ -113,24 +113,6 @@ def __call__(self, query: QueryInput, items: ItemList) -> ItemList: return ItemList(items, scores=scores) - def __getattr__(self, name): - if "delegate" not in self.__dict__: - raise AttributeError() - dd = self.delegate.__dict__ - if name in dd: - return dd[name] - else: - raise AttributeError() - - def get_params(self, deep=True): - dd = self.delegate.__dict__ - sig = inspect.signature(self.delegate.__class__) - names = list(sig.parameters.keys()) - return dict([(k, dd.get(k)) for k in names]) - - def __str__(self): - return "Implicit({})".format(self.delegate) - class ALS(BaseRec): """ @@ -140,15 +122,14 @@ class ALS(BaseRec): Caller """ - def __init__(self, *args, weight=40.0, **kwargs): - """ - Construct an ALS recommender. The arguments are passed as-is to - :py:class:`implicit.als.AlternatingLeastSquares`. The `weight` - parameter controls the confidence weight for positive examples. 
- """ + config: ImplicitALSConfig + + @property + def weight(self): + return self.config.weight - super().__init__(AlternatingLeastSquares(*args, **kwargs)) - self.weight = weight + def _construct(self): + return AlternatingLeastSquares(**self.config.__pydantic_extra__) # type: ignore class BPR(BaseRec): @@ -159,9 +140,5 @@ class BPR(BaseRec): Caller """ - def __init__(self, *args, **kwargs): - """ - Construct a BPR recommender. The arguments are passed as-is to - :py:class:`implicit.als.BayesianPersonalizedRanking`. - """ - super().__init__(BayesianPersonalizedRanking(*args, **kwargs)) + def _construct(self): + return BayesianPersonalizedRanking(**self.config.__pydantic_extra__) # type: ignore diff --git a/lenskit-implicit/tests/test_implicit.py b/lenskit-implicit/tests/test_implicit.py index b994e2287..e5f1fbf42 100644 --- a/lenskit-implicit/tests/test_implicit.py +++ b/lenskit-implicit/tests/test_implicit.py @@ -30,8 +30,8 @@ class TestImplicitBPR(BasicComponentTests, ScorerTests): @mark.slow def test_implicit_als_train_rec(ml_ds): - algo = ALS(25) - assert algo.factors == 25 + algo = ALS(factors=25) + assert algo.config.factors == 25 ret = algo.train(ml_ds) assert ret is algo @@ -55,7 +55,7 @@ def test_implicit_als_train_rec(ml_ds): @mark.parametrize("n_jobs", [1, None]) def test_implicit_als_batch_accuracy(ml_100k, n_jobs): ds = from_interactions_df(ml_100k) - results = quick_measure_model(ALS(25), ds, n_jobs=n_jobs) + results = quick_measure_model(ALS(factors=25), ds, n_jobs=n_jobs) ndcg = results.list_summary().loc["NDCG", "mean"] _log.info("nDCG for %d users is %.4f", len(results.list_metrics()), ndcg) @@ -64,8 +64,8 @@ def test_implicit_als_batch_accuracy(ml_100k, n_jobs): @mark.slow def test_implicit_bpr_train_rec(ml_ds): - algo = BPR(25, use_gpu=False) - assert algo.factors == 25 + algo = BPR(factors=25, use_gpu=False) + assert algo.config.factors == 25 algo.train(ml_ds) @@ -89,7 +89,7 @@ def test_implicit_bpr_train_rec(ml_ds): 
@mark.parametrize("n_jobs", [1, None]) def test_implicit_bpr_batch_accuracy(ml_100k, n_jobs): ds = from_interactions_df(ml_100k) - results = quick_measure_model(BPR(25), ds, n_jobs=n_jobs) + results = quick_measure_model(BPR(factors=25), ds, n_jobs=n_jobs) ndcg = results.list_summary().loc["NDCG", "mean"] _log.info("nDCG for %d users is %.4f", len(results.list_metrics()), ndcg) @@ -98,7 +98,7 @@ def test_implicit_bpr_batch_accuracy(ml_100k, n_jobs): def test_implicit_pickle_untrained(tmp_path): mf = tmp_path / "bpr.dat" - algo = BPR(25, use_gpu=False) + algo = BPR(factors=25, use_gpu=False) with mf.open("wb") as f: pickle.dump(algo, f) @@ -107,4 +107,4 @@ def test_implicit_pickle_untrained(tmp_path): a2 = pickle.load(f) assert a2 is not algo - assert a2.factors == 25 + assert a2.config.factors == 25 diff --git a/lenskit-sklearn/lenskit/sklearn/svd.py b/lenskit-sklearn/lenskit/sklearn/svd.py index 503c5ac2d..6811b33b4 100644 --- a/lenskit-sklearn/lenskit/sklearn/svd.py +++ b/lenskit-sklearn/lenskit/sklearn/svd.py @@ -7,13 +7,14 @@ from __future__ import annotations import logging +from dataclasses import dataclass import numpy as np from numpy.typing import NDArray from typing_extensions import Literal, override -from lenskit.basic import BiasModel -from lenskit.data import Dataset, ItemList, QueryInput, RecQuery, UITuple +from lenskit.basic import BiasModel, Damping +from lenskit.data import Dataset, ItemList, QueryInput, RecQuery from lenskit.data.vocab import Vocabulary from lenskit.pipeline import Component, Trainable from lenskit.util import Stopwatch @@ -29,6 +30,14 @@ _log = logging.getLogger(__name__) +@dataclass +class BiasedSVDConfig: + features: int = 50 + damping: Damping = 5 + algorithm: Literal["arpack", "randomized"] = "randomized" + n_iter: int = 5 + + class BiasedSVDScorer(Component, Trainable): """ Biased matrix factorization for explicit feedback using SciKit-Learn's @@ -44,10 +53,7 @@ class BiasedSVDScorer(Component, Trainable): Caller """ - 
features: int - damping: UITuple[float] - algorithm: Literal["arpack", "randomized"] - n_iter: int + config: BiasedSVDConfig bias_: BiasModel factorization_: TruncatedSVD @@ -55,19 +61,6 @@ class BiasedSVDScorer(Component, Trainable): items_: Vocabulary user_components_: NDArray[np.float64] - def __init__( - self, - features: int = 50, - *, - damping: UITuple[float] | float | tuple[float, float] = 5, - algorithm: Literal["arpack", "randomized"] = "randomized", - n_iter: int = 5, - ): - self.features = features - self.damping = UITuple.create(damping) - self.algorithm = algorithm - self.n_iter = n_iter - @property def is_trained(self): return hasattr(self, "factorization_") @@ -76,7 +69,7 @@ def is_trained(self): def train(self, data: Dataset): timer = Stopwatch() _log.info("[%s] computing bias", timer) - self.bias_ = BiasModel.learn(data, self.damping) + self.bias_ = BiasModel.learn(data, self.config.damping) g_bias = self.bias_.global_bias u_bias = self.bias_.user_biases @@ -94,7 +87,7 @@ def train(self, data: Dataset): r_mat = r_mat.tocsr() self.factorization_ = TruncatedSVD( - self.features, algorithm=self.algorithm, n_iter=self.n_iter + self.config.features, algorithm=self.config.algorithm, n_iter=self.config.n_iter ) _log.info("[%s] training SVD (k=%d)", timer, self.factorization_.n_components) # type: ignore Xt = self.factorization_.fit_transform(r_mat) @@ -131,6 +124,3 @@ def __call__(self, query: QueryInput, items: ItemList) -> ItemList: scores += biases return ItemList(items, scores=scores) - - def __str__(self): - return f"BiasedSVD({self.features})" diff --git a/lenskit-sklearn/tests/test_svd.py b/lenskit-sklearn/tests/test_svd.py index 8dbe6a621..8fad50a82 100644 --- a/lenskit-sklearn/tests/test_svd.py +++ b/lenskit-sklearn/tests/test_svd.py @@ -33,7 +33,7 @@ class TestBiasedSVD(BasicComponentTests, ScorerTests): @need_skl def test_svd_basic_build(): - algo = svd.BiasedSVDScorer(2) + algo = svd.BiasedSVDScorer(features=2) algo.train(simple_ds) assert 
algo.user_components_.shape == (3, 2) @@ -42,7 +42,7 @@ def test_svd_basic_build(): @need_skl def test_svd_predict_basic(): _log.info("SVD input data:\n%s", simple_df) - algo = svd.BiasedSVDScorer(2, damping=0) + algo = svd.BiasedSVDScorer(features=2, damping=0) algo.train(simple_ds) _log.info("user means:\n%s", str(algo.bias_.user_biases)) _log.info("item means:\n%s", str(algo.bias_.item_biases)) @@ -59,7 +59,7 @@ def test_svd_predict_basic(): @need_skl def test_svd_predict_bad_item(): - algo = svd.BiasedSVDScorer(2) + algo = svd.BiasedSVDScorer(features=2) algo.train(simple_ds) preds = algo(10, ItemList([4])) @@ -72,7 +72,7 @@ def test_svd_predict_bad_item(): @need_skl def test_svd_predict_bad_user(): - algo = svd.BiasedSVDScorer(2) + algo = svd.BiasedSVDScorer(features=2) algo.train(simple_ds) preds = algo(50, ItemList([3])) @@ -86,7 +86,7 @@ def test_svd_predict_bad_user(): @need_skl @mark.slow def test_svd_save_load(ml_ds: Dataset): - original = svd.BiasedSVDScorer(20) + original = svd.BiasedSVDScorer(features=20) original.train(ml_ds) mod = pickle.dumps(original) @@ -104,7 +104,7 @@ def test_svd_save_load(ml_ds: Dataset): @mark.eval def test_svd_batch_accuracy(rng, ml_100k: pd.DataFrame): data = from_interactions_df(ml_100k) - svd_algo = svd.BiasedSVDScorer(25, damping=10) + svd_algo = svd.BiasedSVDScorer(features=25, damping=10) results = quick_measure_model(svd_algo, data, predicts_ratings=True, rng=rng) assert results.global_metrics()["MAE"] == approx(0.71, abs=0.025) diff --git a/lenskit/lenskit/als/_common.py b/lenskit/lenskit/als/_common.py index c0932b03e..6bd4278f5 100644 --- a/lenskit/lenskit/als/_common.py +++ b/lenskit/lenskit/als/_common.py @@ -6,21 +6,65 @@ from __future__ import annotations -import logging from abc import ABC, abstractmethod +from typing import Any, Literal, TypeAlias import numpy as np +import structlog import torch +from pydantic import BaseModel from typing_extensions import Iterator, NamedTuple, Self, override from lenskit 
import util
from lenskit.data import Dataset, ItemList, QueryInput, RecQuery, Vocabulary
-from lenskit.data.types import UITuple
+from lenskit.data.types import UIPair
 from lenskit.logging import item_progress
 from lenskit.parallel.config import ensure_parallel_init
 from lenskit.pipeline import Component, Trainable
-from lenskit.types import RNGInput
-from lenskit.util.random import random_generator
+from lenskit.random import ConfiguredSeed, RNGInput, RNGLike, random_generator
+
+EntityClass: TypeAlias = Literal["user", "item"]
+
+
+class ALSConfig(BaseModel):
+    """
+    Configuration for ALS scorers.
+    """
+
+    features: int = 50
+    """
+    The number of latent features to learn.
+    """
+    epochs: int = 10
+    """
+    The number of epochs to train.
+    """
+    regularization: float | UIPair[float] = 0.1
+    """
+    L2 regularization strength.
+    """
+    rng: ConfiguredSeed = None
+    """
+    Random number seed.
+    """
+    save_user_features: bool = True
+    """
+    Whether to retain user feature values after training.
+    """
+
+    @property
+    def user_reg(self) -> float:
+        if isinstance(self.regularization, UIPair):
+            return self.regularization.user
+        else:
+            return self.regularization
+
+    @property
+    def item_reg(self) -> float:
+        if isinstance(self.regularization, UIPair):
+            return self.regularization.item
+        else:
+            return self.regularization


 class TrainContext(NamedTuple):
@@ -94,39 +138,23 @@ class ALSBase(ABC, Component, Trainable):
         Caller
     """

-    features: int
-    epochs: int
-    reg: UITuple[float]
-    rng: RNGInput
-    save_user_features: bool
+    config: ALSConfig
+    rng: RNGLike | None = None

     users_: Vocabulary | None
     items_: Vocabulary
     user_features_: torch.Tensor | None
     item_features_: torch.Tensor

-    @property
-    @abstractmethod
-    def logger(self) -> logging.Logger:  # pragma: no cover
-        """
-        Overridden in implementation to provide the logger.
-        """
-        ...
+    logger: structlog.stdlib.BoundLogger

-    def __init__(
-        self,
-        features: int,
-        *,
-        epochs: int = 10,
-        reg: UITuple[float] | float | tuple[float, float] = 0.1,
-        save_user_features: bool = True,
-        rng: RNGInput = None,
-    ):
-        self.features = features
-        self.epochs = epochs
-        self.reg = UITuple.create(reg)
-        self.rng = rng
-        self.save_user_features = save_user_features
+    def __init__(self, config: ALSConfig | None = None, *, rng: RNGInput = None, **kwargs: Any):
+        # handle non-configurable RNG
+        if isinstance(rng, (np.random.Generator, np.random.BitGenerator)):
+            self.rng = rng
+        elif rng is not None:
+            kwargs = kwargs | {"rng": rng}
+        super().__init__(config, **kwargs)

     @property
     def is_trained(self) -> bool:
@@ -143,7 +171,7 @@ def train(self, data: Dataset):
         ensure_parallel_init()
         timer = util.Stopwatch()

-        for algo in self.fit_iters(data, timer=timer):
+        for algo in self.fit_iters(data):
             pass  # we just need to do the iterations

         if self.user_features_ is not None:
@@ -152,23 +180,25 @@ def train(self, data: Dataset):
                 timer,
                 torch.norm(self.user_features_, "fro"),
                 torch.norm(self.item_features_, "fro"),
+                features=self.config.features,
             )
         else:
             self.logger.info(
                 "trained model in %s (|Q|=%f)",
                 timer,
                 torch.norm(self.item_features_, "fro"),
+                features=self.config.features,
             )

-    def fit_iters(self, data: Dataset, *, timer: util.Stopwatch | None = None) -> Iterator[Self]:
+    def fit_iters(self, data: Dataset) -> Iterator[Self]:
         """
         Run ALS to train a model, yielding after each iteration.

         Args:
             ratings: the ratings data frame.
""" - if timer is None: - timer = util.Stopwatch() + + log = self.logger = self.logger.bind(features=self.config.features) train = self.prepare_data(data) self.users_ = train.users @@ -176,53 +206,36 @@ def fit_iters(self, data: Dataset, *, timer: util.Stopwatch | None = None) -> It self.initialize_params(train) - if isinstance(self.reg, tuple): - ureg, ireg = self.reg - else: - ureg = ireg = self.reg - assert self.user_features_ is not None assert self.item_features_ is not None u_ctx = TrainContext.create( - "user", train.ui_rates, self.user_features_, self.item_features_, ureg + "user", train.ui_rates, self.user_features_, self.item_features_, self.config.user_reg ) i_ctx = TrainContext.create( - "item", train.iu_rates, self.item_features_, self.user_features_, ireg + "item", train.iu_rates, self.item_features_, self.user_features_, self.config.item_reg ) - self.logger.info( - "[%s] training biased MF model with ALS for %d features", timer, self.features - ) - start = timer.elapsed() + log.info("beginning ALS model training") - with item_progress("Training ALS", self.epochs) as epb: - for epoch in range(self.epochs): + with item_progress("Training ALS", self.config.epochs) as epb: + for epoch in range(self.config.epochs): + log = log.bind(epoch=epoch) epoch = epoch + 1 du = self.als_half_epoch(epoch, u_ctx) - self.logger.debug("[%s] finished user epoch %d", timer, epoch) + log.debug("finished user epoch") di = self.als_half_epoch(epoch, i_ctx) - self.logger.debug("[%s] finished item epoch %d", timer, epoch) + log.debug("finished item epoch") - self.logger.info( - "[%s] finished epoch %d (|ΔP|=%.3f, |ΔQ|=%.3f)", timer, epoch, du, di - ) + log.info("finished epoch (|ΔP|=%.3f, |ΔQ|=%.3f)", du, di) epb.update() yield self - if not self.save_user_features: + if not self.config.save_user_features: self.user_features_ = None self.user_ = None - end = timer.elapsed() - self.logger.info( - "[%s] trained %d epochs (%.1fs/epoch)", - timer, - self.epochs, - (end - start) / 
self.epochs, - ) - @abstractmethod def prepare_data(self, data: Dataset) -> TrainingData: # pragma: no cover """ @@ -241,13 +254,13 @@ def initialize_params(self, data: TrainingData): """ Initialize the model parameters at the beginning of training. """ - rng = random_generator(self.rng) + rng = random_generator(self.rng or self.config.rng) self.logger.debug("initializing item matrix") - self.item_features_ = self.initial_params(data.n_items, self.features, rng) + self.item_features_ = self.initial_params(data.n_items, self.config.features, rng) self.logger.debug("|Q|: %f", torch.norm(self.item_features_, "fro")) self.logger.debug("initializing user matrix") - self.user_features_ = self.initial_params(data.n_users, self.features, rng) + self.user_features_ = self.initial_params(data.n_users, self.config.features, rng) self.logger.debug("|P|: %f", torch.norm(self.user_features_, "fro")) @abstractmethod @@ -311,3 +324,6 @@ def finalize_scores( Perform any final transformation of scores prior to returning them. 
""" return items + + def __getstate__(self): + return {k: v for (k, v) in self.__dict__.items() if k != "logger"} diff --git a/lenskit/lenskit/als/_explicit.py b/lenskit/lenskit/als/_explicit.py index 1f6d9923e..b6b1de6e9 100644 --- a/lenskit/lenskit/als/_explicit.py +++ b/lenskit/lenskit/als/_explicit.py @@ -6,23 +6,25 @@ from __future__ import annotations -import logging - import numpy as np import torch from typing_extensions import override -from lenskit.basic import BiasModel +from lenskit.basic import BiasModel, Damping from lenskit.data import Dataset, ItemList -from lenskit.data.types import UITuple +from lenskit.logging import get_logger from lenskit.logging.progress import item_progress_handle, pbh_update from lenskit.math.solve import solve_cholesky from lenskit.parallel.chunking import WorkChunks -from lenskit.types import RNGInput -from ._common import ALSBase, TrainContext, TrainingData +from ._common import ALSBase, ALSConfig, TrainContext, TrainingData + -_log = logging.getLogger(__name__) +class BiasedMFConfig(ALSConfig): + damping: Damping = 5.0 + """ + Damping for the bias model. + """ class BiasedMFScorer(ALSBase): @@ -38,57 +40,20 @@ class BiasedMFScorer(ALSBase): Stability: Caller - - Args: - features: - The number of features to train. - epochs: - The number of iterations to train. - reg: - The regularization factor; can also be a tuple ``(ureg, ireg)`` to - specify separate user and item regularization terms. - damping: - Damping term for the bias model. - rng: - Random number seed or generator. 
""" - timer = None - - damping: UITuple[float] - + config: BiasedMFConfig bias_: BiasModel - def __init__( - self, - features: int = 50, - *, - epochs: int = 10, - reg: float | tuple[float, float] = 0.1, - damping: float | UITuple[float] | tuple[float, float] = 5.0, - rng: RNGInput = None, - save_user_features: bool = True, - ): - super().__init__( - features, - epochs=epochs, - reg=reg, - rng=rng, - save_user_features=save_user_features, - ) - self.damping = UITuple.create(damping) - - @property - def logger(self): - return _log + logger = get_logger(__name__, variant="biased") @override def prepare_data(self, data: Dataset): # transform ratings using offsets rmat = data.interaction_matrix("torch", layout="coo", field="rating") - _log.info("[%s] normalizing ratings", self.timer) - self.bias_ = BiasModel.learn(data, self.damping) + self.logger.info("normalizing ratings") + self.bias_ = BiasModel.learn(data, damping=self.config.damping) rmat = self.bias_.transform_matrix(rmat) rmat = rmat.to_sparse_csr() @@ -123,7 +88,9 @@ def new_user_embedding( ri_val = ratings[mask].to(torch.float64) - u_feat = _train_bias_row_cholesky(inums[mask], ri_val, self.item_features_, self.reg.user) + u_feat = _train_bias_row_cholesky( + inums[mask], ri_val, self.item_features_, self.config.user_reg + ) return u_feat, u_bias @override @@ -145,9 +112,6 @@ def finalize_scores( return ItemList(items, scores=scores) - def __str__(self): - return "als.BiasedMFScorer(features={}, regularization={})".format(self.features, self.reg) - @torch.jit.script def _train_solve_row( diff --git a/lenskit/lenskit/als/_implicit.py b/lenskit/lenskit/als/_implicit.py index bf0500a55..3fd615fba 100644 --- a/lenskit/lenskit/als/_implicit.py +++ b/lenskit/lenskit/als/_implicit.py @@ -6,7 +6,6 @@ from __future__ import annotations -import logging import math import numpy as np @@ -14,14 +13,23 @@ from typing_extensions import override from lenskit.data import Dataset, ItemList +from lenskit.logging import 
get_logger from lenskit.logging.progress import item_progress_handle, pbh_update from lenskit.math.solve import solve_cholesky from lenskit.parallel.chunking import WorkChunks -from lenskit.types import RNGInput -from ._common import ALSBase, TrainContext, TrainingData +from ._common import ALSBase, ALSConfig, TrainContext, TrainingData -_log = logging.getLogger(__name__) + +class ImplicitMFConfig(ALSConfig): + weight: float = 40 + """ + The confidence weight for positive examples. + """ + use_ratings: bool = False + """ + If ``True``, use rating values instead of just presence or absence. + """ class ImplicitMFScorer(ALSBase): @@ -54,70 +62,25 @@ class ImplicitMFScorer(ALSBase): Stability: Caller - - Args: - features: - The number of features to train - epochs: - The number of iterations to train - reg: - The regularization factor - weight: - The scaling weight for positive samples (:math:`\\alpha` in - :cite:p:`hu:implicit-mf`). - use_ratings: - Whether to use the `rating` column, if present. Defaults to - ``False``; when ``True``, the values from the ``rating`` column are - used, and multipled by ``weight``; if ``False``, ImplicitMF treats - every rated user-item pair as having a rating of 1.\ - save_user_feature: - Whether to save the user feature vector in the model, or recompute - it at scoring time. - rng: - Random number seed or generator. 
""" - weight: float - use_ratings: bool + logger = get_logger(__name__, variant="implicit") - OtOr_: torch.Tensor + config: ImplicitMFConfig - def __init__( - self, - features: int = 50, - *, - epochs: int = 20, - reg: float | tuple[float, float] = 0.1, - weight: float = 40, - use_ratings: bool = False, - save_user_features: bool = True, - rng: RNGInput = None, - ): - super().__init__( - features, - epochs=epochs, - reg=reg, - save_user_features=save_user_features, - rng=rng, - ) - self.weight = weight - self.use_ratings = use_ratings - - @property - def logger(self): - return _log + OtOr_: torch.Tensor @override def train(self, data: Dataset): super().train(data) # compute OtOr and save it on the model - reg = self.reg[0] if isinstance(self.reg, tuple) else self.reg + reg = self.config.user_reg self.OtOr_ = _implicit_otor(self.item_features_, reg) @override def prepare_data(self, data: Dataset) -> TrainingData: - if self.use_ratings: + if self.config.use_ratings: rmat = data.interaction_matrix("torch", field="rating") else: rmat = data.interaction_matrix("torch") @@ -125,7 +88,7 @@ def prepare_data(self, data: Dataset) -> TrainingData: rmat = torch.sparse_csr_tensor( crow_indices=rmat.crow_indices(), col_indices=rmat.col_indices(), - values=rmat.values() * self.weight, + values=rmat.values() * self.config.weight, size=rmat.shape, ) return TrainingData.create(data.users, data.items, rmat) @@ -153,23 +116,49 @@ def new_user_embedding( ri_good = ri_idxes >= 0 ri_it = ri_idxes[ri_good] - if self.use_ratings: + if self.config.use_ratings: ratings = user_items.field("rating", "torch") if ratings is None: raise ValueError("no ratings in user items") - ri_val = ratings[ri_good] * self.weight + ri_val = ratings[ri_good] * self.config.weight else: - ri_val = torch.full((len(ri_good),), self.weight) + ri_val = torch.full((len(ri_good),), self.config.weight) ri_val = ri_val.to(self.item_features_.dtype) - u_feat = _train_implicit_row_cholesky(ri_it, ri_val, 
self.item_features_, self.OtOr_) + u_feat = self._train_new_row(ri_it, ri_val, self.item_features_, self.OtOr_) return u_feat, None - def __str__(self): - return "als.ImplicitMFScorer(features={}, reg={}, w={})".format( - self.features, self.reg, self.weight - ) + def _train_new_row( + self, items: torch.Tensor, ratings: torch.Tensor, i_embeds: torch.Tensor, OtOr: torch.Tensor + ) -> torch.Tensor: + """ + Train a single user row with new rating data. + + Args: + items: the item IDs the user has rated + ratings: the user's ratings for those items (when rating values are used) + other: the item-feature matrix + OtOr: the pre-computed regularization and background matrix. + + Returns: + The user-feature vector. + """ + self.logger.debug("learning new user row", n_items=len(items)) + + # we can optimize by only considering the nonzero entries of Cu-I + # this means we only need the corresponding matrix columns + M = i_embeds[items, :] + # Compute M^T (C_u-I) M, restricted to these nonzero entries + MMT = (M.T * ratings) @ M + # Build the matrix for solving + A = OtOr + MMT + # Compute RHS - only used columns (p_ui != 0) values needed + y = i_embeds.T[:, items] @ (ratings + 1.0) + # and solve + x = solve_cholesky(A, y) + + return x @torch.jit.script @@ -183,38 +172,6 @@ def _implicit_otor(other: torch.Tensor, reg: float) -> torch.Tensor: return OtO -def _train_implicit_row_cholesky( - items: torch.Tensor, ratings: torch.Tensor, i_embeds: torch.Tensor, OtOr: torch.Tensor -) -> torch.Tensor: - """ - Train a single user row with new rating data. - - Args: - items: the item IDs the user has rated - ratings: the user's ratings for those items (when rating values are used) - other: the item-feature matrix - OtOr: the pre-computed regularization and background matrix. - - Returns: - The user-feature vector. 
- """ - _log.debug("learning new user row with %d items", len(items)) - - # we can optimize by only considering the nonzero entries of Cu-I - # this means we only need the corresponding matrix columns - M = i_embeds[items, :] - # Compute M^T (C_u-I) M, restricted to these nonzero entries - MMT = (M.T * ratings) @ M - # Build the matrix for solving - A = OtOr + MMT - # Compute RHS - only used columns (p_ui != 0) values needed - y = i_embeds.T[:, items] @ (ratings + 1.0) - # and solve - x = solve_cholesky(A, y) - - return x - - def _train_implicit_cholesky_rows( ctx: TrainContext, OtOr: torch.Tensor, start: int, end: int, pbh: str ) -> torch.Tensor: diff --git a/lenskit/lenskit/basic/__init__.py b/lenskit/lenskit/basic/__init__.py index 613c11273..2be73e560 100644 --- a/lenskit/lenskit/basic/__init__.py +++ b/lenskit/lenskit/basic/__init__.py @@ -2,7 +2,7 @@ Basic and baseline pipeline components. """ -from .bias import BiasModel, BiasScorer +from .bias import BiasModel, BiasScorer, Damping from .candidates import AllTrainingItemsCandidateSelector, UnratedTrainingItemsCandidateSelector from .composite import FallbackScorer from .history import UserTrainingHistoryLookup @@ -13,6 +13,7 @@ __all__ = [ "BiasModel", "BiasScorer", + "Damping", "PopScorer", "TopNRanker", "RandomSelector", diff --git a/lenskit/lenskit/basic/bias.py b/lenskit/lenskit/basic/bias.py index ae977193a..a52254be1 100644 --- a/lenskit/lenskit/basic/bias.py +++ b/lenskit/lenskit/basic/bias.py @@ -10,17 +10,21 @@ from __future__ import annotations import logging +from collections.abc import Container from dataclasses import dataclass +from typing import Literal import numpy as np import torch +from pydantic import BaseModel, NonNegativeFloat from typing_extensions import Self, TypeAlias, overload -from lenskit.data import ID, Dataset, ItemList, QueryInput, RecQuery, UITuple, Vocabulary +from lenskit.data import ID, Dataset, ItemList, QueryInput, RecQuery, Vocabulary from lenskit.pipeline.components 
import Component _logger = logging.getLogger(__name__) -Damping: TypeAlias = float | UITuple[float] | tuple[float, float] +BiasEntity: TypeAlias = Literal["user", "item"] +Damping: TypeAlias = float | dict[BiasEntity, float] @dataclass @@ -56,7 +60,7 @@ class uses this model to score items in a pipeline; the model is reusable Caller """ - damping: UITuple[float] + damping: Damping "The mean damping terms." global_bias: float @@ -74,7 +78,11 @@ class uses this model to score items in a pipeline; the model is reusable @classmethod def learn( - cls, data: Dataset, damping: Damping = 0.0, *, items: bool = True, users: bool = True + cls, + data: Dataset, + damping: Damping | tuple[float, float] = 0.0, + *, + entities: Container[BiasEntity] = frozenset({"user", "item"}), ) -> Self: """ Learn a bias model and its parameters from a dataset. @@ -91,8 +99,9 @@ def learn( users: Whether to compute user biases """ - damping = UITuple.create(damping) + if isinstance(damping, tuple): + damping = {"user": damping[0], "item": damping[1]} _logger.info("building bias model for %d ratings", data.interaction_count) ratings = data.interaction_matrix("scipy", layout="coo", field="rating") nrows, ncols = ratings.shape # type: ignore @@ -106,8 +115,8 @@ def learn( if np.allclose(centered, 0): _logger.warning("mean-centered ratings are all 0, bias probably meaningless") - if items: - counts = np.full(ncols, damping.item) + if "item" in entities: + counts = np.full(ncols, entity_damping(damping, "item")) sums = np.zeros(ncols) np.add.at(counts, ratings.col, 1) np.add.at(sums, ratings.col, centered) @@ -121,8 +130,8 @@ def learn( centered -= i_bias[ratings.col] _logger.info("computed biases for %d items", len(i_bias)) - if users: - counts = np.full(nrows, damping.user) + if "user" in entities: + counts = np.full(nrows, entity_damping(damping, "user")) sums = np.zeros(nrows) np.add.at(counts, ratings.row, 1) np.add.at(sums, ratings.row, centered) @@ -210,7 +219,9 @@ def compute_for_items( 
r_mask = r_idxes >= 0
             uoff[r_mask] -= self.item_biases[r_idxes[r_mask]]

-        user_bias = np.sum(uoff) / (np.sum(np.isfinite(uoff)) + self.damping.user)
+        user_bias = np.sum(uoff) / (
+            np.sum(np.isfinite(uoff)) + entity_damping(self.damping, "user")
+        )

         scores += user_bias

     elif user_id is not None:
@@ -240,55 +251,41 @@ def transform_matrix(self, matrix: torch.Tensor):

         return torch.sparse_coo_tensor(indices, values, size=matrix.size())


+class BiasConfig(BaseModel, extra="forbid"):
+    """
+    Configuration for :class:`BiasScorer`.
+    """
+
+    entities: set[Literal["user", "item"]] = {"user", "item"}
+    """
+    The entities to compute biases for, in addition to global bias. Defaults to
+    users and items.
+    """
+    damping: NonNegativeFloat | dict[Literal["user", "item"], NonNegativeFloat] = 0.0
+
+    def entity_damping(self, entity: Literal["user", "item"]) -> float:
+        """
+        Look up the damping for a particular entity type.
+        """
+        return entity_damping(self.damping, entity)
+
+
 class BiasScorer(Component):
     """
     A user-item bias rating prediction model.  This component uses
     :class:`BiasModel` to predict ratings for users and items.

     Args:
-        items:
-            Whether to compute item biases.
-        users:
-            Whether to compute user biases.
-        damping:
-            Bayesian damping to apply to computed biases.  Either a number, to
-            damp both user and item biases the same amount, or a (user,item)
-            tuple providing separate damping values.
+        config:
+            The component configuration.

     Stability:
         Caller
     """

-    IGNORED_CONFIG_FIELDS = ["user_damping", "item_damping"]
-
-    users: bool
-    items: bool
-    damping: UITuple[float]
-    "The configured offset damping levels."
- + config: BiasConfig model_: BiasModel - def __init__( - self, - items: bool = True, - users: bool = True, - damping: float | UITuple[float] | tuple[float, float] = 0.0, - *, - user_damping: float | None = None, - item_damping: float | None = None, - ): - self.items = items - self.users = users - self.damping = UITuple.create(damping) - - if user_damping is not None or item_damping is not None: - self.damping = UITuple(user=user_damping or 0.0, item=item_damping or 0.0) - - if self.damping.user < 0: - raise ValueError("user damping must be non-negative") - if self.damping.item < 0: - raise ValueError("item damping must be non-negative") - @property def is_trained(self) -> bool: return hasattr(self, "model_") @@ -304,7 +301,7 @@ def train(self, data: Dataset): Returns: The trained bias object. """ - self.model_ = BiasModel.learn(data, self.damping, users=self.users, items=self.items) + self.model_ = BiasModel.learn(data, self.config.damping, entities=self.config.entities) def __call__(self, query: QueryInput, items: ItemList) -> ItemList: """ @@ -326,5 +323,12 @@ def __call__(self, query: QueryInput, items: ItemList) -> ItemList: scores, _bias = self.model_.compute_for_items(items, query.user_id, query.user_items) return ItemList(items, scores=scores) - def __str__(self): - return "Bias(ud={}, id={})".format(self.damping.user, self.damping.item) + +def entity_damping(damping: Damping, entity: BiasEntity) -> float: + """ + Look up the damping for a particular entity type. 
+ """ + if isinstance(damping, dict): + return damping.get(entity, 0.0) + else: + return damping diff --git a/lenskit/lenskit/basic/candidates.py b/lenskit/lenskit/basic/candidates.py index 468dc4a56..a44a62c76 100644 --- a/lenskit/lenskit/basic/candidates.py +++ b/lenskit/lenskit/basic/candidates.py @@ -19,6 +19,7 @@ class TrainingCandidateSelectorBase(Component, Trainable): Caller """ + config: None items_: Vocabulary @property @@ -41,6 +42,8 @@ class AllTrainingItemsCandidateSelector(TrainingCandidateSelectorBase): Caller """ + config: None + def __call__(self) -> ItemList: return ItemList.from_vocabulary(self.items_) @@ -59,6 +62,8 @@ class UnratedTrainingItemsCandidateSelector(TrainingCandidateSelectorBase): Caller """ + config: None + def __call__(self, query: QueryInput) -> ItemList: query = RecQuery.create(query) items = ItemList.from_vocabulary(self.items_) diff --git a/lenskit/lenskit/basic/composite.py b/lenskit/lenskit/basic/composite.py index f2ad07c9e..b7b43c97f 100644 --- a/lenskit/lenskit/basic/composite.py +++ b/lenskit/lenskit/basic/composite.py @@ -18,6 +18,8 @@ class FallbackScorer(Component): Caller """ + config: None + def __call__(self, scores: ItemList, backup: ItemList) -> ItemList: s = scores.scores() if s is None: diff --git a/lenskit/lenskit/basic/popularity.py b/lenskit/lenskit/basic/popularity.py index ad797bc2f..444cbcd8a 100644 --- a/lenskit/lenskit/basic/popularity.py +++ b/lenskit/lenskit/basic/popularity.py @@ -2,9 +2,11 @@ import logging from datetime import datetime +from typing import Literal import numpy as np import pandas as pd +from pydantic import BaseModel from typing_extensions import override from lenskit.data import Dataset, ItemList, Vocabulary @@ -13,6 +15,18 @@ _log = logging.getLogger(__name__) +class PopConfig(BaseModel): + """ + Configuration for popularity scoring. + """ + + score: Literal["quantile", "rank", "count"] = "quantile" + """ + The method for computing popularity scores. 
For all methods, higher scores + represent more popular items. + """ + + class PopScorer(Component, Trainable): """ Score items by their popularity. Use with :py:class:`TopN` to get a @@ -21,27 +35,16 @@ class PopScorer(Component, Trainable): Stability: Caller - Args: - score_type: - The method for computing popularity scores. Can be one of the following: - - - ``'quantile'`` (the default) - - ``'rank'`` - - ``'count'`` - Attributes: item_pop_: Item popularity scores. """ - score_method: str + config: PopConfig items_: Vocabulary item_scores_: np.ndarray[int, np.dtype[np.float32]] - def __init__(self, score_method: str = "quantile"): - self.score_method = score_method - @property def is_trained(self) -> bool: return hasattr(self, "item_scores_") @@ -57,20 +60,20 @@ def train(self, data: Dataset): ) def _train_internal(self, scores: pd.Series) -> pd.Series: - if self.score_method == "rank": + if self.config.score == "rank": _log.info("ranking %d items", len(scores)) scores = scores.rank().sort_index() - elif self.score_method == "quantile": + elif self.config.score == "quantile": _log.info("computing quantiles for %d items", len(scores)) cmass = scores.sort_values() cmass = cmass.cumsum() cdens = cmass / scores.sum() scores = cdens.sort_index() - elif self.score_method == "count": + elif self.config.score == "count": _log.info("scoring items with their rating counts") scores = scores.sort_index() else: - raise ValueError("invalid scoring method " + repr(self.score_method)) + raise ValueError("invalid scoring method " + repr(self.config.score)) return scores @@ -81,8 +84,12 @@ def __call__(self, items: ItemList) -> ItemList: scores[mask] = self.item_scores_[inums[mask]] return ItemList(items, scores=scores) - def __str__(self): - return "PopScore({})".format(self.score_method) + +class TimeBoundedPopConfig(PopConfig): + cutoff: datetime + """ + Time window for computing popularity scores. 
+ """ class TimeBoundedPopScore(PopScorer): @@ -91,26 +98,12 @@ class TimeBoundedPopScore(PopScorer): most recent `time_window` period. Use with :py:class:`TopN` to get a most-popular-recent-items recommender. - Args: - time_window(datetime.timedelta): - The time window for computing popularity scores. - score_type(str): - The method for computing popularity scores. Can be one of the following: - - - ``'quantile'`` (the default) - - ``'rank'`` - - ``'count'`` - Attributes: item_scores_(pandas.Series): Time-bounded item popularity scores. """ - def __init__(self, cutoff: datetime, score_method="quantile"): - super().__init__(score_method) - - self.cutoff = cutoff - self.score_method = score_method + config: TimeBoundedPopConfig @override def train(self, data: Dataset, **kwargs): @@ -125,7 +118,7 @@ def train(self, data: Dataset, **kwargs): return else: counts = np.zeros(data.item_count, dtype=np.int32) - start_timestamp = self.cutoff.timestamp() + start_timestamp = self.config.cutoff.timestamp() item_nums = log.item_nums[log.timestamps > start_timestamp] np.add.at(counts, item_nums, 1) @@ -136,7 +129,3 @@ def train(self, data: Dataset, **kwargs): self.items_ = data.items.copy() self.item_scores_ = np.require(item_scores.reindex(self.items_.ids()).values, np.float32) - - @override - def __str__(self): - return "TimeBoundedPopScore({}, {})".format(self.cutoff, self.score_method) diff --git a/lenskit/lenskit/basic/random.py b/lenskit/lenskit/basic/random.py index f704d0b17..02bd6686b 100644 --- a/lenskit/lenskit/basic/random.py +++ b/lenskit/lenskit/basic/random.py @@ -1,10 +1,23 @@ import numpy as np +from pydantic import BaseModel from lenskit.data import ItemList from lenskit.data.query import QueryInput, RecQuery from lenskit.pipeline import Component +from lenskit.random import DerivableSeed, RNGFactory, derivable_rng from lenskit.stats import argtopn -from lenskit.util.random import DerivableSeed, RNGFactory, derivable_rng + + +class RandomConfig(BaseModel, 
arbitrary_types_allowed=True): + n: int | None = None + """ + The number of items to select. -1 or ``None`` to return all scored items. + """ + + rng: DerivableSeed = None + """ + Random number generator configuration. + """ class RandomSelector(Component): @@ -23,14 +36,12 @@ class RandomSelector(Component): class supports derivable RNGs. """ - n: int - rng: DerivableSeed + config: RandomConfig _rng_factory: RNGFactory - def __init__(self, n: int = -1, rng: DerivableSeed = None): - self.n = n - self.rng = rng - self._rng_factory = derivable_rng(rng) + def __init__(self, config: RandomConfig | None = None, **kwargs): + super().__init__(config, **kwargs) + self._rng_factory = derivable_rng(self.config.rng) def __call__( self, items: ItemList, query: QueryInput | None = None, n: int | None = None @@ -46,7 +57,7 @@ def __call__( The number of items to select, overriding the configured value. """ if n is None: - n = self.n + n = self.config.n or -1 query = RecQuery.create(query) rng = self._rng_factory(query) @@ -89,14 +100,12 @@ class SoftmaxRanker(Component): class supports derivable RNGs. 
""" - n: int - rng: DerivableSeed + config: RandomConfig _rng_factory: RNGFactory - def __init__(self, n: int = -1, rng: DerivableSeed = None): - self.n = n - self.rng = rng - self._rng_factory = derivable_rng(rng) + def __init__(self, config: RandomConfig | None = None, **kwargs): + super().__init__(config, **kwargs) + self._rng_factory = derivable_rng(self.config.rng) def __call__( self, items: ItemList, query: QueryInput | None = None, n: int | None = None @@ -115,7 +124,8 @@ def __call__( return ItemList(item_ids=[], scores=[], ordered=True) if n is None or n < 0: - n = self.n + n = self.config.n or -1 + if n < 0 or n > N: n = N diff --git a/lenskit/lenskit/basic/topn.py b/lenskit/lenskit/basic/topn.py index b441aa889..8683b83db 100644 --- a/lenskit/lenskit/basic/topn.py +++ b/lenskit/lenskit/basic/topn.py @@ -4,6 +4,8 @@ import logging +from pydantic import BaseModel + from lenskit.data import ItemList from lenskit.pipeline.components import Component from lenskit.stats import argtopn @@ -11,6 +13,17 @@ _log = logging.getLogger(__name__) +class TopNConfig(BaseModel): + """ + Configuration for top-N ranking. + """ + + n: int | None = None + """ + The number of items to return. -1 or ``None`` to return all scored items. + """ + + class TopNRanker(Component): """ Rank scored items by their score and take the top *N*. The ranking length @@ -19,17 +32,10 @@ class TopNRanker(Component): Stability: Caller - - Args: - n: - The desired ranking length. If negative, then scored items are - ranked but the ranking is not truncated. """ - n: int - - def __init__(self, n: int = -1): - self.n = n + config: TopNConfig + "Configuration object." def __call__(self, *, items: ItemList, n: int | None = None) -> ItemList: """ @@ -48,7 +54,7 @@ def __call__(self, *, items: ItemList, n: int | None = None) -> ItemList: preserved. 
""" if n is None: - n = self.n + n = self.config.n or -1 if n >= 0: _log.debug("ranking top %d of %d items", n, len(items)) diff --git a/lenskit/lenskit/data/__init__.py b/lenskit/lenskit/data/__init__.py index df0c25675..48094157f 100644 --- a/lenskit/lenskit/data/__init__.py +++ b/lenskit/lenskit/data/__init__.py @@ -18,7 +18,7 @@ from .movielens import load_movielens, load_movielens_df from .mtarray import MTArray, MTFloatArray, MTGenericArray, MTIntArray from .query import QueryInput, RecQuery -from .types import ID, NPID, FeedbackType, UITuple +from .types import ID, NPID, FeedbackType from .vocab import Vocabulary __all__ = [ @@ -29,7 +29,6 @@ "MatrixDataset", "ID", "NPID", - "UITuple", "FeedbackType", "ItemList", "ItemListCollection", diff --git a/lenskit/lenskit/data/types.py b/lenskit/lenskit/data/types.py index 9cb28b466..2087b15a6 100644 --- a/lenskit/lenskit/data/types.py +++ b/lenskit/lenskit/data/types.py @@ -12,7 +12,7 @@ from __future__ import annotations from dataclasses import dataclass, field -from typing import Any, Generic, Literal, NamedTuple, Sequence, TypeAlias, TypedDict, TypeVar, cast +from typing import Any, Generic, Literal, Sequence, TypeAlias, TypeVar import numpy as np import pandas as pd @@ -54,37 +54,11 @@ class AliasedColumn: Column: TypeAlias = str | AliasedColumn -class UIDict(TypedDict, Generic[T]): - user: T - item: T - - -class UITuple(NamedTuple, Generic[T]): +@dataclass(frozen=True) +class UIPair(Generic[T]): """ - Tuple of (user, item) data, typically for configuration and similar - purposes. - - Stability: - Caller + A user-item pair of values. """ user: T - "User data." item: T - "Item data." - - @classmethod - def create(cls, x: UITuple[T] | tuple[T, T] | UIDict[T] | T) -> UITuple[T]: - """ - Create a user-item tuple from a tuple or data. If a single value - is provided, it is used for both user and item. 
- """ - if isinstance(x, UITuple): - return cast(UITuple[T], x) - elif isinstance(x, (tuple, list)): - u, i = cast(tuple[T, T], x) - return UITuple(u, i) - elif isinstance(x, dict): - return UITuple(x["user"], x["item"]) - else: - return UITuple(x, x) diff --git a/lenskit/lenskit/knn/item.py b/lenskit/lenskit/knn/item.py index 3d33a474f..7588aa627 100644 --- a/lenskit/lenskit/knn/item.py +++ b/lenskit/lenskit/knn/item.py @@ -15,6 +15,7 @@ import numpy as np import torch +from pydantic import BaseModel, PositiveFloat, PositiveInt, field_validator from scipy.sparse import csr_array from typing_extensions import Optional, override @@ -32,6 +33,52 @@ MAX_BLOCKS = 1024 +class ItemKNNConfig(BaseModel, extra="forbid"): + "Configuration for :class:`ItemKNNScorer`." + + k: PositiveInt = 20 + """ + The maximum number of neighbors for scoring each item. + """ + min_nbrs: PositiveInt = 1 + """ + The minimum number of neighbors for scoring each item. + """ + min_sim: PositiveFloat = 1.0e-6 + """ + Minimum similarity threshold for considering a neighbor. Must be positive; + if less than the smallest 32-bit normal (:math:`1.175 \\times 10^{-38}`), is + clamped to that value. + """ + save_nbrs: PositiveInt | None = None + """ + The number of neighbors to save per item in the trained model (``None`` for + unlimited). + """ + feedback: FeedbackType = "explicit" + """ + The type of input data to use (explicit or implicit). This affects data + pre-processing and aggregation. + """ + block_size: int = 250 + """ + The block size for computing item similarity blocks in parallel. Only + affects performance, not behavior. + """ + + @field_validator("min_sim", mode="after") + @staticmethod + def clamp_min_sim(sim) -> float: + return max(sim, float(np.finfo(np.float64).smallest_normal)) + + @property + def explicit(self) -> bool: + """ + Query whether this is in explicit-feedback mode. 
+ """ + return self.feedback == "explicit" + + class ItemKNNScorer(Component, Trainable): """ Item-item nearest-neighbor collaborative filtering feedback. This item-item @@ -49,31 +96,9 @@ class ItemKNNScorer(Component, Trainable): Stability: Caller - - Args: - nnbrs: - The maximum number of neighbors for scoring each item (``None`` for - unlimited) - min_nbrs: - The minimum number of neighbors for scoring each item - min_sim: - Minimum similarity threshold for considering a neighbor. Must be - positive; if less than the smallest 32-bit normal (:math:`1.175 - \\times 10^{-38}`), is clamped to that value. - save_nbrs: - The number of neighbors to save per item in the trained model - (``None`` for unlimited) - feedback: - The type of input data to use (explicit or implicit). This affects - data pre-processing and aggregation. """ - nnbrs: int - min_nbrs: int = 1 - min_sim: float - save_nbrs: int | None = None - feedback: FeedbackType - block_size: int + config: ItemKNNConfig items_: Vocabulary "Vocabulary of item IDs." @@ -86,35 +111,6 @@ class ItemKNNScorer(Component, Trainable): users_: Vocabulary "Vocabulary of user IDs." 
- def __init__( - self, - nnbrs: int = 20, - min_nbrs: int = 1, - min_sim: float = 1.0e-6, - save_nbrs: int | None = None, - feedback: FeedbackType = "explicit", - block_size: int = 250, - ): - self.nnbrs = nnbrs - self.min_nbrs = min_nbrs - if self.min_nbrs is not None and self.min_nbrs < 1: - self.min_nbrs = 1 - self.min_sim = min_sim - self.save_nbrs = save_nbrs - self.block_size = block_size - - self.feedback = feedback - - if self.min_sim < 0: - _log.warning("item-item does not currently support negative similarities") - warnings.warn("item-item does not currently support negative similarities") - elif self.min_sim == 0: - f4i = np.finfo("f4") - _log.warning( - "minimum similarity %e is too low, using %e", self.min_sim, f4i.smallest_normal - ) - self.min_sim = float(f4i.smallest_normal) - @property def is_trained(self) -> bool: return hasattr(self, "items_") @@ -133,14 +129,14 @@ def train(self, data: Dataset): (user,item,rating) data for computing item similarities. """ ensure_parallel_init() - log = _log.bind(n_items=data.item_count, feedback=self.feedback) + log = _log.bind(n_items=data.item_count, feedback=self.config.feedback) # Training proceeds in 2 steps: # 1. Normalize item vectors to be mean-centered and unit-normalized # 2. 
Compute similarities with pairwise dot products self._timer = util.Stopwatch() log.info("begining IKNN training") - field = "rating" if self.feedback == "explicit" else None + field = "rating" if self.config.explicit else None init_rmat = data.interaction_matrix("torch", field=field) n_items = data.item_count log.info( @@ -153,7 +149,7 @@ def train(self, data: Dataset): # we operate on *transposed* rating matrix: items on the rows rmat = init_rmat.transpose(0, 1).to_sparse_csr().to(torch.float64) - if self.feedback == "explicit": + if self.config.explicit: rmat, means = normalize_sparse_rows(rmat, "center") if np.allclose(rmat.values(), 0.0): log.warning("normalized ratings are zero, centering is not recommended") @@ -193,10 +189,12 @@ def train(self, data: Dataset): def _compute_similarities(self, rmat: torch.Tensor) -> torch.Tensor: nitems, nusers = rmat.shape - bs = max(self.block_size, nitems // MAX_BLOCKS) + bs = max(self.config.block_size, nitems // MAX_BLOCKS) _log.debug("computing with effective block size %d", bs) with item_progress_handle("items", nitems) as pbh: - smat = _sim_blocks(rmat.to(torch.float64), self.min_sim, self.save_nbrs, bs, pbh) + smat = _sim_blocks( + rmat.to(torch.float64), self.config.min_sim, self.config.save_nbrs, bs, pbh + ) return smat.to(torch.float32) @@ -222,7 +220,7 @@ def __call__(self, query: QueryInput, items: ItemList) -> ItemList: n_valid = len(ri_valid_nums) trace(log, "%d of %d rated items in model", n_valid, len(ratings)) - if self.feedback == "explicit": + if self.config.explicit: ri_vals = ratings.field("rating", "numpy") if ri_vals is None: raise RuntimeError("explicit-feedback scorer must have ratings") @@ -251,16 +249,16 @@ def __call__(self, query: QueryInput, items: ItemList) -> ItemList: # count neighborhood sizes sizes = np.diff(model.indptr) # which neighborhoods are usable? 
(at least min neighbors) - scorable = sizes >= self.min_nbrs + scorable = sizes >= self.config.min_nbrs # fast-path neighborhoods that fit within max neighbors - fast = sizes <= self.nnbrs + fast = sizes <= self.config.k ti_fast_mask = ti_mask.copy() ti_fast_mask[ti_mask] = scorable & fast scores = np.full(len(items), np.nan, dtype=np.float32) fast_mod = model[:, scorable & fast] - if self.feedback == "explicit": + if self.config.explicit: scores[ti_fast_mask] = ri_vals @ fast_mod scores[ti_fast_mask] /= fast_mod.sum(axis=0) else: @@ -278,9 +276,9 @@ def __call__(self, query: QueryInput, items: ItemList) -> ItemList: ti_slow_mask[ti_mask] = ~fast slow_mat = torch.from_numpy(slow_mat.toarray()) - slow_trimmed, slow_inds = torch.topk(slow_mat, self.nnbrs) - assert slow_trimmed.shape == (n_slow, self.nnbrs) - if self.feedback == "explicit": + slow_trimmed, slow_inds = torch.topk(slow_mat, self.config.k) + assert slow_trimmed.shape == (n_slow, self.config.k) + if self.config.explicit: svals = torch.from_numpy(ri_vals)[slow_inds] assert svals.shape == slow_trimmed.shape scores[ti_slow_mask] = torch.sum(slow_trimmed * svals, axis=1).numpy() @@ -299,9 +297,6 @@ def __call__(self, query: QueryInput, items: ItemList) -> ItemList: return ItemList(items, scores=scores) - def __str__(self): - return "ItemItem(nnbrs={}, msize={})".format(self.nnbrs, self.save_nbrs) - @torch.jit.script def _sim_row( diff --git a/lenskit/lenskit/knn/user.py b/lenskit/lenskit/knn/user.py index 839cb6acd..ce3361996 100644 --- a/lenskit/lenskit/knn/user.py +++ b/lenskit/lenskit/knn/user.py @@ -17,6 +17,7 @@ import pandas as pd import structlog import torch +from pydantic import BaseModel, PositiveFloat, PositiveInt, field_validator from scipy.sparse import csc_array from typing_extensions import NamedTuple, Optional, Self, override @@ -32,6 +33,42 @@ _log = get_logger(__name__) +class UserKNNConfig(BaseModel, extra="forbid"): + "Configuration for :class:`UserKNNScorer`." 
+ + k: PositiveInt = 20 + """ + The maximum number of neighbors for scoring each item. + """ + min_nbrs: PositiveInt = 1 + """ + The minimum number of neighbors for scoring each item. + """ + min_sim: PositiveFloat = 1.0e-6 + """ + Minimum similarity threshold for considering a neighbor. Must be positive; + if less than the smallest 32-bit normal (:math:`1.175 \\times 10^{-38}`), is + clamped to that value. + """ + feedback: FeedbackType = "explicit" + """ + The type of input data to use (explicit or implicit). This affects data + pre-processing and aggregation. + """ + + @field_validator("min_sim", mode="after") + @staticmethod + def clamp_min_sim(sim) -> float: + return max(sim, float(np.finfo(np.float64).smallest_normal)) + + @property + def explicit(self) -> bool: + """ + Query whether this is in explicit-feedback mode. + """ + return self.feedback == "explicit" + + class UserKNNScorer(Component, Trainable): """ User-user nearest-neighbor collaborative filtering with ratings. This @@ -46,35 +83,9 @@ class UserKNNScorer(Component, Trainable): Stability: Caller - - Args: - nnbrs: - the maximum number of neighbors for scoring each item (``None`` for - unlimited). - min_nbrs: - The minimum number of neighbors for scoring each item. - min_sim: - Minimum similarity threshold for considering a neighbor. Must be - positive; if less than the smallest 32-bit normal (:math:`1.175 - \\times 10^{-38}`), is clamped to that value. - feedback: - Control how feedback should be interpreted. Specifies defaults for - the other settings, which can be overridden individually; can be one - of the following values: - - ``explicit`` - Configure for explicit-feedback mode: use rating values, and - predict using weighted averages. This is the default setting. - - ``implicit`` - Configure for implicit-feedback mode: ignore rating values, and - predict using the sums of similarities. 
""" - nnbrs: int - min_nbrs: int - min_sim: float - feedback: FeedbackType + config: UserKNNConfig users_: Vocabulary "The index of user IDs." @@ -87,26 +98,6 @@ class UserKNNScorer(Component, Trainable): user_ratings_: csc_array "Centered but un-normalized rating matrix (COO) to find neighbor ratings." - def __init__( - self, - nnbrs: int = 20, - min_nbrs: int = 1, - min_sim: float = 1.0e-6, - feedback: FeedbackType = "explicit", - ): - self.nnbrs = nnbrs - self.min_nbrs = min_nbrs - if min_sim < 0: - raise ValueError("minimum similarity must be positive") - elif min_sim == 0: - f4i = np.finfo("f4") - self.min_sim = float(f4i.smallest_normal) - _log.warning("minimum similarity %e is too low, using %e", min_sim, self.min_sim) - else: - self.min_sim = min_sim - - self.feedback = feedback - @property def is_trained(self) -> bool: return hasattr(self, "users_") @@ -121,12 +112,10 @@ def train(self, data: Dataset) -> Self: ratings(pandas.DataFrame): (user, item, rating) data for collaborative filtering. 
""" ensure_parallel_init() - rmat = data.interaction_matrix( - "torch", field="rating" if self.feedback == "explicit" else None - ) + rmat = data.interaction_matrix("torch", field="rating" if self.config.explicit else None) assert rmat.is_sparse_csr - if self.feedback == "explicit": + if self.config.explicit: rmat, means = normalize_sparse_rows(rmat, "center") if np.allclose(rmat.values(), 0.0): _log.warning("normalized ratings are zero, centering is not recommended") @@ -189,7 +178,7 @@ def __call__(self, query: QueryInput, items: ItemList) -> ItemList: # get indices for these neighbors nbr_idxs = torch.arange(len(self.users_), dtype=torch.int64) - nbr_mask = nbr_sims >= self.min_sim + nbr_mask = nbr_sims >= self.config.min_sim kn_sims = nbr_sims[nbr_mask] kn_idxs = nbr_idxs[nbr_mask] @@ -218,9 +207,9 @@ def __call__(self, query: QueryInput, items: ItemList) -> ItemList: kn_idxs, kn_sims, self.user_ratings_, - self.nnbrs, - self.min_nbrs, - self.feedback == "explicit", + self.config.k, + self.config.min_nbrs, + self.config.explicit, ) scores += umean @@ -247,7 +236,7 @@ def _get_user_data(self, query: RecQuery) -> Optional[UserRatings]: assert index >= 0 row = self.user_vectors_[index].to_dense() - if self.feedback == "explicit": + if self.config.explicit: assert self.user_means_ is not None umean = self.user_means_[index].item() else: @@ -259,7 +248,7 @@ def _get_user_data(self, query: RecQuery) -> Optional[UserRatings]: ui_nos = query.user_items.numbers("torch", missing="negative", vocabulary=self.items_) ui_mask = ui_nos >= 0 - if self.feedback == "explicit": + if self.config.explicit: urv = query.user_items.field("rating", "torch") if urv is None: _log.warning("user %s has items but no ratings", query.user_id) @@ -273,9 +262,6 @@ def _get_user_data(self, query: RecQuery) -> Optional[UserRatings]: return UserRatings(index, ratings, umean) - def __str__(self): - return "UserUser(nnbrs={}, min_sim={})".format(self.nnbrs, self.min_sim) - class 
UserRatings(NamedTuple): """ diff --git a/lenskit/lenskit/logging/_proxy.py b/lenskit/lenskit/logging/_proxy.py index 489a2e49d..b7134c0bc 100644 --- a/lenskit/lenskit/logging/_proxy.py +++ b/lenskit/lenskit/logging/_proxy.py @@ -8,7 +8,9 @@ _fallback_wrapper = structlog.make_filtering_bound_logger(logging.WARNING) -def get_logger(name: str, *, remove_private: bool = True) -> structlog.stdlib.BoundLogger: +def get_logger( + name: str, *, remove_private: bool = True, **init_als: Any +) -> structlog.stdlib.BoundLogger: """ Get a logger. This works like :func:`structlog.stdlib.get_logger`, except the returned proxy logger is quiet (only WARN and higher messages) if @@ -24,6 +26,8 @@ def get_logger(name: str, *, remove_private: bool = True) -> structlog.stdlib.Bo remove_private: Set to ``False`` to keep private module components of the logger name instead of removing them. + init_als: + Initial values to bind into the logger when created. Returns: A lazy proxy logger. The returned logger is type-compatible with :class:`structlib.stdlib.BoundLogger`, but is actually an instance of an @@ -32,7 +36,7 @@ def get_logger(name: str, *, remove_private: bool = True) -> structlog.stdlib.Bo """ if remove_private: name = re.sub(r"\._.*", "", name) - return LenskitProxyLogger(None, logger_factory_args=[name]) # type: ignore + return LenskitProxyLogger(None, logger_factory_args=[name], initial_values=init_als) # type: ignore class LenskitProxyLogger(BoundLoggerLazyProxy): diff --git a/lenskit/lenskit/metrics/_quick.py b/lenskit/lenskit/metrics/_quick.py index 9641499f4..60133d641 100644 --- a/lenskit/lenskit/metrics/_quick.py +++ b/lenskit/lenskit/metrics/_quick.py @@ -4,7 +4,7 @@ from lenskit.data import Dataset from lenskit.pipeline import Component, RecPipelineBuilder -from lenskit.types import RNGInput +from lenskit.random import RNGInput from .bulk import RunAnalysis, RunAnalysisResult from .predict import MAE, RMSE diff --git a/lenskit/lenskit/pipeline/__init__.py 
b/lenskit/lenskit/pipeline/__init__.py index 73662d7ee..af39710f3 100644 --- a/lenskit/lenskit/pipeline/__init__.py +++ b/lenskit/lenskit/pipeline/__init__.py @@ -16,7 +16,6 @@ from .common import RecPipelineBuilder, topn_pipeline from .components import ( Component, - Configurable, PipelineFunction, Trainable, ) @@ -33,7 +32,6 @@ "PipelineState", "Node", "PipelineFunction", - "Configurable", "Trainable", "PipelineConfig", "Lazy", diff --git a/lenskit/lenskit/pipeline/_impl.py b/lenskit/lenskit/pipeline/_impl.py index 7f1d04238..154aedc79 100644 --- a/lenskit/lenskit/pipeline/_impl.py +++ b/lenskit/lenskit/pipeline/_impl.py @@ -15,7 +15,6 @@ from . import config from .components import ( # type: ignore # noqa: F401 Component, - Configurable, PipelineFunction, Trainable, fallback_on_none, @@ -333,9 +332,9 @@ def component_configs(self) -> dict[str, dict[str, Any]]: only, it does not include pipeline inputs or wiring. """ return { - name: comp.get_config() + name: comp.dump_config() for (name, comp) in self._components.items() - if isinstance(comp, Configurable) + if isinstance(comp, Component) } def clone(self, how: CloneMethod = "config") -> Pipeline: @@ -381,8 +380,8 @@ def clone(self, how: CloneMethod = "config") -> Pipeline: case ComponentNode(name, comp, _inputs, wiring): if isinstance(comp, FunctionType): comp = comp - elif isinstance(comp, Configurable): - comp = comp.__class__.from_config(comp.get_config()) # type: ignore + elif isinstance(comp, Component): + comp = comp.__class__(comp.config) # type: ignore else: comp = comp.__class__() # type: ignore cn = clone.add_component(node.name, comp) # type: ignore diff --git a/lenskit/lenskit/pipeline/components.py b/lenskit/lenskit/pipeline/components.py index 6e8ed0e7e..7167ef4fe 100644 --- a/lenskit/lenskit/pipeline/components.py +++ b/lenskit/lenskit/pipeline/components.py @@ -11,12 +11,23 @@ import inspect import json +import warnings from abc import abstractmethod from importlib import import_module from 
types import FunctionType -from typing import Callable, ClassVar, Generic, ParamSpec, TypeAlias - -from typing_extensions import Any, Protocol, Self, TypeVar, override, runtime_checkable +from typing import ( + Any, + Callable, + Mapping, + ParamSpec, + Protocol, + TypeAlias, + TypeVar, + get_origin, + runtime_checkable, +) + +from pydantic import JsonValue, TypeAdapter from lenskit.data.dataset import Dataset @@ -24,48 +35,12 @@ P = ParamSpec("P") T = TypeVar("T") +Cfg = TypeVar("Cfg") # COut is only return, so Component[U] can be assigned to Component[T] if U ≼ T. COut = TypeVar("COut", covariant=True) PipelineFunction: TypeAlias = Callable[..., COut] -@runtime_checkable -class Configurable(Protocol): # pragma: nocover - """ - Interface for configurable objects such as pipeline components with settings - or hyperparameters. A configurable object supports two operations: - - * saving its configuration with :meth:`get_config`. - * creating a new instance from a saved configuration with the class method - :meth:`from_config`. - - An object must implement both of these methods to be considered - configurable. Components extending the :class:`Component` automatically - have working versions of these methods if they define their constructor - parameters and fields appropriately. - - .. note:: - - Configuration data should be JSON-compatible (strings, numbers, etc.). - - Stability: - Full - """ - - @classmethod - def from_config(cls, cfg: dict[str, Any]) -> Self: - """ - Reinstantiate this component from configuration values. - """ - raise NotImplementedError() - - def get_config(self) -> dict[str, object]: - """ - Get this component's configured hyperparameters. 
- """ - raise NotImplementedError() - - @runtime_checkable class Trainable(Protocol): # pragma: nocover """ @@ -155,71 +130,115 @@ def load_params(self, params: dict[str, object]) -> None: raise NotImplementedError() -class Component(Configurable, Generic[COut]): +class Component: """ Base class for pipeline component objects. Any component that is not just a function should extend this class. - Components are :class:`Configurable`. The base class provides default - implementations of :meth:`get_config` and :meth:`from_config` that inspect - the constructor arguments and instance variables to automatically provide - configuration support. By default, all constructor parameters will be - considered configuration parameters, and their values will be read from - instance variables of the same name. Components can also define - :data:`EXTRA_CONFIG_FIELDS` and :data:`IGNORED_CONFIG_FIELDS` class - variables to modify this behavior. Missing attributes are silently ignored. + Pipeline components support configuration (e.g., hyperparameters or random + seeds) through Pydantic models or Python dataclasses; see + :ref:`component-config` for further details. If the pipeline's + configuration class is ``C``, it has the following: + + 1. The configuration is exposed through an instance variable ``config``. + 2. The constructor accepts the configuration object as its first parameter, + also named ``config``, and saves this in the member variable. + + The base class constructor handles both of these, so long as you declare the + type of the ``config`` member:: + + class MyComponent(Component): + config: MyComponentConfig + + ... + + If you do not declare a ``config`` attribute, the base class will assume the + pipeline uses no configuration. To work as components, derived classes also need to implement a ``__call__`` method to perform their operations. + Args: + config: + The configuration object. If ``None``, the configuration class will + be instantiated with ``kwargs``. 
+ Stability: Full """ - EXTRA_CONFIG_FIELDS: ClassVar[list[str]] = [] + config: Any = None + """ + The component configuration object. Component classes that support + configuration **must** redefine this attribute with their specific + configuration class type, which can be a Python dataclass or a Pydantic + model class. """ - Names of instance variables that should be included in the configuration - dictionary even though they do not correspond to named constructor - arguments. - .. note:: + def __init_subclass__(cls, **kwargs: Any): + super().__init_subclass__(**kwargs) + annots = inspect.get_annotations(cls) + if annots.get("config", None) == Any: + warnings.warn( + "component class {} does not define a config attribute".format(cls.__qualname__), + stacklevel=2, + ) - This is rarely needed, and usually needs to be coupled with ``**kwargs`` - in the constructor to make the resulting objects constructible. - """ + def __init__(self, config: object | None = None, **kwargs: Any): + if config is None: + config = self.validate_config(kwargs) + elif kwargs: + raise RuntimeError("cannot supply both a configuration object and kwargs") - IGNORED_CONFIG_FIELDS: ClassVar[list[str]] = [] - """ - Names of constructor parameters that should be excluded from the - configuration dictionary. 
- """ + cfg_cls = self._config_class() + if cfg_cls and not isinstance(config, cfg_cls): + raise TypeError(f"invalid configuration type {type(config)}") - @override - def get_config(self) -> dict[str, object]: + self.config = config + + @classmethod + def _config_class(cls) -> type | None: + for base in cls.__mro__: + annots = inspect.get_annotations(base, eval_str=True) + ct = annots.get("config", None) + if ct == Any: + return None + + if isinstance(ct, type): + return ct + elif ct is not None: # pragma: nocover + warnings.warn("config attribute is not annotated with a plain type") + return get_origin(ct) + + def dump_config(self) -> dict[str, JsonValue]: """ - Get the configuration by inspecting the constructor and instance - variables. + Dump the configuration to JSON-serializable format. """ - sig = inspect.signature(self.__class__) - names = list(sig.parameters.keys()) + self.EXTRA_CONFIG_FIELDS - params: dict[str, Any] = {} - for name in names: - if name not in self.IGNORED_CONFIG_FIELDS and hasattr(self, name): - params[name] = getattr(self, name) - - return params + cfg_cls = self._config_class() + if cfg_cls: + return TypeAdapter(cfg_cls).dump_python(self.config, mode="json") + else: + return {} - @override @classmethod - def from_config(cls, cfg: dict[str, Any]) -> Self: + def validate_config(cls, data: Mapping[str, JsonValue] | None = None) -> object | None: """ - Create a class from the specified construction. Configuration elements - are passed to the constructor as keywrod arguments. + Validate and return a configuration object for this component. 
""" - return cls(**cfg) + if data is None: + data = {} + cfg_cls = cls._config_class() + if cfg_cls: + return TypeAdapter(cfg_cls).validate_python(data) + elif data: # pragma: nocover + raise RuntimeError( + "supplied configuration options but {} has no config class".format(cls.__name__) + ) + else: + return None @abstractmethod - def __call__(self, **kwargs: Any) -> COut: + def __call__(self, **kwargs: Any) -> Any: # pragma: nocover """ Run the pipeline's operation and produce a result. This is the key method for components to implement. @@ -236,7 +255,7 @@ def __call__(self, **kwargs: Any) -> COut: ... def __repr__(self) -> str: - params = json.dumps(self.get_config(), indent=2) + params = json.dumps(self.dump_config(), indent=4) return f"<{self.__class__.__name__} {params}>" @@ -260,11 +279,10 @@ def instantiate_component( if isinstance(comp, FunctionType): return comp - elif issubclass(comp, Configurable): - if config is None: - config = {} - return comp.from_config(config) # type: ignore - else: + elif issubclass(comp, Component): + cfg = comp.validate_config(config) + return comp(cfg) + else: # pragma: nocover return comp() # type: ignore diff --git a/lenskit/lenskit/pipeline/config.py b/lenskit/lenskit/pipeline/config.py index ad092a862..7b255da2e 100644 --- a/lenskit/lenskit/pipeline/config.py +++ b/lenskit/lenskit/pipeline/config.py @@ -16,12 +16,12 @@ from collections import OrderedDict from hashlib import sha256 from types import FunctionType -from typing import Literal +from typing import Literal, Mapping from pydantic import BaseModel, Field, JsonValue, ValidationError from typing_extensions import Any, Optional, Self -from .components import Configurable +from .components import Component from .nodes import ComponentNode, InputNode from .types import type_string @@ -103,7 +103,7 @@ class PipelineComponent(BaseModel): This is a Python qualified path of the form ``module:name``. 
""" - config: dict[str, object] | None = Field(default=None) + config: Mapping[str, JsonValue] | None = Field(default=None) """ The component configuration. If not provided, the component will be created with its default constructor parameters. @@ -128,7 +128,7 @@ def from_node(cls, node: ComponentNode[Any], mapping: dict[str, str] | None = No code = f"{ctype.__module__}:{ctype.__qualname__}" - config = comp.get_config() if isinstance(comp, Configurable) else None + config = comp.dump_config() if isinstance(comp, Component) else None return cls( code=code, diff --git a/lenskit/lenskit/util/random.py b/lenskit/lenskit/random.py similarity index 86% rename from lenskit/lenskit/util/random.py rename to lenskit/lenskit/random.py index bf42fdeba..e0e9118f3 100644 --- a/lenskit/lenskit/util/random.py +++ b/lenskit/lenskit/random.py @@ -18,14 +18,41 @@ from typing_extensions import Any, Literal, Protocol, Sequence, TypeAlias, override from lenskit.data import RecQuery -from lenskit.types import RNGInput, SeedLike + +SeedLike: TypeAlias = int | Sequence[int] | np.random.SeedSequence +""" +Type for RNG seeds (see `SPEC 7`_). + +.. _SPEC 7: https://scientific-python.org/specs/spec-0007/ +""" + +RNGLike: TypeAlias = np.random.Generator | np.random.BitGenerator +""" +Type for random number generators as inputs (see `SPEC 7`_). + +.. _SPEC 7: https://scientific-python.org/specs/spec-0007/ +""" + +RNGInput: TypeAlias = SeedLike | RNGLike | None +""" +Type for RNG inputs (see `SPEC 7`_). + +.. _SPEC 7: https://scientific-python.org/specs/spec-0007/ +""" + +ConfiguredSeed: TypeAlias = int | Sequence[int] | None +""" +Random number seed that can be configured. 
+""" SeedDependency = Literal["user"] -DerivableSeed: TypeAlias = SeedLike | SeedDependency | tuple[SeedLike, SeedDependency] | None _global_rng: Generator | None = None +DerivableSeed: TypeAlias = ConfiguredSeed | SeedDependency | tuple[ConfiguredSeed, SeedDependency] + + def set_global_rng(seed: RNGInput): """ Set the global default RNG. @@ -145,7 +172,7 @@ def derivable_rng(spec: DerivableSeed) -> RNGFactory: Seed specifications may be any of the following: - - A seed (:type:`~lenskit.types.SeedLike`). + - A seed (:type:`~lenskit.random.SeedLike`). - The value ``'user'``, which will derive a seed from the query user ID. - A tuple of the form ``(seed, 'user')``, that will use ``seed`` as the basis and drive from it a new seed based on the user ID. diff --git a/lenskit/lenskit/splitting/holdout.py b/lenskit/lenskit/splitting/holdout.py index 4574800a0..fbf4b69d7 100644 --- a/lenskit/lenskit/splitting/holdout.py +++ b/lenskit/lenskit/splitting/holdout.py @@ -13,8 +13,7 @@ import numpy as np from lenskit.data import ItemList -from lenskit.types import RNGInput -from lenskit.util.random import random_generator +from lenskit.random import RNGInput, random_generator class HoldoutMethod(Protocol): diff --git a/lenskit/lenskit/splitting/records.py b/lenskit/lenskit/splitting/records.py index 6468df5e2..5ed5c6cb3 100644 --- a/lenskit/lenskit/splitting/records.py +++ b/lenskit/lenskit/splitting/records.py @@ -14,8 +14,7 @@ from lenskit.data import Dataset, ItemListCollection, UserIDKey from lenskit.data.matrix import MatrixDataset -from lenskit.types import RNGInput -from lenskit.util.random import random_generator +from lenskit.random import RNGInput, random_generator from .split import TTSplit diff --git a/lenskit/lenskit/splitting/users.py b/lenskit/lenskit/splitting/users.py index a4f2e7cc4..a651a2f92 100644 --- a/lenskit/lenskit/splitting/users.py +++ b/lenskit/lenskit/splitting/users.py @@ -16,8 +16,7 @@ from lenskit.data import NPID, Dataset, ItemListCollection, 
UserIDKey from lenskit.data.matrix import MatrixDataset from lenskit.logging import item_progress -from lenskit.types import RNGInput -from lenskit.util.random import random_generator +from lenskit.random import RNGInput, random_generator from .holdout import HoldoutMethod from .split import TTSplit diff --git a/lenskit/lenskit/testing/_components.py b/lenskit/lenskit/testing/_components.py index 7338f61b9..d1e411ef0 100644 --- a/lenskit/lenskit/testing/_components.py +++ b/lenskit/lenskit/testing/_components.py @@ -23,31 +23,38 @@ def test_instantiate_default(self): inst = self.component() assert inst is not None + if self.component._config_class() is not None: + assert inst.config is not None + else: + assert inst.config is None + def test_default_config_vars(self): inst = self.component() - cfg = inst.get_config() + cfg = inst.dump_config() for name, value in cfg.items(): - assert getattr(inst, name) == value + assert hasattr(inst.config, name) def test_default_config_round_trip(self): inst = self.component() - cfg = inst.get_config() + cfg = inst.dump_config() - i2 = self.component.from_config(cfg) + i2 = self.component(self.component.validate_config(cfg)) assert i2 is not inst assert isinstance(i2, self.component) - assert i2.get_config() == cfg + print(cfg) + print(i2.dump_config()) + assert i2.dump_config() == cfg def test_config_round_trip(self): if not self.configs: skip("no test configs specified") for cfg in self.configs: - inst = self.component.from_config(cfg) - c1 = inst.get_config() + inst = self.component(self.component.validate_config(cfg)) + c1 = inst.dump_config() - i2 = self.component.from_config(c1) - c2 = i2.get_config() + i2 = self.component(self.component.validate_config(c1)) + c2 = i2.dump_config() # config may be changed from source (due to normalization), but should # round-trip. 
assert c2 == c1 diff --git a/lenskit/lenskit/testing/_movielens.py b/lenskit/lenskit/testing/_movielens.py index 7443e02e6..d93162e8a 100644 --- a/lenskit/lenskit/testing/_movielens.py +++ b/lenskit/lenskit/testing/_movielens.py @@ -113,7 +113,7 @@ def demo_recs() -> tuple[TTSplit, ItemListCollection[UserIDKey]]: builder = RecPipelineBuilder() builder.scorer(PopScorer()) - builder.ranker(SoftmaxRanker(500)) + builder.ranker(SoftmaxRanker(n=500)) pipe = builder.build() pipe.train(split.train) diff --git a/lenskit/lenskit/types.py b/lenskit/lenskit/types.py deleted file mode 100644 index 5dd0584b6..000000000 --- a/lenskit/lenskit/types.py +++ /dev/null @@ -1,36 +0,0 @@ -# This file is part of LensKit. -# Copyright (C) 2018-2023 Boise State University -# Copyright (C) 2023-2024 Drexel University -# Licensed under the MIT license, see LICENSE.md for details. -# SPDX-License-Identifier: MIT -""" -Types used across LensKit. -""" - -# pyright: strict -from __future__ import annotations - -from typing import Any, Sequence, TypeAlias - -import numpy as np - -SeedLike: TypeAlias = int | np.integer[Any] | Sequence[int] | np.random.SeedSequence -""" -Type for RNG seeds (see `SPEC 7`_). - -.. _SPEC 7: https://scientific-python.org/specs/spec-0007/ -""" - -RNGLike: TypeAlias = np.random.Generator | np.random.BitGenerator -""" -Type for random number generators as inputs (see `SPEC 7`_). - -.. _SPEC 7: https://scientific-python.org/specs/spec-0007/ -""" - -RNGInput: TypeAlias = SeedLike | RNGLike | None -""" -Type for RNG inputs (see `SPEC 7`_). - -.. 
_SPEC 7: https://scientific-python.org/specs/spec-0007/ -""" diff --git a/lenskit/lenskit/util/__init__.py b/lenskit/lenskit/util/__init__.py index a92cc7a14..ef8d59cc7 100644 --- a/lenskit/lenskit/util/__init__.py +++ b/lenskit/lenskit/util/__init__.py @@ -12,7 +12,7 @@ from textwrap import dedent from typing import Any, Protocol, TypeVar, runtime_checkable -from .random import derivable_rng, random_generator, set_global_rng +from ..random import derivable_rng, random_generator, set_global_rng from .timing import Stopwatch try: diff --git a/lenskit/tests/basic/test_bias.py b/lenskit/tests/basic/test_bias.py index f416302d8..d04facd96 100644 --- a/lenskit/tests/basic/test_bias.py +++ b/lenskit/tests/basic/test_bias.py @@ -33,7 +33,7 @@ class TestBias(BasicComponentTests, ScorerTests): component = BiasScorer needs_jit = False - configs = [{"damping": 10}, {"damping": (5, 25)}] + configs = [{"damping": 10}, {"damping": {"user": 5, "item": 25}}] can_score = "all" @@ -82,30 +82,31 @@ def test_bias_clone(): bias = BiasScorer() bias.train(simple_ds) - params = bias.get_config() - assert sorted(params.keys()) == ["damping", "items", "users"] + params = bias.dump_config() + assert sorted(params.keys()) == ["damping", "entities"] - a2 = BiasScorer.from_config(params) + a2 = BiasScorer(BiasScorer.validate_config(params)) assert a2 is not bias assert getattr(a2, "model_", None) is None def test_bias_clone_damping(): - bias = BiasScorer(damping=(10, 5)) + bias = BiasScorer(damping={"user": 10, "item": 5}) bias.train(simple_ds) - params = bias.get_config() - assert sorted(params.keys()) == ["damping", "items", "users"] + params = bias.dump_config() + assert sorted(params.keys()) == ["damping", "entities"] - a2 = BiasScorer.from_config(params) + a2 = BiasScorer(BiasScorer.validate_config(params)) assert a2 is not bias - assert a2.damping.user == 10 - assert a2.damping.item == 5 + assert isinstance(a2.config.damping, dict) + assert a2.config.damping["user"] == 10 + assert 
a2.config.damping["item"] == 5 assert getattr(a2, "model_", None) is None def test_bias_global_only(): - bias = BiasModel.learn(simple_ds, users=False, items=False) + bias = BiasModel.learn(simple_ds, entities=[]) assert bias.global_bias == approx(3.5) assert bias.items is None assert bias.item_biases is None @@ -114,7 +115,7 @@ def test_bias_global_only(): def test_bias_no_user(): - bias = BiasModel.learn(simple_ds, users=False) + bias = BiasModel.learn(simple_ds, entities={"item"}) assert bias.global_bias == approx(3.5) assert bias.item_biases is not None @@ -124,7 +125,7 @@ def test_bias_no_user(): def test_bias_no_item(): - bias = BiasModel.learn(simple_ds, items=False) + bias = BiasModel.learn(simple_ds, entities={"user"}) assert bias.global_bias == approx(3.5) assert bias.item_biases is None @@ -133,7 +134,7 @@ def test_bias_no_item(): def test_bias_global_predict(): - bias = BiasScorer(items=False, users=False) + bias = BiasScorer(entities=[]) bias.train(simple_ds) p = bias(10, ItemList(item_ids=[1, 2, 3])) @@ -143,7 +144,7 @@ def test_bias_global_predict(): def test_bias_item_predict(): - bias = BiasScorer(users=False) + bias = BiasScorer(entities={"item"}) bias.train(simple_ds) assert bias.model_.item_biases is not None @@ -154,7 +155,7 @@ def test_bias_item_predict(): def test_bias_user_predict(): - bias = BiasScorer(items=False) + bias = BiasScorer(entities={"user"}) bias.train(simple_ds) bm = bias.model_ p = bias(10, ItemList(item_ids=[1, 2, 3])) @@ -245,7 +246,7 @@ def test_bias_train_ml_ratings(ml_ratings: pd.DataFrame, ml_ds: Dataset): def test_bias_item_damp(): - bias = BiasModel.learn(simple_ds, users=False, damping=5) + bias = BiasModel.learn(simple_ds, entities={"item"}, damping=5) assert bias.global_bias == approx(3.5) assert bias.item_biases is not None @@ -255,7 +256,7 @@ def test_bias_item_damp(): def test_bias_user_damp(): - bias = BiasModel.learn(simple_ds, items=False, damping=5) + bias = BiasModel.learn(simple_ds, entities={"user"}, 
damping=5) assert bias.global_bias == approx(3.5) assert bias.item_biases is None diff --git a/lenskit/tests/basic/test_popular.py b/lenskit/tests/basic/test_popular.py index 4c854bf64..17a691cd6 100644 --- a/lenskit/tests/basic/test_popular.py +++ b/lenskit/tests/basic/test_popular.py @@ -36,7 +36,7 @@ def test_popscore_quantile(rng, ml_ds): def test_popscore_rank(rng, ml_ds): - pop = PopScorer("rank") + pop = PopScorer(score="rank") pop.train(ml_ds) counts = ml_ds.item_stats()["count"] @@ -49,7 +49,7 @@ def test_popscore_rank(rng, ml_ds): def test_popscore_counts(rng, ml_ds): - pop = PopScorer("count") + pop = PopScorer(score="count") pop.train(ml_ds) counts = ml_ds.item_stats()["count"] diff --git a/lenskit/tests/basic/test_random.py b/lenskit/tests/basic/test_random.py index 56b3da1be..d6af6d544 100644 --- a/lenskit/tests/basic/test_random.py +++ b/lenskit/tests/basic/test_random.py @@ -36,7 +36,7 @@ def test_unlimited_selection(items: ItemList): @given(st.integers(min_value=1, max_value=100), scored_lists()) def test_configured_truncation(n, items: ItemList): - rsel = RandomSelector(n) + rsel = RandomSelector(n=n) ranked = rsel(items=items) assert len(ranked) == min(n, len(items)) diff --git a/lenskit/tests/basic/test_softmax.py b/lenskit/tests/basic/test_softmax.py index 3b4c17a35..3ec78de82 100644 --- a/lenskit/tests/basic/test_softmax.py +++ b/lenskit/tests/basic/test_softmax.py @@ -61,7 +61,7 @@ def test_unlimited_ranking(items: ItemList): @given(st.integers(min_value=1, max_value=100), scored_lists()) def test_configured_truncation(n, items: ItemList): - topn = SoftmaxRanker(n) + topn = SoftmaxRanker(n=n) ranked = topn(items=items) ids = items.ids() diff --git a/lenskit/tests/basic/test_time_bounded_popular.py b/lenskit/tests/basic/test_time_bounded_popular.py index 9936587d5..ee5577de3 100644 --- a/lenskit/tests/basic/test_time_bounded_popular.py +++ b/lenskit/tests/basic/test_time_bounded_popular.py @@ -29,13 +29,13 @@ def 
test_time_bounded_pop_score_quantile_one_day_window(): - algo = popularity.TimeBoundedPopScore(one_day_ago) + algo = popularity.TimeBoundedPopScore(cutoff=one_day_ago) algo.train(simple_ds) assert np.all(algo.item_scores_ == [1.0, 0.0, 0.0]) def test_time_bounded_pop_score_quantile_one_day_window_call_interface(): - algo = popularity.TimeBoundedPopScore(one_day_ago) + algo = popularity.TimeBoundedPopScore(cutoff=one_day_ago) algo.train(simple_ds) p = algo(ItemList(item_ids=[1, 2, 3])) @@ -44,7 +44,7 @@ def test_time_bounded_pop_score_quantile_one_day_window_call_interface(): def test_time_bounded_pop_score_quantile_two_day_window(): - algo = popularity.TimeBoundedPopScore(two_days_ago) + algo = popularity.TimeBoundedPopScore(cutoff=two_days_ago) algo.train(simple_ds) assert np.all(algo.item_scores_ == pd.Series([0.25, 1.0, 0.5], index=[1, 2, 3])) @@ -52,25 +52,25 @@ def test_time_bounded_pop_score_quantile_two_day_window(): def test_time_bounded_pop_score_fallbacks_to_pop_score_for_dataset_without_timestamps(): ds = from_interactions_df(simple_df.drop(columns=["timestamp"])) - algo = popularity.TimeBoundedPopScore(one_day_ago) + algo = popularity.TimeBoundedPopScore(cutoff=one_day_ago) algo.train(ds) assert np.all(algo.item_scores_ == pd.Series([0.25, 1.0, 0.5], index=[1, 2, 3])) def test_time_bounded_pop_score_rank(): - algo = popularity.TimeBoundedPopScore(two_days_ago, "rank") + algo = popularity.TimeBoundedPopScore(cutoff=two_days_ago, score="rank") algo.train(simple_ds) assert np.all(algo.item_scores_ == pd.Series([1.5, 3.0, 1.5], index=[1, 2, 3])) def test_time_bounded_pop_score_counts(): - algo = popularity.TimeBoundedPopScore(two_days_ago, "count") + algo = popularity.TimeBoundedPopScore(cutoff=two_days_ago, score="count") algo.train(simple_ds) assert np.all(algo.item_scores_ == pd.Series([1, 2, 1], index=[1, 2, 3], dtype=np.int32)) def test_time_bounded_pop_score_save_load(): - original = popularity.TimeBoundedPopScore(one_day_ago) + original = 
popularity.TimeBoundedPopScore(cutoff=one_day_ago) original.train(simple_ds) mod = pickle.dumps(original) diff --git a/lenskit/tests/basic/test_topn.py b/lenskit/tests/basic/test_topn.py index 7280b07f0..75a03ea53 100644 --- a/lenskit/tests/basic/test_topn.py +++ b/lenskit/tests/basic/test_topn.py @@ -67,7 +67,7 @@ def test_unlimited_ranking(items: ItemList): @given(st.integers(min_value=1, max_value=100), scored_lists()) def test_configured_truncation(n, items: ItemList): - topn = TopNRanker(n) + topn = TopNRanker(n=n) ranked = topn(items=items) ids = items.ids() diff --git a/lenskit/tests/models/test_als_explicit.py b/lenskit/tests/models/test_als_explicit.py index 4503f4121..213bcf03c 100644 --- a/lenskit/tests/models/test_als_explicit.py +++ b/lenskit/tests/models/test_als_explicit.py @@ -31,7 +31,7 @@ class TestExplicitALS(BasicComponentTests, ScorerTests): def test_als_basic_build(): - algo = BiasedMFScorer(20, epochs=10) + algo = BiasedMFScorer(features=20, epochs=10) algo.train(simple_ds) assert algo.bias_ is not None @@ -44,13 +44,13 @@ def test_als_basic_build(): assert algo.user_features_.shape == (3, 20) assert algo.item_features_.shape == (3, 20) - assert algo.features == 20 + assert algo.config.features == 20 assert len(algo.users_) == 3 assert len(algo.items_) == 3 def test_als_predict_basic(): - algo = BiasedMFScorer(20, epochs=10) + algo = BiasedMFScorer(features=20, epochs=10) algo.train(simple_ds) assert algo.bias_ is not None @@ -67,7 +67,7 @@ def test_als_predict_basic(): def test_als_predict_basic_for_new_ratings(): - algo = BiasedMFScorer(20, epochs=10) + algo = BiasedMFScorer(features=20, epochs=10) algo.train(simple_ds) assert algo.bias_ is not None @@ -88,7 +88,7 @@ def test_als_predict_basic_for_new_user_with_new_ratings(): u = 10 i = 3 - algo = BiasedMFScorer(20, epochs=10) + algo = BiasedMFScorer(features=20, epochs=10) algo.train(simple_ds) preds = algo(query=u, items=ItemList([i])) @@ -111,7 +111,7 @@ def 
test_als_predict_for_new_users_with_new_ratings(rng, ml_ds: Dataset): users = rng.choice(ml_ds.users.ids(), n_users) items = rng.choice(ml_ds.items.ids(), n_items) - algo = BiasedMFScorer(20, epochs=10) + algo = BiasedMFScorer(features=20, epochs=10) algo.train(ml_ds) _log.debug("Items: " + str(items)) @@ -139,7 +139,7 @@ def test_als_predict_for_new_users_with_new_ratings(rng, ml_ds: Dataset): def test_als_predict_bad_item(): - algo = BiasedMFScorer(20, epochs=10) + algo = BiasedMFScorer(features=20, epochs=10) algo.train(simple_ds) assert algo.bias_ is not None @@ -154,7 +154,7 @@ def test_als_predict_bad_item(): def test_als_predict_bad_user(): - algo = BiasedMFScorer(20, epochs=10) + algo = BiasedMFScorer(features=20, epochs=10) algo.train(simple_ds) assert algo.bias_ is not None @@ -174,14 +174,14 @@ def test_als_predict_no_user_features_basic(rng: np.random.Generator, ml_ds: Dat u = rng.choice(ml_ds.users.ids(), 1).item() items = rng.choice(ml_ds.items.ids(), n_items) - algo = BiasedMFScorer(5, epochs=10) + algo = BiasedMFScorer(features=5, epochs=10) algo.train(ml_ds) _log.debug("Items: " + str(items)) assert algo.bias_ is not None assert algo.users_ is not None assert algo.user_features_ is not None - algo_no_user_features = BiasedMFScorer(5, epochs=10, save_user_features=False) + algo_no_user_features = BiasedMFScorer(features=5, epochs=10, save_user_features=False) algo_no_user_features.train(ml_ds) assert algo_no_user_features.user_features_ is None @@ -205,7 +205,7 @@ def test_als_predict_no_user_features_basic(rng: np.random.Generator, ml_ds: Dat @wantjit @mark.slow def test_als_train_large(ml_ratings, ml_ds: Dataset): - algo = BiasedMFScorer(20, epochs=10) + algo = BiasedMFScorer(features=20, epochs=10) algo.train(ml_ds) assert algo.bias_ is not None @@ -213,7 +213,7 @@ def test_als_train_large(ml_ratings, ml_ds: Dataset): assert algo.user_features_ is not None assert algo.bias_.global_bias == approx(ml_ratings.rating.mean()) - assert algo.features == 
20 + assert algo.config.features == 20 assert len(algo.items_) == ml_ratings["item_id"].nunique() assert len(algo.users_) == ml_ratings["user_id"].nunique() @@ -232,7 +232,7 @@ def test_als_train_large(ml_ratings, ml_ds: Dataset): # don't use wantjit, use this to do a non-JIT test def test_als_save_load(ml_ds: Dataset): - original = BiasedMFScorer(5, epochs=5) + original = BiasedMFScorer(features=5, epochs=5) original.train(ml_ds) assert original.bias_ is not None @@ -260,7 +260,7 @@ def test_als_save_load(ml_ds: Dataset): def test_als_batch_accuracy(ml_100k): ds = from_interactions_df(ml_100k) results = quick_measure_model( - BiasedMFScorer(25, epochs=20, damping=5), ds, predicts_ratings=True + BiasedMFScorer(features=25, epochs=20, damping=5), ds, predicts_ratings=True ) assert results.global_metrics()["MAE"] == approx(0.73, abs=0.045) diff --git a/lenskit/tests/models/test_als_implicit.py b/lenskit/tests/models/test_als_implicit.py index 8fc922112..09716eb4f 100644 --- a/lenskit/tests/models/test_als_implicit.py +++ b/lenskit/tests/models/test_als_implicit.py @@ -33,7 +33,7 @@ class TestImplicitALS(BasicComponentTests, ScorerTests): def test_als_basic_build(): - algo = ImplicitMFScorer(20, epochs=10) + algo = ImplicitMFScorer(features=20, epochs=10) algo.train(simple_ds) assert algo.users_ is not None @@ -46,7 +46,7 @@ def test_als_basic_build(): def test_als_predict_basic(): - algo = ImplicitMFScorer(20, epochs=10) + algo = ImplicitMFScorer(features=20, epochs=10) algo.train(simple_ds) preds = algo(query=10, items=ItemList([3])) @@ -61,7 +61,7 @@ def test_als_predict_basic(): def test_als_predict_basic_for_new_ratings(): """Test ImplicitMF ability to support new ratings""" - algo = ImplicitMFScorer(20, epochs=10) + algo = ImplicitMFScorer(features=20, epochs=10) algo.train(simple_ds) query = RecQuery(15, ItemList([1, 2])) @@ -83,7 +83,7 @@ def test_als_predict_basic_for_new_user_with_new_ratings(): u = 10 i = 3 - algo = ImplicitMFScorer(20, epochs=10, 
use_ratings=True) + algo = ImplicitMFScorer(features=20, epochs=10, use_ratings=True) algo.train(simple_dsr) preds = algo(u, ItemList([i])) @@ -111,7 +111,7 @@ def test_als_predict_for_new_users_with_new_ratings(rng: np.random.Generator, ml users = rng.choice(ml_ds.users.ids(), n_users) items = ItemList(rng.choice(ml_ds.items.ids(), n_items)) - algo = ImplicitMFScorer(20, epochs=10, use_ratings=False) + algo = ImplicitMFScorer(features=20, epochs=10, use_ratings=False) algo.train(ml_ds) assert algo.users_ is not None assert algo.user_features_ is not None @@ -133,12 +133,12 @@ def test_als_predict_for_new_users_with_new_ratings(rng: np.random.Generator, ml ifs = algo.item_features_[user_data.numbers(vocabulary=algo.items_), :] fit_uv = algo.user_features_[upos, :] nr_info["fit_recon"] = ifs @ fit_uv - nr_info["fit_sqerr"] = np.square(algo.weight + 1.0 - nr_info["fit_recon"]) + nr_info["fit_sqerr"] = np.square(algo.config.weight + 1.0 - nr_info["fit_recon"]) _log.debug("user_features from fit:\n%s", fit_uv) new_uv, _new_off = algo.new_user_embedding(new_u_id, user_data) nr_info["new_recon"] = ifs @ new_uv - nr_info["new_sqerr"] = np.square(algo.weight + 1.0 - nr_info["new_recon"]) + nr_info["new_sqerr"] = np.square(algo.config.weight + 1.0 - nr_info["new_recon"]) _log.debug("user features from new:\n%s", new_uv) @@ -171,7 +171,7 @@ def test_als_recs_topn_for_new_users_with_new_ratings( users = rng.choice(ml_ds.users.ids(), n_users).tolist() - algo = ImplicitMFScorer(20, epochs=10, use_ratings=True) + algo = ImplicitMFScorer(features=20, epochs=10, use_ratings=True) pipe = topn_pipeline(algo, n=10) pipe.train(ml_ds) assert algo.users_ is not None @@ -214,7 +214,7 @@ def test_als_recs_topn_for_new_users_with_new_ratings( def test_als_predict_bad_item(): - algo = ImplicitMFScorer(20, epochs=10) + algo = ImplicitMFScorer(features=20, epochs=10) algo.train(simple_ds) preds = algo(10, ItemList([4])) @@ -226,7 +226,7 @@ def test_als_predict_bad_item(): def 
test_als_predict_bad_user(): - algo = ImplicitMFScorer(20, epochs=10) + algo = ImplicitMFScorer(features=20, epochs=10) algo.train(simple_ds) preds = algo(50, ItemList([3])) @@ -242,7 +242,7 @@ def test_als_predict_no_user_features_basic(ml_ratings: pd.DataFrame, ml_ds: Dat u = np.random.choice(ml_ds.users.ids(), 1)[0] items = np.random.choice(ml_ds.items.ids(), 2) - algo = ImplicitMFScorer(5, epochs=10) + algo = ImplicitMFScorer(features=5, epochs=10) algo.train(ml_ds) preds = algo(u, ItemList(items)) preds = preds.scores("pandas", index="ids") @@ -250,7 +250,7 @@ def test_als_predict_no_user_features_basic(ml_ratings: pd.DataFrame, ml_ds: Dat user_data = ml_ds.user_row(u) - algo_no_user_features = ImplicitMFScorer(5, epochs=10, save_user_features=False) + algo_no_user_features = ImplicitMFScorer(features=5, epochs=10, save_user_features=False) algo_no_user_features.train(ml_ds) query = RecQuery(u, user_data) preds_no_user_features = algo_no_user_features(query, ItemList(items)) @@ -265,7 +265,7 @@ def test_als_predict_no_user_features_basic(ml_ratings: pd.DataFrame, ml_ds: Dat @wantjit def test_als_train_large(ml_ds: Dataset): - algo = ImplicitMFScorer(20, epochs=20, use_ratings=False) + algo = ImplicitMFScorer(features=20, epochs=20, use_ratings=False) algo.train(ml_ds) assert algo.users_ is not None @@ -278,7 +278,9 @@ def test_als_train_large(ml_ds: Dataset): def test_als_save_load(tmp_path, ml_ds: Dataset): "Test saving and loading ALS models, and regularized training." 
- algo = ImplicitMFScorer(5, epochs=5, reg=(2, 1), use_ratings=False) + algo = ImplicitMFScorer( + features=5, epochs=5, regularization={"user": 2, "item": 1}, use_ratings=False + ) algo.train(ml_ds) assert algo.users_ is not None @@ -297,7 +299,7 @@ def test_als_save_load(tmp_path, ml_ds: Dataset): @wantjit def test_als_train_large_noratings(ml_ds: Dataset): - algo = ImplicitMFScorer(20, epochs=20) + algo = ImplicitMFScorer(features=20, epochs=20) algo.train(ml_ds) assert algo.users_ is not None @@ -310,7 +312,7 @@ def test_als_train_large_noratings(ml_ds: Dataset): @wantjit def test_als_train_large_ratings(ml_ds): - algo = ImplicitMFScorer(20, epochs=20, use_ratings=True) + algo = ImplicitMFScorer(features=20, epochs=20, use_ratings=True) algo.train(ml_ds) assert algo.users_ is not None @@ -325,7 +327,7 @@ def test_als_train_large_ratings(ml_ds): @mark.eval def test_als_implicit_batch_accuracy(ml_100k): ds = from_interactions_df(ml_100k) - results = quick_measure_model(ImplicitMFScorer(25, epochs=20), ds) + results = quick_measure_model(ImplicitMFScorer(features=25, epochs=20), ds) ndcg = results.list_summary().loc["NDCG", "mean"] _log.info("nDCG for users is %.4f", ndcg) diff --git a/lenskit/tests/models/test_knn_item_item.py b/lenskit/tests/models/test_knn_item_item.py index c878b4a29..ebc2ec8ef 100644 --- a/lenskit/tests/models/test_knn_item_item.py +++ b/lenskit/tests/models/test_knn_item_item.py @@ -72,15 +72,15 @@ class TestItemKNN(BasicComponentTests, ScorerTests): def test_ii_config(): - model = ItemKNNScorer(30) - cfg = model.get_config() + model = ItemKNNScorer(k=30) + cfg = model.dump_config() print(cfg) assert cfg["feedback"] == "explicit" - assert cfg["nnbrs"] == 30 + assert cfg["k"] == 30 def test_ii_train(): - algo = ItemKNNScorer(30, save_nbrs=500) + algo = ItemKNNScorer(k=30, save_nbrs=500) algo.train(simple_ds) assert isinstance(algo.item_means_, np.ndarray) @@ -114,7 +114,7 @@ def test_ii_train(): def test_ii_train_unbounded(): - algo = 
ItemKNNScorer(30) + algo = ItemKNNScorer(k=30) algo.train(simple_ds) assert all(np.logical_not(np.isnan(algo.sim_matrix_.data))) @@ -141,7 +141,7 @@ def test_ii_train_unbounded(): def test_ii_simple_predict(): history = UserTrainingHistoryLookup() history.train(simple_ds) - algo = ItemKNNScorer(30, save_nbrs=500) + algo = ItemKNNScorer(k=30, save_nbrs=500) algo.train(simple_ds) q = history(3) @@ -156,7 +156,7 @@ def test_ii_simple_predict(): def test_ii_simple_implicit_predict(): history = UserTrainingHistoryLookup() history.train(simple_ds) - algo = ItemKNNScorer(30, feedback="implicit") + algo = ItemKNNScorer(k=30, feedback="implicit") algo.train(from_interactions_df(simple_ratings.loc[:, ["user", "item"]])) q = history(3) @@ -171,7 +171,7 @@ def test_ii_simple_implicit_predict(): def test_ii_simple_predict_unknown(): history = UserTrainingHistoryLookup() history.train(simple_ds) - algo = ItemKNNScorer(30, save_nbrs=500) + algo = ItemKNNScorer(k=30, save_nbrs=500) algo.train(simple_ds) q = history(3) @@ -187,7 +187,7 @@ def test_ii_simple_predict_unknown(): def test_ii_warns_center(): "Test that item-item warns if you center non-centerable data" data = simple_ratings.assign(rating=1) - algo = ItemKNNScorer(5) + algo = ItemKNNScorer(k=5) with pytest.warns(DataWarning): algo.train(from_interactions_df(data)) @@ -197,7 +197,7 @@ def test_ii_warns_center(): @inference_mode def test_ii_train_ml100k(tmp_path, ml_100k): "Test an unbounded model on ML-100K" - algo = ItemKNNScorer(30) + algo = ItemKNNScorer(k=30) _log.info("training model") algo.train(from_interactions_df(ml_100k)) @@ -239,11 +239,11 @@ def test_ii_large_models(rng, ml_ratings, ml_ds): "Several tests of large trained I-I models" _log.info("training limited model") MODEL_SIZE = 100 - algo_lim = ItemKNNScorer(30, save_nbrs=MODEL_SIZE) + algo_lim = ItemKNNScorer(k=30, save_nbrs=MODEL_SIZE) algo_lim.train(ml_ds) _log.info("training unbounded model") - algo_ub = ItemKNNScorer(30) + algo_ub = 
ItemKNNScorer(k=30) algo_ub.train(ml_ds) _log.info("testing models") @@ -363,7 +363,7 @@ def test_ii_implicit_large(rng, ml_ratings): NBRS = 5 NUSERS = 25 NRECS = 50 - algo = ItemKNNScorer(NBRS, feedback="implicit") + algo = ItemKNNScorer(k=NBRS, feedback="implicit") pipe = topn_pipeline(algo) pipe.train(from_interactions_df(ml_ratings[["user_id", "item_id"]], item_col="item_id")) @@ -400,7 +400,7 @@ def test_ii_implicit_large(rng, ml_ratings): @inference_mode def test_ii_save_load(tmp_path, ml_ratings, ml_subset): "Save and load a model" - original = ItemKNNScorer(30, save_nbrs=500) + original = ItemKNNScorer(k=30, save_nbrs=500) _log.info("building model") original.train(from_interactions_df(ml_subset, item_col="item_id")) @@ -439,7 +439,7 @@ def test_ii_save_load(tmp_path, ml_ratings, ml_subset): def test_ii_batch_accuracy(ml_100k): ds = from_interactions_df(ml_100k) - results = quick_measure_model(ItemKNNScorer(30), ds, predicts_ratings=True) + results = quick_measure_model(ItemKNNScorer(k=30), ds, predicts_ratings=True) metrics = results.list_metrics(fill_missing=False) summary = results.list_summary() @@ -457,7 +457,7 @@ def test_ii_batch_accuracy(ml_100k): def test_ii_known_preds(ml_ds): from lenskit import batch - iknn = ItemKNNScorer(20, min_sim=1.0e-6) + iknn = ItemKNNScorer(k=20, min_sim=1.0e-6) pipe = predict_pipeline(iknn, fallback=False) # noqa: F821 _log.info("training %s on ml data", iknn) pipe.train(ml_ds) diff --git a/lenskit/tests/models/test_knn_user_user.py b/lenskit/tests/models/test_knn_user_user.py index afcb76c49..1a2e9525e 100644 --- a/lenskit/tests/models/test_knn_user_user.py +++ b/lenskit/tests/models/test_knn_user_user.py @@ -36,7 +36,7 @@ class TestUserKNN(BasicComponentTests, ScorerTests): def test_uu_train(ml_ratings, ml_ds): - algo = UserKNNScorer(30) + algo = UserKNNScorer(k=30) algo.train(ml_ds) # we have data structures @@ -68,7 +68,7 @@ def test_uu_train(ml_ratings, ml_ds): def test_uu_predict_one(ml_ds): - algo = 
UserKNNScorer(30) + algo = UserKNNScorer(k=30) algo.train(ml_ds) preds = algo(query=4, items=ItemList([1016])) @@ -78,7 +78,7 @@ def test_uu_predict_one(ml_ds): def test_uu_predict_too_few(ml_ds): - algo = UserKNNScorer(30, min_nbrs=2) + algo = UserKNNScorer(k=30, min_nbrs=2) algo.train(ml_ds) preds = algo(query=4, items=ItemList([2091])) @@ -90,7 +90,7 @@ def test_uu_predict_too_few(ml_ds): def test_uu_predict_too_few_blended(ml_ds): - algo = UserKNNScorer(30, min_nbrs=2) + algo = UserKNNScorer(k=30, min_nbrs=2) algo.train(ml_ds) preds = algo(query=4, items=ItemList([1016, 2091])) @@ -102,7 +102,7 @@ def test_uu_predict_too_few_blended(ml_ds): def test_uu_predict_live_ratings(ml_ratings): - algo = UserKNNScorer(30, min_nbrs=2) + algo = UserKNNScorer(k=30, min_nbrs=2) no4 = ml_ratings[ml_ratings.user_id != 4] no4 = from_interactions_df(no4) algo.train(no4) @@ -122,7 +122,7 @@ def test_uu_predict_live_ratings(ml_ratings): def test_uu_save_load(tmp_path, ml_ratings, ml_ds): - orig = UserKNNScorer(30) + orig = UserKNNScorer(k=30) _log.info("training model") orig.train(ml_ds) @@ -167,7 +167,7 @@ def test_uu_save_load(tmp_path, ml_ratings, ml_ds): def test_uu_predict_unknown_empty(ml_ds): - algo = UserKNNScorer(30, min_nbrs=2) + algo = UserKNNScorer(k=30, min_nbrs=2) algo.train(ml_ds) preds = algo(query=-28018, items=ItemList([1016, 2091])) @@ -179,7 +179,7 @@ def test_uu_predict_unknown_empty(ml_ds): def test_uu_implicit(ml_ratings): "Train and use user-user on an implicit data set." - algo = UserKNNScorer(20, feedback="implicit") + algo = UserKNNScorer(k=20, feedback="implicit") data = ml_ratings.loc[:, ["user_id", "item_id"]] algo.train(from_interactions_df(data)) @@ -199,7 +199,7 @@ def test_uu_implicit(ml_ratings): @mark.slow def test_uu_save_load_implicit(tmp_path, ml_ratings): "Save and load user-user on an implicit data set." 
- orig = UserKNNScorer(20, feedback="implicit") + orig = UserKNNScorer(k=20, feedback="implicit") data = ml_ratings.loc[:, ["user_id", "item_id"]] orig.train(from_interactions_df(data)) @@ -216,7 +216,7 @@ def test_uu_save_load_implicit(tmp_path, ml_ratings): def test_uu_known_preds(ml_ds: Dataset): from lenskit import batch - uknn = UserKNNScorer(30, min_sim=1.0e-6) + uknn = UserKNNScorer(k=30, min_sim=1.0e-6) pipe = predict_pipeline(uknn, fallback=False) _log.info("training %s on ml data", uknn) pipe.train(ml_ds) @@ -262,7 +262,7 @@ def __batch_eval(job): @mark.eval def test_uu_batch_accuracy(ml_100k: pd.DataFrame): ds = from_interactions_df(ml_100k) - results = quick_measure_model(UserKNNScorer(30), ds, predicts_ratings=True) + results = quick_measure_model(UserKNNScorer(k=30), ds, predicts_ratings=True) summary = results.list_summary() @@ -274,7 +274,9 @@ def test_uu_batch_accuracy(ml_100k: pd.DataFrame): @mark.eval def test_uu_implicit_batch_accuracy(ml_100k: pd.DataFrame): ds = from_interactions_df(ml_100k) - results = quick_measure_model(UserKNNScorer(30, feedback="implicit"), ds, predicts_ratings=True) + results = quick_measure_model( + UserKNNScorer(k=30, feedback="implicit"), ds, predicts_ratings=True + ) summary = results.list_summary() diff --git a/lenskit/tests/pipeline/test_component_config.py b/lenskit/tests/pipeline/test_component_config.py index 3f0f919c6..f6cdc6537 100644 --- a/lenskit/tests/pipeline/test_component_config.py +++ b/lenskit/tests/pipeline/test_component_config.py @@ -4,35 +4,89 @@ # Licensed under the MIT license, see LICENSE.md for details. 
# SPDX-License-Identifier: MIT +from __future__ import annotations + import json +from dataclasses import dataclass + +from pydantic import BaseModel +from pydantic.dataclasses import dataclass as pydantic_dataclass + +from pytest import mark from lenskit.pipeline import Pipeline from lenskit.pipeline.components import Component -class Prefixer(Component): - prefix: str +@dataclass +class PrefixConfigDC: + prefix: str = "UNDEFINED" + + +class PrefixConfigM(BaseModel): + prefix: str = "UNDEFINED" + + +@pydantic_dataclass +class PrefixConfigPYDC: + prefix: str = "UNDEFINED" + + +class PrefixerDC(Component): + config: PrefixConfigDC + + def __call__(self, msg: str) -> str: + return self.config.prefix + msg + + +class PrefixerM(Component): + config: PrefixConfigM + + def __call__(self, msg: str) -> str: + return self.config.prefix + msg + + +# make sure it works with sub-sub-classes +class PrefixerM2(PrefixerM): + config: PrefixConfigM + - def __init__(self, prefix: str = "hello"): - self.prefix = prefix +class PrefixerPYDC(Component): + config: PrefixConfigPYDC def __call__(self, msg: str) -> str: - return self.prefix + msg + return self.config.prefix + msg + + +@mark.parametrize("prefixer", [PrefixerDC, PrefixerM, PrefixerPYDC, PrefixerM2]) +def test_config_setup(prefixer: type[Component]): + ccls = prefixer._config_class() # type: ignore + assert ccls is not None + comp = prefixer() + assert isinstance(comp.config, ccls) -def test_auto_config_roundtrip(): - comp = Prefixer("FOOBIE BLETCH") - cfg = comp.get_config() - assert "prefix" in cfg +@mark.parametrize("prefixer", [PrefixerDC, PrefixerM, PrefixerPYDC]) +def test_auto_config_roundtrip(prefixer: type[Component]): + comp = prefixer(prefix="FOOBIE BLETCH") - c2 = Prefixer.from_config(cfg) + cfg = comp.config + cfg_data = comp.dump_config() + assert "prefix" in cfg_data + + c2 = prefixer(cfg) assert c2 is not comp - assert c2.prefix == comp.prefix + assert c2.config.prefix == comp.config.prefix + + c3 = 
prefixer(prefixer.validate_config(cfg_data)) + assert c3 is not comp + assert c3.config.prefix == comp.config.prefix -def test_pipeline_config(): - comp = Prefixer("scroll named ") +@mark.parametrize("prefixer", [PrefixerDC, PrefixerM, PrefixerPYDC]) +def test_pipeline_config(prefixer: type[Component]): + comp = prefixer(prefix="scroll named ") pipe = Pipeline() msg = pipe.create_input("msg", str) @@ -45,3 +99,21 @@ def test_pipeline_config(): assert "prefix" in config assert config["prefix"]["prefix"] == "scroll named " + + +@mark.parametrize("prefixer", [PrefixerDC, PrefixerM, PrefixerPYDC]) +def test_pipeline_config_roundtrip(prefixer: type[Component]): + comp = prefixer(prefix="scroll named ") + + pipe = Pipeline() + msg = pipe.create_input("msg", str) + pipe.add_component("prefix", comp, msg=msg) + + assert pipe.run(msg="FOOBIE BLETCH") == "scroll named FOOBIE BLETCH" + + config = pipe.get_config() + print(config.model_dump_json(indent=2)) + + p2 = Pipeline.from_config(config) + assert p2.node("prefix", missing="none") is not None + assert p2.run(msg="READ ME") == "scroll named READ ME" diff --git a/lenskit/tests/pipeline/test_pipeline_clone.py b/lenskit/tests/pipeline/test_pipeline_clone.py index 8792f8629..6bd67e918 100644 --- a/lenskit/tests/pipeline/test_pipeline_clone.py +++ b/lenskit/tests/pipeline/test_pipeline_clone.py @@ -5,20 +5,23 @@ # SPDX-License-Identifier: MIT import json +from dataclasses import dataclass from lenskit.pipeline import Pipeline from lenskit.pipeline.components import Component from lenskit.pipeline.nodes import ComponentNode -class Prefixer(Component): +@dataclass +class PrefixConfig: prefix: str - def __init__(self, prefix: str = "hello"): - self.prefix = prefix + +class Prefixer(Component): + config: PrefixConfig def __call__(self, msg: str) -> str: - return self.prefix + msg + return self.config.prefix + msg class Question: @@ -33,7 +36,7 @@ def exclaim(msg: str) -> str: def test_pipeline_clone(): - comp = Prefixer("scroll 
named ") + comp = Prefixer(PrefixConfig("scroll named ")) pipe = Pipeline() msg = pipe.create_input("msg", str) @@ -46,13 +49,13 @@ def test_pipeline_clone(): assert isinstance(n2, ComponentNode) assert isinstance(n2.component, Prefixer) assert n2.component is not comp - assert n2.component.prefix == comp.prefix + assert n2.component.config.prefix == comp.config.prefix assert p2.run(msg="HACKEM MUCHE") == "scroll named HACKEM MUCHE" def test_pipeline_clone_with_function(): - comp = Prefixer("scroll named ") + comp = Prefixer(prefix="scroll named ") pipe = Pipeline() msg = pipe.create_input("msg", str) @@ -67,7 +70,7 @@ def test_pipeline_clone_with_function(): def test_pipeline_clone_with_nonconfig_class(): - comp = Prefixer("scroll named ") + comp = Prefixer(prefix="scroll named ") pipe = Pipeline() msg = pipe.create_input("msg", str) diff --git a/lenskit/tests/pipeline/test_save_load.py b/lenskit/tests/pipeline/test_save_load.py index a1ea6208e..39ed12a94 100644 --- a/lenskit/tests/pipeline/test_save_load.py +++ b/lenskit/tests/pipeline/test_save_load.py @@ -6,6 +6,7 @@ import logging import re +from dataclasses import dataclass from types import NoneType import numpy as np @@ -22,14 +23,16 @@ # region Test Components -class Prefixer(Component): +@dataclass +class PrefixConfig: prefix: str - def __init__(self, prefix: str = "hello"): - self.prefix = prefix + +class Prefixer(Component): + config: PrefixConfig def __call__(self, msg: str) -> str: - return self.prefix + msg + return self.config.prefix + msg def negative(x: int) -> int: @@ -137,7 +140,7 @@ def test_configurable_component(): pipe = Pipeline() msg = pipe.create_input("msg", str) - pfx = Prefixer("scroll named ") + pfx = Prefixer(prefix="scroll named ") pipe.add_component("prefix", pfx, msg=msg) cfg = pipe.get_config() @@ -218,7 +221,7 @@ def test_hash_validate(): pipe = Pipeline() msg = pipe.create_input("msg", str) - pfx = Prefixer("scroll named ") + pfx = Prefixer(prefix="scroll named ") 
pipe.add_component("prefix", pfx, msg=msg) cfg = pipe.get_config() diff --git a/lenskit/tests/utils/test_random.py b/lenskit/tests/utils/test_random.py index e1002bfe3..0873b0320 100644 --- a/lenskit/tests/utils/test_random.py +++ b/lenskit/tests/utils/test_random.py @@ -4,7 +4,7 @@ from hypothesis import strategies as st from lenskit.data.query import RecQuery -from lenskit.util.random import derivable_rng, make_seed +from lenskit.random import derivable_rng, make_seed @given( diff --git a/lenskit/tests/utils/test_task_logging.py b/lenskit/tests/utils/test_task_logging.py index 85334330f..52068fe0a 100644 --- a/lenskit/tests/utils/test_task_logging.py +++ b/lenskit/tests/utils/test_task_logging.py @@ -6,7 +6,7 @@ def test_train_task(ml_ds: Dataset): - info = BiasedMFScorer(50, epochs=5) + info = BiasedMFScorer(features=50, epochs=5) pipe = topn_pipeline(info) with Task("train ImplicitMF", reset_hwm=True) as task: diff --git a/pixi.lock b/pixi.lock index 6213d1dae..a6e90e770 100644 --- a/pixi.lock +++ b/pixi.lock @@ -4224,6 +4224,8 @@ environments: linux-64: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/accessible-pygments-0.0.5-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/alabaster-1.0.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.13-hb9d3cd8_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.7.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/anyio-4.7.0-pyhd8ed1ab_0.conda @@ -4279,6 +4281,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/debugpy-1.8.11-py311hfdbb021_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/decorator-5.1.1-pyhd8ed1ab_1.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/defusedxml-0.7.1-pyhd8ed1ab_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/docutils-0.21.2-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/double-conversion-3.3.0-h59595ed_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/entrypoints-0.4-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.2-pyhd8ed1ab_1.conda @@ -4302,6 +4305,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/gmp-6.3.0-hac33072_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/gmpy2-2.1.5-py311h0f6cedb_3.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/greenlet-3.1.1-py311hfdbb021_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/h11-0.14.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/h2-4.1.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-9.0.0-hda332d3_1.conda @@ -4313,6 +4317,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/hypothesis-6.122.3-pyha770c72_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/icu-75.1-he02047a_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/idna-3.10-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/imagesize-1.4.1-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/implicit-0.7.2-py311h2c6edaf_5.conda - conda: https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.5.0-pyha770c72_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.4.5-pyhd8ed1ab_1.conda @@ -4328,6 +4333,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/jsonschema-4.23.0-pyhd8ed1ab_1.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/jsonschema-specifications-2024.10.1-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jsonschema-with-format-nongpl-4.23.0-hd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter-cache-1.0.1-pyhff2d567_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter-lsp-2.2.5-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_client-8.6.3-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_core-5.7.2-pyh31011fe_1.conda @@ -4338,10 +4344,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_pygments-0.3.0-pyhd8ed1ab_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_server-2.27.3-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_widgets-3.0.13-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/just-1.38.0-h8fae777_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-3.10.0-he073ed8_18.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.7-py311hd18a35c_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.3-h659f571_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/latexcodec-2.0.1-pyh9f0ad1d_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.43-h712a8e2_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2 @@ -4423,6 +4431,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.9.4-py311h38be061_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.9.4-py311h2b939e6_0.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/matplotlib-inline-0.1.7-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mdit-py-plugins-0.4.2-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/mdurl-0.1.2-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/mistune-3.0.2-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/mkl-2024.2.2-ha957f24_16.conda @@ -4433,6 +4442,8 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/mysql-common-9.0.1-h266115a_3.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-9.0.1-he0572af_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/myst-nb-1.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/myst-parser-4.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/nbclient-0.10.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/nbconvert-core-7.16.4-pyhff2d567_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/nbformat-5.10.4-pyhd8ed1ab_1.conda @@ -4471,9 +4482,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/py-cpuinfo-9.0.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/pyarrow-18.1.0-py311h38be061_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/pyarrow-core-18.1.0-py311h4854187_0_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pybtex-0.24.0-pyhd8ed1ab_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/pybtex-docutils-1.0.3-py311h38be061_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyh29332c3_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-2.10.3-pyh3cfb1c2_0.conda - conda: 
https://conda.anaconda.org/conda-forge/linux-64/pydantic-core-2.27.1-py311h9e33e62_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pydata-sphinx-theme-0.16.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.18.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.2.0-pyhd8ed1ab_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pyprojroot-0.3.0-pyhd8ed1ab_0.conda @@ -4513,13 +4527,27 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/sleef-3.7-h1b44611_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/snappy-1.2.1-h8bd8927_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sniffio-1.3.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-2.2.0-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/sortedcontainers-2.4.0-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/soupsieve-2.5-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/spdlog-1.11.0-h9b3ece8_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-8.1.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-togglebutton-0.3.2-pyhd8ed1ab_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-2.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-bibtex-2.6.3-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-2.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.1.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-mermaid-1.0.0-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-2.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.10-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxext-opengraph-0.9.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/sqlalchemy-2.0.36-py311h9ecbd09_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/stack_data-0.6.3-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/structlog-24.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sympy-1.13.3-pyh2585a3b_104.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sysroot_linux-64-2.17-h4a8ded7_18.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tabulate-0.9.0-pyhd8ed1ab_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/tbb-2021.13.0-hceb3a55_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/terminado-0.18.1-pyh0d859eb_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.5.0-pyhc1e730c_0.conda @@ -4577,6 +4605,8 @@ environments: - pypi: lenskit-hpf - pypi: lenskit-implicit osx-arm64: + - conda: https://conda.anaconda.org/conda-forge/noarch/accessible-pygments-0.0.5-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/alabaster-1.0.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.7.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/anyio-4.7.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/appnope-0.1.4-pyhd8ed1ab_1.conda @@ -4628,6 +4658,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/debugpy-1.8.11-py311h155a34a_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/decorator-5.1.1-pyhd8ed1ab_1.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/defusedxml-0.7.1-pyhd8ed1ab_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/docutils-0.21.2-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/entrypoints-0.4-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.2-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/executing-2.1.0-pyhd8ed1ab_1.conda @@ -4640,6 +4671,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/glog-0.7.1-heb240a5_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/gmp-6.3.0-h7bae524_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/gmpy2-2.1.5-py311hb5d9ff4_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/greenlet-3.1.1-py311h3f08180_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/h11-0.14.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/h2-4.1.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/hpack-4.0.0-pyhd8ed1ab_1.conda @@ -4650,6 +4682,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/hypothesis-6.122.3-pyha770c72_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/icu-75.1-hfee45f7_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/idna-3.10-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/imagesize-1.4.1-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/implicit-0.7.2-py311h3e667b1_5.conda - conda: https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.5.0-pyha770c72_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.4.5-pyhd8ed1ab_1.conda @@ -4665,6 +4698,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/jsonschema-4.23.0-pyhd8ed1ab_1.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/jsonschema-specifications-2024.10.1-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jsonschema-with-format-nongpl-4.23.0-hd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter-cache-1.0.1-pyhff2d567_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter-lsp-2.2.5-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_client-8.6.3-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_core-5.7.2-pyh31011fe_1.conda @@ -4675,8 +4709,10 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_pygments-0.3.0-pyhd8ed1ab_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_server-2.27.3-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_widgets-3.0.13-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/just-1.38.0-h0716509_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/kiwisolver-1.4.7-py311h2c37856_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/krb5-1.21.3-h237132a_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/latexcodec-2.0.1-pyh9f0ad1d_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/lcms2-2.16-ha0e7c42_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/lerc-4.0.0-h9a09cb3_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libabseil-20240722.0-cxx17_hf9b8971_1.conda @@ -4734,6 +4770,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/matplotlib-3.9.4-py311ha1ab1f8_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/matplotlib-base-3.9.4-py311h031da69_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/matplotlib-inline-0.1.7-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mdit-py-plugins-0.4.2-pyhd8ed1ab_1.conda - 
conda: https://conda.anaconda.org/conda-forge/noarch/mdurl-0.1.2-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/mistune-3.0.2-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/more-itertools-10.5.0-pyhd8ed1ab_1.conda @@ -4741,6 +4778,8 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/mpfr-4.2.1-hb693164_3.conda - conda: https://conda.anaconda.org/conda-forge/noarch/mpmath-1.3.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/myst-nb-1.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/myst-parser-4.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/nbclient-0.10.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/nbconvert-core-7.16.4-pyhff2d567_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/nbformat-5.10.4-pyhd8ed1ab_1.conda @@ -4777,9 +4816,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/py-cpuinfo-9.0.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pyarrow-18.1.0-py311ha1ab1f8_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pyarrow-core-18.1.0-py311he04fa90_0_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pybtex-0.24.0-pyhd8ed1ab_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pybtex-docutils-1.0.3-py311h267d04e_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyh29332c3_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-2.10.3-pyh3cfb1c2_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pydantic-core-2.27.1-py311h3ff9189_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pydata-sphinx-theme-0.16.0-pyhd8ed1ab_0.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/pygments-2.18.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pyobjc-core-10.3.2-py311hab620ed_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pyobjc-framework-cocoa-10.3.2-py311hab620ed_0.conda @@ -4818,11 +4860,25 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/sleef-3.7-h8391f65_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/snappy-1.2.1-h98b9ce2_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sniffio-1.3.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-2.2.0-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/sortedcontainers-2.4.0-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/soupsieve-2.5-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-8.1.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-togglebutton-0.3.2-pyhd8ed1ab_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-2.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-bibtex-2.6.3-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-2.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.1.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-mermaid-1.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-2.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.10-pyhd8ed1ab_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/sphinxext-opengraph-0.9.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/sqlalchemy-2.0.36-py311hae2e1ce_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/stack_data-0.6.3-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/structlog-24.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sympy-1.13.3-pyh2585a3b_104.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tabulate-0.9.0-pyhd8ed1ab_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/terminado-0.18.1-pyh31c8845_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.5.0-pyhc1e730c_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/tinycss2-1.4.0-pyhd8ed1ab_0.conda @@ -9988,6 +10044,7 @@ packages: sha256: fe51de6107f9edc7aa4f786a70f4a883943bc9d39b3bb7307c04c41410990726 md5: d7c89558ba9fa0495403155b64376d81 license: None + purls: [] size: 2562 timestamp: 1578324546067 - conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2 @@ -11188,6 +11245,7 @@ packages: - libgcc-ng >=12 license: bzip2-1.0.6 license_family: BSD + purls: [] size: 252783 timestamp: 1720974456583 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/bzip2-1.0.8-h99b78c6_7.conda @@ -11197,6 +11255,7 @@ packages: - __osx >=11.0 license: bzip2-1.0.6 license_family: BSD + purls: [] size: 122909 timestamp: 1720974522888 - conda: https://conda.anaconda.org/conda-forge/win-64/bzip2-1.0.8-h2466b09_7.conda @@ -11247,12 +11306,14 @@ packages: sha256: afee721baa6d988e27fef1832f68d6f32ac8cc99cdf6015732224c2841a09cea md5: c27d1c142233b5bc9ca570c6e2e0c244 license: ISC + purls: [] size: 159003 timestamp: 1725018903918 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/ca-certificates-2024.8.30-hf0a4a13_0.conda sha256: 2db1733f4b644575dbbdd7994a8f338e6ef937f5ebdb74acd557e9dda0211709 md5: 40dec13fd8348dbe303e57be74bd3d35 
license: ISC + purls: [] size: 158482 timestamp: 1725019034582 - conda: https://conda.anaconda.org/conda-forge/win-64/ca-certificates-2024.8.30-h56e8100_0.conda @@ -13277,6 +13338,8 @@ packages: - python >=3.9 license: BSD-3-Clause license_family: BSD + purls: + - pkg:pypi/jinja2?source=hash-mapping size: 110963 timestamp: 1733217424408 - conda: https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.2-pyhd8ed1ab_1.conda @@ -13879,6 +13942,7 @@ packages: - binutils_impl_linux-64 2.43 license: GPL-3.0-only license_family: GPL + purls: [] size: 669211 timestamp: 1729655358674 - pypi: lenskit @@ -13907,6 +13971,32 @@ packages: - pytest>=8.2,<9 ; extra == 'test' requires_python: '>=3.11' editable: true +- pypi: lenskit + name: lenskit + version: 2025.0.0a4.dev21+g67a0be45.d20250108 + sha256: e41346462653e72649ef39d552f9ec310b673988064a8fa5fdeb294dfc44a605 + requires_dist: + - more-itertools>=9.0 + - numpy>=1.25 + - pandas~=2.0 + - pyarrow>=15 + - pydantic~=2.7 + - pyzmq>=24 + - rich~=13.5 + - scipy>=1.11 + - structlog>=23.2 + - threadpoolctl>=3.0 + - torch~=2.1 + - scikit-learn>=1.1 ; extra == 'sklearn' + - hypothesis>=6.16 ; extra == 'test' + - pyprojroot==0.3.* ; extra == 'test' + - pytest-benchmark==4.* ; extra == 'test' + - pytest-cov>=2.12 ; extra == 'test' + - pytest-doctestplus>=1.2.1,<2 ; extra == 'test' + - pytest-repeat>=0.9 ; extra == 'test' + - pytest>=8.2,<9 ; extra == 'test' + requires_python: '>=3.11' + editable: true - pypi: lenskit-funksvd name: lenskit-funksvd version: 2025.0.0a1.dev63+g2a2ba8ae.d20241213 @@ -14756,6 +14846,7 @@ packages: - expat 2.6.4.* license: MIT license_family: MIT + purls: [] size: 73304 timestamp: 1730967041968 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libexpat-2.6.4-h286801f_0.conda @@ -14767,6 +14858,7 @@ packages: - expat 2.6.4.* license: MIT license_family: MIT + purls: [] size: 64693 timestamp: 1730967175868 - conda: https://conda.anaconda.org/conda-forge/win-64/libexpat-2.6.4-he0c23c2_0.conda @@ -14789,6 
+14881,7 @@ packages: - libgcc-ng >=9.4.0 license: MIT license_family: MIT + purls: [] size: 58292 timestamp: 1636488182923 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libffi-3.4.2-h3422bc3_5.tar.bz2 @@ -14796,6 +14889,7 @@ packages: md5: 086914b672be056eb70fd4285b6783b6 license: MIT license_family: MIT + purls: [] size: 39020 timestamp: 1636488587153 - conda: https://conda.anaconda.org/conda-forge/win-64/libffi-3.4.2-h8ffe710_5.tar.bz2 @@ -14819,6 +14913,7 @@ packages: - libgcc-ng ==14.2.0=*_1 license: GPL-3.0-only WITH GCC-exception-3.1 license_family: GPL + purls: [] size: 848745 timestamp: 1729027721139 - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-14.2.0-h69a702a_1.conda @@ -14828,6 +14923,7 @@ packages: - libgcc 14.2.0 h77fa898_1 license: GPL-3.0-only WITH GCC-exception-3.1 license_family: GPL + purls: [] size: 54142 timestamp: 1729027726517 - conda: https://conda.anaconda.org/conda-forge/linux-64/libgfortran-14.2.0-h69a702a_1.conda @@ -15312,6 +15408,7 @@ packages: - __glibc >=2.17,<3.0.a0 - libgcc >=13 license: 0BSD + purls: [] size: 111132 timestamp: 1733407410083 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/liblzma-5.6.3-h39f12f2_1.conda @@ -15320,6 +15417,7 @@ packages: depends: - __osx >=11.0 license: 0BSD + purls: [] size: 99129 timestamp: 1733407496073 - conda: https://conda.anaconda.org/conda-forge/win-64/liblzma-5.6.3-h2466b09_1.conda @@ -15403,6 +15501,7 @@ packages: - libgcc-ng >=12 license: LGPL-2.1-only license_family: GPL + purls: [] size: 33408 timestamp: 1697359010159 - conda: https://conda.anaconda.org/conda-forge/linux-64/libntlm-1.4-h7f98852_1002.tar.bz2 @@ -15686,6 +15785,7 @@ packages: - libgcc >=13 - libzlib >=1.3.1,<2.0a0 license: Unlicense + purls: [] size: 873551 timestamp: 1733761824646 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libsqlite-3.47.2-h3f77e49_0.conda @@ -15695,6 +15795,7 @@ packages: - __osx >=11.0 - libzlib >=1.3.1,<2.0a0 license: Unlicense + purls: [] size: 
850553 timestamp: 1733762057506 - conda: https://conda.anaconda.org/conda-forge/win-64/libsqlite-3.47.2-h67fdade_0.conda @@ -16007,6 +16108,7 @@ packages: - libgcc-ng >=12 license: BSD-3-Clause license_family: BSD + purls: [] size: 33601 timestamp: 1680112270483 - conda: https://conda.anaconda.org/conda-forge/linux-64/libuv-1.49.2-hb9d3cd8_0.conda @@ -16125,6 +16227,7 @@ packages: depends: - libgcc-ng >=12 license: LGPL-2.1-or-later + purls: [] size: 100393 timestamp: 1702724383534 - conda: https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.7.0-h2c5496b_1.conda @@ -16235,6 +16338,7 @@ packages: - zlib 1.3.1 *_2 license: Zlib license_family: Other + purls: [] size: 60963 timestamp: 1727963148474 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libzlib-1.3.1-h8359307_2.conda @@ -16246,6 +16350,7 @@ packages: - zlib 1.3.1 *_2 license: Zlib license_family: Other + purls: [] size: 46438 timestamp: 1727963202283 - conda: https://conda.anaconda.org/conda-forge/win-64/libzlib-1.3.1-h2466b09_2.conda @@ -17186,6 +17291,7 @@ packages: - __glibc >=2.17,<3.0.a0 - libgcc-ng >=12 license: X11 AND BSD-3-Clause + purls: [] size: 889086 timestamp: 1724658547447 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/ncurses-6.5-h7bae524_1.conda @@ -17194,6 +17300,7 @@ packages: depends: - __osx >=11.0 license: X11 AND BSD-3-Clause + purls: [] size: 802321 timestamp: 1724658775723 - conda: https://conda.anaconda.org/conda-forge/noarch/nest-asyncio-1.6.0-pyhd8ed1ab_1.conda @@ -17772,6 +17879,7 @@ packages: - libgcc >=13 license: Apache-2.0 license_family: Apache + purls: [] size: 2947466 timestamp: 1731377666602 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/openssl-3.4.0-h39f12f2_0.conda @@ -17782,6 +17890,7 @@ packages: - ca-certificates license: Apache-2.0 license_family: Apache + purls: [] size: 2935176 timestamp: 1731377561525 - conda: https://conda.anaconda.org/conda-forge/win-64/openssl-3.4.0-h2466b09_0.conda @@ -19133,6 +19242,8 @@ packages: - 
python >=3.9 license: BSD-2-Clause license_family: BSD + purls: + - pkg:pypi/pygments?source=hash-mapping size: 876700 timestamp: 1733221731178 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pyobjc-core-10.3.2-py311hab620ed_0.conda @@ -20623,6 +20734,7 @@ packages: - ncurses >=6.3,<7.0a0 license: GPL-3.0-only license_family: GPL + purls: [] size: 281456 timestamp: 1679532220005 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/readline-8.2-h92ec313_1.conda @@ -20632,6 +20744,7 @@ packages: - ncurses >=6.3,<7.0a0 license: GPL-3.0-only license_family: GPL + purls: [] size: 250351 timestamp: 1679532511311 - conda: https://conda.anaconda.org/conda-forge/noarch/referencing-0.35.1-pyhd8ed1ab_1.conda @@ -21942,6 +22055,7 @@ packages: - libzlib >=1.2.13,<2.0.0a0 license: TCL license_family: BSD + purls: [] size: 3318875 timestamp: 1699202167581 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/tk-8.6.13-h5083fa2_1.conda @@ -21951,6 +22065,7 @@ packages: - libzlib >=1.2.13,<2.0.0a0 license: TCL license_family: BSD + purls: [] size: 3145523 timestamp: 1699202432999 - conda: https://conda.anaconda.org/conda-forge/win-64/tk-8.6.13-h5226925_1.conda @@ -22172,6 +22287,7 @@ packages: sha256: 4fde5c3008bf5d2db82f2b50204464314cc3c91c1d953652f7bd01d9e52aefdf md5: 8ac3367aafb1cc0a068483c580af8015 license: LicenseRef-Public-Domain + purls: [] size: 122354 timestamp: 1728047496079 - conda: https://conda.anaconda.org/conda-forge/win-64/ucrt-10.0.22621.0-h57928b3_1.conda diff --git a/pixi.toml b/pixi.toml index 48a6f7810..9fda08da1 100644 --- a/pixi.toml +++ b/pixi.toml @@ -228,6 +228,7 @@ test-py312-full = { features = [ ], solve-group = "py312-nix" } test-examples = { features = [ "demo", + "doc", "test", "funksvd", "hpf", diff --git a/workflows/test/test-examples.ts b/workflows/test/test-examples.ts index 20d88c48e..1c83130e3 100644 --- a/workflows/test/test-examples.ts +++ b/workflows/test/test-examples.ts @@ -24,9 +24,15 @@ export function 
exampleTestJob(): WorkflowJob { ...condaSetup(options), ...mlDataSteps(["ml-100k", "ml-1m", "ml-10m", "ml-20m"]), { - "name": "📕 Validate documentation examples", + "name": "📕 Validate code examples", "run": script( - `pytest ${cov} --nbval-lax --doctest-glob='*.rst' --ignore='docs/_ext' --log-file test-docs.log docs */lenskit`, + `sphinx-build -b doctest docs build/doc`, + ), + }, + { + "name": "📕 Validate example notebooks", + "run": script( + `pytest ${cov} --nbval-lax --log-file test-notebooks.log docs`, ), }, ...coverageSteps(options),