Merge pull request #24 from tohtsky/develop

tohtsky · web-flow · commit f054c327b819 · 2021-01-09T15:15:48.000+09:00
MSVC wheel (#23)
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
@@ -3,6 +3,7 @@ on:
   push:
     branches:
       - main
+      - develop
   release:
     types:
       - created
@@ -28,7 +29,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-20.04, macOS-10.15]
+        os: [ubuntu-20.04, macOS-10.15, windows-2019]
 
     steps:
       - uses: actions/checkout@v2
@@ -43,7 +44,7 @@ jobs:
         run: python -m cibuildwheel --output-dir wheelhouse
         env:
           CIBW_BUILD: "cp36-* cp37-* cp38-* cp39-*"
-          CIBW_ENVIRONMENT: "CFLAGS='-march=core-avx-i' CXXFLAGS='-march=core-avx-i'"
+          CIBW_ENVIRONMENT: "CFLAGS='-march=core-avx-i' CXXFLAGS='-march=core-avx-i' CL='/arch:AVX'"
           CIBW_TEST_COMMAND: pytest {project}/tests
           CIBW_TEST_REQUIRES: pytest
 
diff --git a/Readme.md b/Readme.md
@@ -23,13 +23,13 @@ However, I decided to implement my own one to
 
 # Installation & Optional Dependencies
 
-There are binaries for Linux & MacOS with python>=3.6. You can install them via
+There are binaries for Linux, macOS, and Windows with python>=3.6. You can install them via
 
 ```sh
 pip install irspack
 ```
 
-The binary has been compiled to use AVX instruction. If you want to use AVX2/AVX512 or your environment does not support AVX, install it from source like
+The binaries have been compiled to use AVX instructions. If you want to use AVX2/AVX512, or if your environment does not support AVX, install it from source like
 
 ```sh
 CFLAGS="-march=native" pip install git+https://github.com/tohtsky/irspack.git
diff --git a/cpp_source/als/wrapper.cpp b/cpp_source/als/wrapper.cpp
@@ -6,6 +6,7 @@
 #include <pybind11/eigen.h>
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
+#include <sstream>
 #include <stdexcept>
 #include <vector>
 
@@ -14,8 +15,12 @@ using namespace ials11;
 using std::vector;
 
 PYBIND11_MODULE(_ials, m) {
-  py::print("info: irspack's _ials core built to use\n\t",
-            Eigen::SimdInstructionSetsInUse());
+  std::stringstream doc_stream;
+  doc_stream << "irspack's core module for \"IALSRecommender\"." << std::endl
+             << "Built to use" << std::endl
+             << "\t" << Eigen::SimdInstructionSetsInUse();
+
+  m.doc() = doc_stream.str();
   py::class_<IALSLearningConfig>(m, "IALSLearningConfig")
       .def(py::init<size_t, Real, Real, Real, int, size_t, bool, size_t>())
       .def(py::pickle(
diff --git a/cpp_source/evaluator.cpp b/cpp_source/evaluator.cpp
@@ -22,6 +22,9 @@ namespace irspack {
 
 using CountVector = Eigen::Matrix<std::int64_t, Eigen::Dynamic, 1>;
 struct Metrics {
+  // This isn't strictly necessary, but MSVC complains that Metrics is not
+  // default constructible.
+  inline Metrics() : Metrics(0) {}
   inline Metrics(size_t n_item) : n_item(n_item), item_cnt(n_item) {
     item_cnt.array() = 0;
   }
diff --git a/tests/dataset/test_ml100k.py b/tests/dataset/test_ml100k.py
diff --git a/tests/dataset/test_ml1m.py b/tests/dataset/test_ml1m.py
@@ -1,55 +1,161 @@
-from tempfile import NamedTemporaryFile
+import os
+import sys
 from zipfile import ZipFile
 
 import numpy as np
+import pytest
 
-from irspack.dataset.movielens import MovieLens1MDataManager
+from irspack.dataset.movielens import (
+    MovieLens1MDataManager,
+    MovieLens20MDataManager,
+    MovieLens100KDataManager,
+)
+
+ZIPFILE_NAME = os.path.join(os.path.expanduser("~"), "ml.test.zip")
+
+
+def test_ml100k() -> None:
+    if sys.platform == "win32":
+        pytest.skip("Skip on Windows.")
+    GENRES = ["fantasy", "action", "thriller"]
+    try:
+        with ZipFile(ZIPFILE_NAME, "w") as zf:
+            with zf.open("ml-100k/u.data", "w") as ofs:
+                ofs.write("1\t2\t5\t0\n1\t3\t5\t86400".encode())
+
+            with zf.open("ml-100k/u.genre", "w") as ofs:
+                genre_string = ""
+                for i, genre in enumerate(GENRES):
+                    genre_string += f"{genre}|{i}\n"
+                ofs.write(genre_string.encode())
+
+            with zf.open("ml-100k/u.item", "w") as ofs:
+                # movieId = 1 has action tag,
+                # movieId = 2 has fantasy & thriller tags
+                ofs.write(
+                    """1|A fantastic movie|2020-01-01|2021-01-01|http://example.com|0|1|0
+    2|Pandemic|2020-01-01|2021-01-01|http://example.com|1|0|1""".encode(
+                        "latin-1"
+                    )
+                )
+            with zf.open("ml-100k/u.user", "w") as ofs:
+                ofs.write(
+                    """1|32|M|0|1690074
+    2|4|F|1|1760013
+    """.encode()
+                )
+
+        loader = MovieLens100KDataManager(ZIPFILE_NAME)
+        df = loader.read_interaction()
+        movie_info, genres = loader.read_item_info()
+        user_info = loader.read_user_info()
+        assert df.shape == (2, 4)
+        np.testing.assert_array_equal(df["userId"].values, [1, 1])
+        np.testing.assert_array_equal(df["movieId"].values, [2, 3])
+        np.testing.assert_array_equal(df["rating"].values, [5, 5])
+        np.testing.assert_array_equal(
+            df["timestamp"].values,
+            np.asarray(
+                [
+                    "1970-01-01",
+                    "1970-01-02",
+                ],
+                dtype="datetime64[ns]",
+            ),
+        )
+        np.testing.assert_array_equal(movie_info.index.values, [1, 2])
+        np.testing.assert_array_equal(
+            movie_info.release_date,
+            np.asarray(["2020-01-01", "2020-01-01"], dtype="datetime64[ns]"),
+        )
+        assert set(genres[genres.movieId == 1].genre) == set(["action"])
+        assert set(genres[genres.movieId == 2].genre) == set(["fantasy", "thriller"])
+
+        np.testing.assert_array_equal(user_info.index.values, [1, 2])
+        np.testing.assert_array_equal(user_info.gender, ["M", "F"])
+    finally:
+        os.remove(ZIPFILE_NAME)
 
 
 def test_ml1m() -> None:
-    fp = NamedTemporaryFile("wb")
-    fp.name
-    with ZipFile(fp.name, "w") as zf:
-        with zf.open("ml-1m/ratings.dat", "w") as ofs:
-            ofs.write(
-                """1::2::5::0
-1::3::5::86400
-""".encode()
-            )
-        with zf.open("ml-1m/movies.dat", "w") as ofs:
-            ofs.write(
-                """1::A fantastic movie (2020)::fantasy|thriller
-1917::Vinni-Pukh(1969)::children
-""".encode(
-                    "latin-1"
+    if sys.platform == "win32":
+        pytest.skip("Skip on Windows.")
+
+    try:
+        with ZipFile(ZIPFILE_NAME, "w") as zf:
+            with zf.open("ml-1m/ratings.dat", "w") as ofs:
+                ofs.write(
+                    """1::2::5::0
+    1::3::5::86400
+    """.encode()
+                )
+            with zf.open("ml-1m/movies.dat", "w") as ofs:
+                ofs.write(
+                    """1::A fantastic movie (2020)::fantasy|thriller
+    1917::Vinni-Pukh(1969)::children
+    """.encode(
+                        "latin-1"
+                    )
+                )
+            with zf.open("ml-1m/users.dat", "w") as ofs:
+                ofs.write(
+                    """1::M::32::0::1690074
+    2::F::4::1::1760013
+    """.encode()
+                )
+
+        loader = MovieLens1MDataManager(ZIPFILE_NAME)
+        df = loader.read_interaction()
+        movie_info = loader.read_item_info()
+        user_info = loader.read_user_info()
+        assert df.shape == (2, 4)
+        np.testing.assert_array_equal(df["userId"].values, [1, 1])
+        np.testing.assert_array_equal(df["movieId"].values, [2, 3])
+        np.testing.assert_array_equal(df["rating"].values, [5, 5])
+        np.testing.assert_array_equal(
+            df["timestamp"].values,
+            np.asarray(
+                [
+                    "1970-01-01",
+                    "1970-01-02",
+                ],
+                dtype="datetime64[ns]",
+            ),
+        )
+        np.testing.assert_array_equal(movie_info.index.values, [1, 1917])
+        np.testing.assert_array_equal(movie_info.release_year, [2020, 1969])
+        np.testing.assert_array_equal(user_info.index.values, [1, 2])
+        np.testing.assert_array_equal(user_info.gender, ["M", "F"])
+    finally:
+        os.remove(ZIPFILE_NAME)
+
+
+def test_ml20m() -> None:
+    if sys.platform == "win32":
+        pytest.skip("Skip on Windows.")
+    try:
+        with ZipFile(ZIPFILE_NAME, "w") as zf:
+            with zf.open("ml-20m/ratings.csv", "w") as ofs:
+                ofs.write(
+                    """userId,movieId,rating,timestamp
+    1,2,5,0
+    1,3,5,86400
+    """.encode()
                 )
-            )
-        with zf.open("ml-1m/users.dat", "w") as ofs:
-            ofs.write(
-                """1::M::32::0::1690074
-2::F::4::1::1760013
-""".encode()
-            )
-
-    loader = MovieLens1MDataManager(fp.name)
-    df = loader.read_interaction()
-    movie_info = loader.read_item_info()
-    user_info = loader.read_user_info()
-    assert df.shape == (2, 4)
-    np.testing.assert_array_equal(df["userId"].values, [1, 1])
-    np.testing.assert_array_equal(df["movieId"].values, [2, 3])
-    np.testing.assert_array_equal(df["rating"].values, [5, 5])
-    np.testing.assert_array_equal(
-        df["timestamp"].values,
-        np.asarray(
-            [
-                "1970-01-01",
-                "1970-01-02",
-            ],
-            dtype="datetime64[ns]",
-        ),
-    )
-    np.testing.assert_array_equal(movie_info.index.values, [1, 1917])
-    np.testing.assert_array_equal(movie_info.release_year, [2020, 1969])
-    np.testing.assert_array_equal(user_info.index.values, [1, 2])
-    np.testing.assert_array_equal(user_info.gender, ["M", "F"])
+        loader = MovieLens20MDataManager(ZIPFILE_NAME)
+        df = loader.read_interaction()
+        np.testing.assert_array_equal(df["userId"].values, [1, 1])
+        np.testing.assert_array_equal(df["movieId"].values, [2, 3])
+        np.testing.assert_array_equal(df["rating"].values, [5, 5])
+        np.testing.assert_array_equal(
+            df["timestamp"].values,
+            np.asarray(
+                [
+                    "1970-01-01",
+                    "1970-01-02",
+                ],
+                dtype="datetime64[ns]",
+            ),
+        )
+    finally:
+        os.remove(ZIPFILE_NAME)
diff --git a/tests/dataset/test_ml20m.py b/tests/dataset/test_ml20m.py

Original file line number	Diff line number	Diff line change
`@@ -22,6 +22,9 @@ namespace irspack {`
`22`	`22`
`23`	`23`	`using CountVector = Eigen::Matrix<std::int64_t, Eigen::Dynamic, 1>;`
`24`	`24`	`struct Metrics {`
	`25`	`+ // This isn't strictly necessary, but MSVC complains that Metrics is not`
	`26`	`+ // default constructible.`
	`27`	`+ inline Metrics() : Metrics(0) {}`
`25`	`28`	`inline Metrics(size_t n_item) : n_item(n_item), item_cnt(n_item) {`
`26`	`29`	`item_cnt.array() = 0;`
`27`	`30`	`}`