Merge branch 'develop' into issue-3

wleoncio · web-flow · commit a62372f30488 · 2025-03-31T10:56:16.000+02:00
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "pCRscore"
-version = "0.0.6+issue4.issue3"
+version = "0.0.7"
 authors = [
   {name="Youness Azimzade"}
 ]
diff --git a/src/pCRscore/misc.py b/src/pCRscore/misc.py
@@ -0,0 +1,35 @@
+def _binary_encode(data, column, out_values=(-1, 1), reverse=False):
+    """Recode a two-valued column of ``data`` in place as numeric codes.
+
+    The distinct values of ``data[column]`` are taken in order of first
+    appearance: the first is mapped to ``out_values[0]`` and the second to
+    ``out_values[1]``. With ``reverse=True`` the assignment is swapped.
+
+    Args:
+        data: pandas DataFrame holding ``column``; it is modified in place.
+        column: Label of the column to recode.
+        out_values: Pair of codes assigned to the two distinct values.
+        reverse: If true, flip the order of the distinct values first.
+
+    Returns:
+        The same ``data`` object, with ``column`` replaced by its codes.
+
+    Raises:
+        ValueError: If ``column`` does not hold exactly two distinct values.
+    """
+    unique_values = data[column].unique()
+    if len(unique_values) != 2:
+        raise ValueError(f"{column} must contain exactly two unique values.")
+    if reverse:
+        # Flip which value receives out_values[0]
+        unique_values = unique_values[::-1]
+    # Report the codes actually applied, not the default -1/1 pair
+    print(
+        "Recoding '", unique_values[0], "' as ", out_values[0],
+        " and '", unique_values[1], "' as ", out_values[1], sep=''
+    )
+    value_map = {
+        unique_values[0]: out_values[0], unique_values[1]: out_values[1]
+    }
+    data[column] = data[column].map(value_map)
+    return data
diff --git a/src/pCRscore/svm.py b/src/pCRscore/svm.py
@@ -6,16 +6,16 @@
 from sklearn.model_selection import \
     GridSearchCV, train_test_split, KFold, cross_val_score
 from sklearn.svm import SVC
+from .misc import _binary_encode
 
 
 def preprocess(data, split_var='Cohort'):
     # Mapping the values in the 'Response' column to binary values 0 and 1
     resp = {'pCR': 1, 'RD': 0}
     data.Response = [resp[item] for item in data.Response]
 
-    # Mapping the values in the 'ER' column to binary values 0 and 1
-    er = {'Positive': 1, 'Negative': 0}
-    data.ER = [er[item] for item in data.ER]
+    # Mapping the values in the 'ER' column to binary values
+    data = _binary_encode(data, 'ER', out_values=[-1, 1])
 
     # Creating dummy variables for the categorical column 'PAM50'
     categorical_cols = ['PAM50']
diff --git a/tests/test_svm.py b/tests/test_svm.py
@@ -3,6 +3,7 @@
 from unittest import mock
 import pytest
 import numpy as np
+from pandas.testing import assert_frame_equal
 
 
 @pytest.fixture
@@ -92,3 +93,24 @@ def test_shapley():
     assert isinstance(shapl, np.ndarray)
     assert shapl.shape == (30, 44)
     svm.shap_plot(shapl, X)
+
+
+def test__binary_encode():
+    """Check _binary_encode on plain and reversed two-valued columns."""
+    # Each scenario: column label, raw values, extra keyword arguments, and
+    # the codes expected back.
+    scenarios = [
+        # Test with likely data
+        ('PAM50', ['Lum', 'Bas', 'Lum', 'Bas', 'Lum', 'Bas'], {},
+         [-1, 1, -1, 1, -1, 1]),
+        # Check that first value is always -1
+        ('X', ['A', 'Z'], {}, [-1, 1]),
+        ('X', ['Z', 'A'], {}, [-1, 1]),
+        # Reversing works
+        ('X', ['A', 'Z'], {'reverse': True}, [1, -1]),
+    ]
+    for label, raw, options, codes in scenarios:
+        frame = pd.DataFrame({label: raw})
+        expected = pd.DataFrame({label: codes})
+        recoded = svm._binary_encode(frame, label, **options)
+        assert_frame_equal(recoded, expected)

Original file line number	Diff line number	Diff line change
`@@ -4,7 +4,7 @@ build-backend = "hatchling.build"`
`4`	`4`
`5`	`5`	`[project]`
`6`	`6`	`name = "pCRscore"`
`7`		`-version = "0.0.6+issue4.issue3"`
	`7`	`+version = "0.0.7"`
`8`	`8`	`authors = [`
`9`	`9`	`{name="Youness Azimzade"}`
`10`	`10`	`]`