From 992160955851abaf37c748366a18ab58b41b8d78 Mon Sep 17 00:00:00 2001
From: Katharine Xiao <2405771+katxiao@users.noreply.github.com>
Date: Thu, 14 Jul 2022 12:22:11 -0400
Subject: [PATCH 1/5] Add correlation similarity to column pairs and update

---
 sdmetrics/column_pairs/base.py                | 20 ++++-
 .../column_pairs/statistical/__init__.py      |  2 +
 .../statistical/correlation_similarity.py     | 90 +++++++++++++++++++
 .../statistical/statistic_similarity.py       | 22 ++---
 tests/unit/column_pairs/__init__.py           |  1 +
 .../unit/column_pairs/statistical/__init__.py |  1 +
 .../test_correlation_similarity.py            | 90 +++++++++++++++++++
 tests/unit/single_column/__init__.py          |  1 +
 .../single_column/statistical/__init__.py     |  1 +
 .../statistical/test_statistic_similarity.py  | 10 +--
 tests/utils.py                                | 28 ++++++
 11 files changed, 247 insertions(+), 19 deletions(-)
 create mode 100644 sdmetrics/column_pairs/statistical/correlation_similarity.py
 create mode 100644 tests/unit/column_pairs/__init__.py
 create mode 100644 tests/unit/column_pairs/statistical/__init__.py
 create mode 100644 tests/unit/column_pairs/statistical/test_correlation_similarity.py
 create mode 100644 tests/unit/single_column/__init__.py
 create mode 100644 tests/unit/single_column/statistical/__init__.py
 create mode 100644 tests/utils.py

diff --git a/sdmetrics/column_pairs/base.py b/sdmetrics/column_pairs/base.py
index e101f797..c1712524 100644
--- a/sdmetrics/column_pairs/base.py
+++ b/sdmetrics/column_pairs/base.py
@@ -35,7 +35,25 @@ def compute(real_data, synthetic_data):
                 pandas.DataFrame with 2 columns.
 
         Returns:
-            Union[float, tuple[float]]:
+            float:
                 Metric output.
         """
         raise NotImplementedError()
+
+    @classmethod
+    def compute_breakdown(cls, real_data, synthetic_data):
+        """Compute the breakdown of this metric.
+
+        Args:
+            real_data (pandas.DataFrame):
+                The values from the real dataset, passed as pandas.DataFrame
+                with 2 columns.
+            synthetic_data (pandas.DataFrame):
+                The values from the synthetic dataset, passed as a
+                pandas.DataFrame with 2 columns.
+
+        Returns:
+            dict:
+                A mapping of the metric output. Must contain the key 'score'.
+        """
+        return {'score': cls.compute(real_data, synthetic_data)}
diff --git a/sdmetrics/column_pairs/statistical/__init__.py b/sdmetrics/column_pairs/statistical/__init__.py
index e79ac684..c9175c95 100644
--- a/sdmetrics/column_pairs/statistical/__init__.py
+++ b/sdmetrics/column_pairs/statistical/__init__.py
@@ -1,11 +1,13 @@
 """Statistical Metrics to compare column pairs."""
 
 from sdmetrics.column_pairs.statistical.contingency_similarity import ContingencySimilarity
+from sdmetrics.column_pairs.statistical.correlation_similarity import CorrelationSimilarity
 from sdmetrics.column_pairs.statistical.kl_divergence import (
     ContinuousKLDivergence, DiscreteKLDivergence)
 
 __all__ = [
     'ContingencySimilarity',
     'ContinuousKLDivergence',
+    'CorrelationSimilarity',
     'DiscreteKLDivergence',
 ]
diff --git a/sdmetrics/column_pairs/statistical/correlation_similarity.py b/sdmetrics/column_pairs/statistical/correlation_similarity.py
new file mode 100644
index 00000000..144ac66b
--- /dev/null
+++ b/sdmetrics/column_pairs/statistical/correlation_similarity.py
@@ -0,0 +1,90 @@
+"""Correlation Similarity Metric."""
+
+import pandas as pd
+from scipy.stats import pearsonr, spearmanr
+
+from sdmetrics.column_pairs.base import ColumnPairsMetric
+from sdmetrics.goal import Goal
+from sdmetrics.utils import is_datetime
+
+
+class CorrelationSimilarity(ColumnPairsMetric):
+    """Correlation similarity metric.
+
+    Attributes:
+        name (str):
+            Name to use when reports about this metric are printed.
+        goal (sdmetrics.goal.Goal):
+            The goal of this metric.
+        min_value (Union[float, tuple[float]]):
+            Minimum value or values that this metric can take.
+        max_value (Union[float, tuple[float]]):
+            Maximum value or values that this metric can take.
+    """
+
+    name = 'CorrelationSimilarity'
+    goal = Goal.MAXIMIZE
+    min_value = 0.0
+    max_value = 1.0
+
+    @classmethod
+    def compute(cls, real_data, synthetic_data, coefficient='Pearson'):
+        """Compare the correlation similarity of two continuous columns.
+
+        Args:
+            real_data (Union[numpy.ndarray, pandas.Series]):
+                The values from the real dataset.
+            synthetic_data (Union[numpy.ndarray, pandas.Series]):
+                The values from the synthetic dataset.
+
+        Returns:
+            float:
+                The correlation similarity of the two columns.
+        """
+        real_data = pd.Series(real_data).dropna()
+        synthetic_data = pd.Series(synthetic_data).dropna()
+
+        if is_datetime(real_data):
+            real_data = pd.to_numeric(real_data)
+            synthetic_data = pd.to_numeric(synthetic_data)
+
+        correlation_fn = None
+        if coefficient == 'Pearson':
+            correlation_fn = pearsonr
+        elif coefficient == 'Spearman':
+            correlation_fn = spearmanr
+        else:
+            raise ValueError(f'requested coefficient {coefficient} is not valid. '
+                             'Please choose either Pearson or Spearman.')
+
+        return correlation_fn(real_data, synthetic_data)
+
+    @classmethod
+    def compute_breakdown(cls, real_data, synthetic_data, coefficient='Pearson'):
+        """Compare the breakdown of correlation similarity of two continuous columns.
+
+        Args:
+            real_data (Union[numpy.ndarray, pandas.Series]):
+                The values from the real dataset.
+            synthetic_data (Union[numpy.ndarray, pandas.Series]):
+                The values from the synthetic dataset.
+
+        Returns:
+            dict:
+                A dict containing the score.
+        """
+        return {'score': cls.compute(real_data, synthetic_data, coefficient)}
+
+    @classmethod
+    def normalize(cls, raw_score):
+        """Return the `raw_score` as is, since it is already normalized.
+
+        Args:
+            raw_score (float):
+                The value of the metric from `compute`.
+
+        Returns:
+            float:
+                The normalized value of the metric.
+        """
+        return super().normalize(raw_score)
diff --git a/sdmetrics/single_column/statistical/statistic_similarity.py b/sdmetrics/single_column/statistical/statistic_similarity.py
index 678325fc..53450236 100644
--- a/sdmetrics/single_column/statistical/statistic_similarity.py
+++ b/sdmetrics/single_column/statistical/statistic_similarity.py
@@ -19,20 +19,15 @@ class StatisticSimilarity(SingleColumnMetric):
             Minimum value or values that this metric can take.
         max_value (Union[float, tuple[float]]):
             Maximum value or values that this metric can take.
-        statistic (str):
-            The statistic to compute the metric on (mean, std, or median). Defaults to mean.
     """
 
     name = 'StatisticSimilarity'
     goal = Goal.MAXIMIZE
     min_value = 0.0
    max_value = 1.0
-    statistic = 'mean'
 
-    def __init__(self, statistic='mean'):
-        self.statistic = statistic
-
-    def compute(self, real_data, synthetic_data):
+    @classmethod
+    def compute(cls, real_data, synthetic_data, statistic='mean'):
         """Compare the statistic similarity of two continuous columns.
 
         Args:
@@ -45,9 +40,10 @@ def compute(self, real_data, synthetic_data):
             float:
                 The statistical similarity of the two columns.
""" - return self.compute_breakdown(real_data, synthetic_data)['score'] + return cls.compute_breakdown(real_data, synthetic_data, statistic)['score'] - def compute_breakdown(self, real_data, synthetic_data): + @staticmethod + def compute_breakdown(real_data, synthetic_data, statistic='mean'): """Compare the breakdown of statistic similarity of two continuous columns. Args: @@ -67,17 +63,17 @@ def compute_breakdown(self, real_data, synthetic_data): real_data = pd.to_numeric(real_data) synthetic_data = pd.to_numeric(synthetic_data) - if self.statistic == 'mean': + if statistic == 'mean': score_real = real_data.mean() score_synthetic = synthetic_data.mean() - elif self.statistic == 'std': + elif statistic == 'std': score_real = real_data.std() score_synthetic = synthetic_data.std() - elif self.statistic == 'median': + elif statistic == 'median': score_real = real_data.median() score_synthetic = synthetic_data.median() else: - raise ValueError(f'requested statistic {self.statistic} is not valid. ' + raise ValueError(f'requested statistic {statistic} is not valid. ' 'Please choose either mean, std, or median.') score = 1 - abs(score_real - score_synthetic) / (real_data.max() - real_data.min()) diff --git a/tests/unit/column_pairs/__init__.py b/tests/unit/column_pairs/__init__.py new file mode 100644 index 00000000..aac7ef87 --- /dev/null +++ b/tests/unit/column_pairs/__init__.py @@ -0,0 +1 @@ +"""Unit tests for the column pairs module.""" diff --git a/tests/unit/column_pairs/statistical/__init__.py b/tests/unit/column_pairs/statistical/__init__.py new file mode 100644 index 00000000..13be4859 --- /dev/null +++ b/tests/unit/column_pairs/statistical/__init__.py @@ -0,0 +1 @@ +"""Unit tests for the column pairs statistical metrics.""" diff --git a/tests/unit/column_pairs/statistical/test_correlation_similarity.py b/tests/unit/column_pairs/statistical/test_correlation_similarity.py new file mode 100644 index 00000000..525e9a45 --- /dev/null +++ b/tests/unit/column_pairs/statistical/test_correlation_similarity.py @@ -0,0 +1,90 @@ +from unittest.mock import Mock, patch + +import pandas as pd + +from sdmetrics.column_pairs.statistical import CorrelationSimilarity +from tests.utils import SeriesMatcher + + +class TestCorrelationSimilarity: + + @patch('sdmetrics.column_pairs.statistical.correlation_similarity.pearsonr') + def test_compute(self, pearson_mock): + """Test the ``compute`` method. + + Expect that the selected coefficient is used to compare the real and synthetic data. + + Setup: + - Patch the ``scipy.stats.pearsonr`` method to return a test result. + + Input: + - Real data. + - Synthetic data. + + Output: + - The evaluated metric. + """ + # Setup + real_data = pd.Series([1.0, 2.4, 2.6, 0.8]) + synthetic_data = pd.Series([0.9, 1.8, 3.1, 5.0]) + + metric = CorrelationSimilarity() + + # Run + result = metric.compute(real_data, synthetic_data, coefficient='Pearson') + + # Assert + pearson_mock.assert_called_once_with( + SeriesMatcher(real_data), SeriesMatcher(synthetic_data)) + assert result == pearson_mock.return_value + + def test_compute_breakdown(self): + """Test the ``compute_breakdown`` method. + + Expect that the selected coefficient is used to compare the real and synthetic data. + + Setup: + - Mock the ``compute`` method to return a test score. + + Input: + - Mocked real data. + - Mocked synthetic data. + + Output: + - A mapping of the metric results, containing the score and the real and synthetic results. 
+ """ + # Setup + test_score = 0.2 + metric = CorrelationSimilarity() + + # Run + with patch.object(CorrelationSimilarity, 'compute', return_value=test_score): + result = metric.compute_breakdown(Mock(), Mock(), coefficient='Pearson') + + # Assert + assert result == {'score': test_score} + + @patch( + 'sdmetrics.column_pairs.statistical.correlation_similarity.ColumnPairsMetric.normalize' + ) + def test_normalize(self, normalize_mock): + """Test the ``normalize`` method. + + Expect that the inherited ``normalize`` method is called. + + Input: + - Raw score + + Output: + - The output of the inherited ``normalize`` method. + """ + # Setup + metric = CorrelationSimilarity() + raw_score = 0.9 + + # Run + result = metric.normalize(raw_score) + + # Assert + normalize_mock.assert_called_once_with(raw_score) + assert result == normalize_mock.return_value diff --git a/tests/unit/single_column/__init__.py b/tests/unit/single_column/__init__.py new file mode 100644 index 00000000..051ac498 --- /dev/null +++ b/tests/unit/single_column/__init__.py @@ -0,0 +1 @@ +"""Unit tests for the single column module.""" diff --git a/tests/unit/single_column/statistical/__init__.py b/tests/unit/single_column/statistical/__init__.py new file mode 100644 index 00000000..9ba140e4 --- /dev/null +++ b/tests/unit/single_column/statistical/__init__.py @@ -0,0 +1 @@ +"""Unit tests for the statistical single column metrics.""" diff --git a/tests/unit/single_column/statistical/test_statistic_similarity.py b/tests/unit/single_column/statistical/test_statistic_similarity.py index 9c493291..a85d5368 100644 --- a/tests/unit/single_column/statistical/test_statistic_similarity.py +++ b/tests/unit/single_column/statistical/test_statistic_similarity.py @@ -26,10 +26,10 @@ def test_compute_breakdown(self): real_data = pd.Series([1.0, 2.4, 2.6, 0.8]) synthetic_data = pd.Series([0.9, 1.8, 3.1, 5.0]) - metric = StatisticSimilarity(statistic='mean') + metric = StatisticSimilarity() # Run - result = metric.compute_breakdown(real_data, synthetic_data) + result = metric.compute_breakdown(real_data, synthetic_data, statistic='mean') # Assert assert result == {'score': 1 - (2.7 - 1.7) / 1.8, 'real': 1.7, 'synthetic': 2.7} @@ -52,11 +52,11 @@ def test_compute(self): # Setup metric_breakdown = {'score': 0.56, 'real': 1.7, 'synthetic': 2.7} - metric = StatisticSimilarity(statistic='mean') + metric = StatisticSimilarity() # Run with patch.object(StatisticSimilarity, 'compute_breakdown', return_value=metric_breakdown): - result = metric.compute(Mock(), Mock()) + result = metric.compute(Mock(), Mock(), statistic='mean') # Assert assert result == 0.56 @@ -74,7 +74,7 @@ def test_normalize(self, normalize_mock): - The output of the inherited ``normalize`` method. 
""" # Setup - metric = StatisticSimilarity(statistic='mean') + metric = StatisticSimilarity() raw_score = 0.9 # Run diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 00000000..37f6a71c --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,28 @@ +"""Utils for testing.""" +import pandas as pd + + +class DataFrameMatcher: + """Match a given Pandas DataFrame in a mock function call.""" + + def __init__(self, df): + """Initialize the DataFrame.""" + self.df = df + + def __eq__(self, other): + """Assert equality using pandas testing module.""" + pd.testing.assert_frame_equal(self.df, other) + return True + + +class SeriesMatcher: + """Match a given Pandas Series in a mock function call.""" + + def __init__(self, data): + """Initialize the Series.""" + self.data = data + + def __eq__(self, other): + """Assert equality using pandas testing module.""" + pd.testing.assert_series_equal(self.data, other) + return True From 41b04b57d40f9e83c2b61e64c356756f7de1ee87 Mon Sep 17 00:00:00 2001 From: Katharine Xiao <2405771+katxiao@users.noreply.github.com> Date: Thu, 14 Jul 2022 12:25:40 -0400 Subject: [PATCH 2/5] Add unit test for column pairs --- tests/unit/column_pairs/test_base.py | 32 ++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 tests/unit/column_pairs/test_base.py diff --git a/tests/unit/column_pairs/test_base.py b/tests/unit/column_pairs/test_base.py new file mode 100644 index 00000000..d0d248aa --- /dev/null +++ b/tests/unit/column_pairs/test_base.py @@ -0,0 +1,32 @@ +from unittest.mock import Mock, patch + +from sdmetrics.column_pairs.base import ColumnPairsMetric + + +class TestColumnPairsMetric: + + def test_compute_breakdown(self): + """Test the ``compute_breakdown`` method. + + Expect a breakdown dictionary is returned that contains the score. + + Setup: + - Mock the ``compute`` method to return a fake score. + + Input: + - Real data. + - Synthetic data. + + Output: + - The evaluated metric. + """ + # Setup + metric = ColumnPairsMetric() + test_metric_score = 0.5 + + # Run + with patch.object(ColumnPairsMetric, 'compute', return_value=test_metric_score): + result = metric.compute_breakdown(Mock(), Mock()) + + # Assert + assert result == {'score': test_metric_score} From 4720fab2736009d3aee85f451586f81fc6467c87 Mon Sep 17 00:00:00 2001 From: Katharine Xiao <2405771+katxiao@users.noreply.github.com> Date: Thu, 14 Jul 2022 12:29:39 -0400 Subject: [PATCH 3/5] Add single table metric --- sdmetrics/single_table/multi_column_pairs.py | 25 ++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/sdmetrics/single_table/multi_column_pairs.py b/sdmetrics/single_table/multi_column_pairs.py index f96a90fc..7443bf13 100644 --- a/sdmetrics/single_table/multi_column_pairs.py +++ b/sdmetrics/single_table/multi_column_pairs.py @@ -186,3 +186,28 @@ class ContingencySimilarity(MultiColumnPairsMetric): field_types = ('boolean', 'categorical') column_pairs_metric = column_pairs.statistical.contingency_similarity.ContingencySimilarity + + +class CorrelationSimilarity(MultiColumnPairsMetric): + """MultiColumnPairsMetric based on ColumnPairs CorrelationSimilarity. + + This computes the correlation between column pairs based on the specified coefficient, + which defaults to 'Pearson'. + + Attributes: + name (str): + Name to use when reports about this metric are printed. + goal (sdmetrics.goal.Goal): + The goal of this metric. + min_value (Union[float, tuple[float]]): + Minimum value or values that this metric can take. 
+        max_value (Union[float, tuple[float]]):
+            Maximum value or values that this metric can take.
+        column_pairs_metric (sdmetrics.column_pairs.base.ColumnPairsMetric):
+            ColumnPairs CorrelationSimilarity.
+        field_types (tuple):
+            Field types to which the ColumnPairs metric will be applied.
+    """
+
+    field_types = ('numerical', 'datetime')
+    column_pairs_metric = column_pairs.statistical.correlation_similarity.CorrelationSimilarity

From e0516c3459b9bffed7cba2d3158b6fa412978511 Mon Sep 17 00:00:00 2001
From: Katharine Xiao <2405771+katxiao@users.noreply.github.com>
Date: Thu, 14 Jul 2022 15:30:54 -0400
Subject: [PATCH 4/5] Update metric

---
 .../statistical/correlation_similarity.py     | 31 ++++++----
 .../test_correlation_similarity.py            | 58 ++++++++++++-------
 2 files changed, 55 insertions(+), 34 deletions(-)

diff --git a/sdmetrics/column_pairs/statistical/correlation_similarity.py b/sdmetrics/column_pairs/statistical/correlation_similarity.py
index 144ac66b..d6ef20a2 100644
--- a/sdmetrics/column_pairs/statistical/correlation_similarity.py
+++ b/sdmetrics/column_pairs/statistical/correlation_similarity.py
@@ -28,8 +28,8 @@ class CorrelationSimilarity(ColumnPairsMetric):
     max_value = 1.0
 
     @classmethod
-    def compute(cls, real_data, synthetic_data, coefficient='Pearson'):
-        """Compare the correlation similarity of two continuous columns.
+    def compute_breakdown(cls, real_data, synthetic_data, coefficient='Pearson'):
+        """Compare the breakdown of correlation similarity of two continuous columns.
 
         Args:
             real_data (Union[numpy.ndarray, pandas.Series]):
@@ -38,11 +38,12 @@ def compute(cls, real_data, synthetic_data, coefficient='Pearson'):
             The values from the synthetic dataset.
 
         Returns:
-            float:
-                The correlation similarity of the two columns.
+            dict:
+                A dict containing the score, and the real and synthetic metric values.
         """
-        real_data = pd.Series(real_data).dropna()
-        synthetic_data = pd.Series(synthetic_data).dropna()
+        real_data[pd.isna(real_data)] = 0.0
+        synthetic_data[pd.isna(synthetic_data)] = 0.0
+        column1, column2 = real_data.columns[:2]
 
         if is_datetime(real_data):
             real_data = pd.to_numeric(real_data)
@@ -57,11 +58,17 @@ def compute(cls, real_data, synthetic_data, coefficient='Pearson'):
             raise ValueError(f'requested coefficient {coefficient} is not valid. '
                              'Please choose either Pearson or Spearman.')
 
-        return correlation_fn(real_data, synthetic_data)
+        correlation_real, _ = correlation_fn(real_data[column1], real_data[column2])
+        correlation_synthetic, _ = correlation_fn(synthetic_data[column1], synthetic_data[column2])
+        return {
+            'score': 1 - abs(correlation_real - correlation_synthetic) / 2,
+            'real': correlation_real,
+            'synthetic': correlation_synthetic,
+        }
 
     @classmethod
-    def compute_breakdown(cls, real_data, synthetic_data, coefficient='Pearson'):
-        """Compare the breakdown of correlation similarity of two continuous columns.
+    def compute(cls, real_data, synthetic_data, coefficient='Pearson'):
+        """Compare the correlation similarity of two continuous columns.
 
         Args:
             real_data (Union[numpy.ndarray, pandas.Series]):
                 The values from the real dataset.
             synthetic_data (Union[numpy.ndarray, pandas.Series]):
                 The values from the synthetic dataset.
 
         Returns:
-            dict:
-                A dict containing the score.
+            float:
+                The correlation similarity of the two columns.
""" - return {'score': cls.compute(real_data, synthetic_data, coefficient)} + return cls.compute_breakdown(real_data, synthetic_data, coefficient)['score'] @classmethod def normalize(cls, raw_score): diff --git a/tests/unit/column_pairs/statistical/test_correlation_similarity.py b/tests/unit/column_pairs/statistical/test_correlation_similarity.py index 525e9a45..71a63d62 100644 --- a/tests/unit/column_pairs/statistical/test_correlation_similarity.py +++ b/tests/unit/column_pairs/statistical/test_correlation_similarity.py @@ -1,4 +1,4 @@ -from unittest.mock import Mock, patch +from unittest.mock import Mock, call, patch import pandas as pd @@ -9,8 +9,8 @@ class TestCorrelationSimilarity: @patch('sdmetrics.column_pairs.statistical.correlation_similarity.pearsonr') - def test_compute(self, pearson_mock): - """Test the ``compute`` method. + def test_compute_breakdown(self, pearson_mock): + """Test the ``compute_breakdown`` method. Expect that the selected coefficient is used to compare the real and synthetic data. @@ -18,28 +18,37 @@ def test_compute(self, pearson_mock): - Patch the ``scipy.stats.pearsonr`` method to return a test result. Input: - - Real data. - - Synthetic data. + - Mocked real data. + - Mocked synthetic data. Output: - - The evaluated metric. + - A mapping of the metric results, containing the score and the real and synthetic results. """ # Setup - real_data = pd.Series([1.0, 2.4, 2.6, 0.8]) - synthetic_data = pd.Series([0.9, 1.8, 3.1, 5.0]) - - metric = CorrelationSimilarity() + real_data = pd.DataFrame({'col1': [1.0, 2.4, 2.6, 0.8], 'col2': [1, 2, 3, 4]}) + synthetic_data = pd.DataFrame({'col1': [0.9, 1.8, 3.1, 5.0], 'col2': [2, 3, 4, 1]}) + score_real = -0.451 + score_synthetic = -0.003 + pearson_mock.side_effect = [score_real, score_synthetic] + expected_score_breakdown = { + 'score': 1 - abs(score_real - score_synthetic) / 2, + 'real': score_real, + 'synthetic': score_synthetic, + } # Run - result = metric.compute(real_data, synthetic_data, coefficient='Pearson') + metric = CorrelationSimilarity() + result = metric.compute_breakdown(real_data, synthetic_data, coefficient='Pearson') # Assert - pearson_mock.assert_called_once_with( - SeriesMatcher(real_data), SeriesMatcher(synthetic_data)) - assert result == pearson_mock.return_value + assert pearson_mock.has_calls( + call(SeriesMatcher(real_data['col1']), SeriesMatcher(real_data['col2'])), + call(SeriesMatcher(synthetic_data['col1']), SeriesMatcher(synthetic_data['col2'])), + ) + assert result == expected_score_breakdown - def test_compute_breakdown(self): - """Test the ``compute_breakdown`` method. + def test_compute(self): + """Test the ``compute`` method. Expect that the selected coefficient is used to compare the real and synthetic data. @@ -47,22 +56,27 @@ def test_compute_breakdown(self): - Mock the ``compute`` method to return a test score. Input: - - Mocked real data. - - Mocked synthetic data. + - Real data. + - Synthetic data. Output: - - A mapping of the metric results, containing the score and the real and synthetic results. + - The evaluated metric. 
""" # Setup test_score = 0.2 + score_breakdown = {'score': test_score} metric = CorrelationSimilarity() # Run - with patch.object(CorrelationSimilarity, 'compute', return_value=test_score): - result = metric.compute_breakdown(Mock(), Mock(), coefficient='Pearson') + with patch.object( + CorrelationSimilarity, + 'compute_breakdown', + return_value=score_breakdown, + ): + result = metric.compute(Mock(), Mock(), coefficient='Pearson') # Assert - assert result == {'score': test_score} + assert result == test_score @patch( 'sdmetrics.column_pairs.statistical.correlation_similarity.ColumnPairsMetric.normalize' From 5846cc64dcf7eeb7ba7bb553ef471bf01c7cd678 Mon Sep 17 00:00:00 2001 From: Katharine Xiao <2405771+katxiao@users.noreply.github.com> Date: Wed, 20 Jul 2022 14:48:18 -0400 Subject: [PATCH 5/5] Add unit test --- .../test_correlation_similarity.py | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/tests/unit/column_pairs/statistical/test_correlation_similarity.py b/tests/unit/column_pairs/statistical/test_correlation_similarity.py index 71a63d62..715767e2 100644 --- a/tests/unit/column_pairs/statistical/test_correlation_similarity.py +++ b/tests/unit/column_pairs/statistical/test_correlation_similarity.py @@ -1,3 +1,4 @@ +from datetime import datetime from unittest.mock import Mock, call, patch import pandas as pd @@ -47,6 +48,51 @@ def test_compute_breakdown(self, pearson_mock): ) assert result == expected_score_breakdown + @patch('sdmetrics.column_pairs.statistical.correlation_similarity.pearsonr') + def test_compute_breakdown_datetime(self, pearson_mock): + """Test the ``compute_breakdown`` method with datetime input. + + Expect that the selected coefficient is used to compare the real and synthetic data. + + Setup: + - Patch the ``scipy.stats.pearsonr`` method to return a test result. + + Input: + - Mocked real data. + - Mocked synthetic data. + + Output: + - A mapping of the metric results, containing the score and the real and synthetic results. + """ + # Setup + real_data = pd.DataFrame({ + 'col1': [datetime(2020, 1, 3), datetime(2020, 10, 13), datetime(2021, 5, 3)], + 'col2': [datetime(2021, 7, 23), datetime(2021, 8, 3), datetime(2020, 9, 24)], + }) + synthetic_data = pd.DataFrame({ + 'col1': [datetime(2021, 9, 19), datetime(2021, 10, 1), datetime(2020, 3, 1)], + 'col2': [datetime(2022, 4, 28), datetime(2021, 7, 31), datetime(2020, 4, 2)], + }) + score_real = 0.2 + score_synthetic = 0.1 + pearson_mock.side_effect = [score_real, score_synthetic] + expected_score_breakdown = { + 'score': 1 - abs(score_real - score_synthetic) / 2, + 'real': score_real, + 'synthetic': score_synthetic, + } + + # Run + metric = CorrelationSimilarity() + result = metric.compute_breakdown(real_data, synthetic_data, coefficient='Pearson') + + # Assert + assert pearson_mock.has_calls( + call(SeriesMatcher(real_data['col1']), SeriesMatcher(real_data['col2'])), + call(SeriesMatcher(synthetic_data['col1']), SeriesMatcher(synthetic_data['col2'])), + ) + assert result == expected_score_breakdown + def test_compute(self): """Test the ``compute`` method.