From e946fb38241702390627cb1560fddf4730af835b Mon Sep 17 00:00:00 2001
From: Maxime Nicolas <maxime.ncl+github@proton.me>
Date: Sat, 30 Mar 2024 16:11:20 +0100
Subject: [PATCH] vectorization: gaussian+fit_cmle_mixt+IAD_dist

Vectorization in gaussian.get_cdf(), mixture.fit_cmle_mixt(), estimation.IAD_dist(), and estimation.AD_dist().
---
 pycop/bivariate/estimation.py | 15 ++++++++++++---
 pycop/bivariate/gaussian.py   |  2 +-
 setup.py                      |  2 +-
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/pycop/bivariate/estimation.py b/pycop/bivariate/estimation.py
index 162642b..59763a5 100644
--- a/pycop/bivariate/estimation.py
+++ b/pycop/bivariate/estimation.py
@@ -115,7 +115,7 @@ def log_likelihood(parameters):
         else: # dim = 3
             w1, w2, w3, param1, param2, param3 = parameters
             params = [w1, w2, w3, param1, param2, param3]
-        logl = -sum([ np.log(copula.get_pdf(psd_obs[0][i], psd_obs[1][i],params)) for i in range(0, len(psd_obs[0]))])
+        logl = -np.sum(np.log(copula.get_pdf(psd_obs[0], psd_obs[1], params)))
         return logl
 
     # copula.dim gives the number of weights to consider
@@ -253,8 +253,12 @@ def IAD_dist(copula, data, param):
     x_values, y_values = np.linspace(1/n, 1-1/n, n), np.linspace(1/n, 1-1/n, n)
 
     # Compute the parametric (theoretical) copula values
-    C_copula = np.array([[copula.get_cdf(x, y, param) for x in x_values] for y in y_values])
+    u_flat = np.array([[x for x in x_values] for y in y_values]).flatten()
+    v_flat = np.array([[y for x in x_values] for y in y_values]).flatten()
 
+    C_copula = copula.get_cdf(np.array(u_flat), np.array(v_flat), param)
+    C_copula = C_copula.reshape((n,n))
+    
     # Calculate the Integrated Anderson-Darling distance
     IAD = np.sum(((C_empirical - C_copula) ** 2) / (C_copula - C_copula**2))
 
@@ -280,7 +284,12 @@ def AD_dist(copula, data, param):
     C_empirical = counts / n
     
     x_values, y_values = np.linspace(1/n, 1-1/n, n), np.linspace(1/n, 1-1/n, n)
-    C_copula = np.array([[copula.get_cdf(x, y, param) for x in x_values] for y in y_values])
+    
+    u_flat = np.array([[x for x in x_values] for y in y_values]).flatten()
+    v_flat = np.array([[y for x in x_values] for y in y_values]).flatten()
 
+    C_copula = copula.get_cdf(np.array(u_flat), np.array(v_flat), param)
+    C_copula = C_copula.reshape((n,n))
+    
     AD = np.max(((C_empirical - C_copula) ** 2) / (C_copula - C_copula**2))
     return AD
\ No newline at end of file
diff --git a/pycop/bivariate/gaussian.py b/pycop/bivariate/gaussian.py
index 08ec2ec..01587a5 100644
--- a/pycop/bivariate/gaussian.py
+++ b/pycop/bivariate/gaussian.py
@@ -52,7 +52,7 @@ def get_cdf(self, u, v, param):
         y2 = norm.ppf(v, 0, 1)
         rho = param[0]
 
-        return multivariate_normal.cdf((y1,y2), mean=None, cov=[[1, rho], [rho, 1]])
+        return multivariate_normal.cdf(np.array([y1, y2]).T, mean=None, cov=[[1, rho], [rho, 1]])
 
     def get_pdf(self, u, v, param):
         """
diff --git a/setup.py b/setup.py
index 35cfeb5..fe2db97 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name = 'pycop',
-    version = '0.0.12',  
+    version = '0.0.13',  
     description = 'Copula for multivariate dependence modeling',
     long_description=open('README.md', 'r').read(),
     long_description_content_type="text/markdown",