From e946fb38241702390627cb1560fddf4730af835b Mon Sep 17 00:00:00 2001 From: Maxime Nicolas Date: Sat, 30 Mar 2024 16:11:20 +0100 Subject: [PATCH] vectorization: gaussian+fit_cmle_mixt+IAD_dist Vectorization in gaussian.get_cdf(), mixture.fit_cmle_mixt(), estimation.IAD_dist(), and estimation.AD_dist(). --- pycop/bivariate/estimation.py | 15 ++++++++++++--- pycop/bivariate/gaussian.py | 2 +- setup.py | 2 +- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/pycop/bivariate/estimation.py b/pycop/bivariate/estimation.py index 162642b..59763a5 100644 --- a/pycop/bivariate/estimation.py +++ b/pycop/bivariate/estimation.py @@ -115,7 +115,7 @@ def log_likelihood(parameters): else: # dim = 3 w1, w2, w3, param1, param2, param3 = parameters params = [w1, w2, w3, param1, param2, param3] - logl = -sum([ np.log(copula.get_pdf(psd_obs[0][i], psd_obs[1][i],params)) for i in range(0, len(psd_obs[0]))]) + logl = -np.sum(np.log(copula.get_pdf(psd_obs[0], psd_obs[1], params))) return logl # copula.dim gives the number of weights to consider @@ -253,8 +253,12 @@ def IAD_dist(copula, data, param): x_values, y_values = np.linspace(1/n, 1-1/n, n), np.linspace(1/n, 1-1/n, n) # Compute the parametric (theoretical) copula values - C_copula = np.array([[copula.get_cdf(x, y, param) for x in x_values] for y in y_values]) + u_flat = np.array([[x for x in x_values] for y in y_values]).flatten() + v_flat = np.array([[y for x in x_values] for y in y_values]).flatten() + C_copula = copula.get_cdf(np.array(u_flat), np.array(v_flat), param) + C_copula = C_copula.reshape((n,n)) + # Calculate the Integrated Anderson-Darling distance IAD = np.sum(((C_empirical - C_copula) ** 2) / (C_copula - C_copula**2)) @@ -280,7 +284,12 @@ def AD_dist(copula, data, param): C_empirical = counts / n x_values, y_values = np.linspace(1/n, 1-1/n, n), np.linspace(1/n, 1-1/n, n) - C_copula = np.array([[copula.get_cdf(x, y, param) for x in x_values] for y in y_values]) + + u_flat = np.array([[x for x in x_values] for y in y_values]).flatten() + v_flat = np.array([[y for x in x_values] for y in y_values]).flatten() + C_copula = copula.get_cdf(np.array(u_flat), np.array(v_flat), param) + C_copula = C_copula.reshape((n,n)) + AD = np.max(((C_empirical - C_copula) ** 2) / (C_copula - C_copula**2)) return AD \ No newline at end of file diff --git a/pycop/bivariate/gaussian.py b/pycop/bivariate/gaussian.py index 08ec2ec..01587a5 100644 --- a/pycop/bivariate/gaussian.py +++ b/pycop/bivariate/gaussian.py @@ -52,7 +52,7 @@ def get_cdf(self, u, v, param): y2 = norm.ppf(v, 0, 1) rho = param[0] - return multivariate_normal.cdf((y1,y2), mean=None, cov=[[1, rho], [rho, 1]]) + return multivariate_normal.cdf(np.array([y1, y2]).T, mean=None, cov=[[1, rho], [rho, 1]]) def get_pdf(self, u, v, param): """ diff --git a/setup.py b/setup.py index 35cfeb5..fe2db97 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name = 'pycop', - version = '0.0.12', + version = '0.0.13', description = 'Copula for multivariate dependence modeling', long_description=open('README.md', 'r').read(), long_description_content_type="text/markdown",