|
| 1 | +######################################################################### |
| 2 | +# R functions for testing independence versus positive quadrant # |
| 3 | +# dependence corresponding to the manuscript titled, # |
| 4 | +# "Testing for positive quadrant dependence." # |
| 5 | +# Date: 06/17/2018 # |
| 6 | +######################################################################### |
| 7 | + |
| 8 | +library(Rcpp) |
| 9 | +library(copula) |
| 10 | + |
| 11 | +#***********************************************************************# |
| 12 | +# Function to produce EL-based test statistic for independence versus |
| 13 | +# positive quadrant dependence. |
| 14 | +cppFunction(" double Rcpp_IP_FF(NumericMatrix X, int n){ |
| 15 | + NumericVector Pn11(n*n); |
| 16 | + NumericVector Pn12(n*n); |
| 17 | + NumericVector Pn21(n*n); |
| 18 | + NumericVector Pn22(n*n); |
| 19 | + NumericVector Fn(n*n); |
| 20 | + NumericVector Gn(n*n); |
| 21 | + double Tn=0; |
| 22 | + int ii; int ij; |
| 23 | + for(int i=0; i<n*n; i++){ |
| 24 | + ii = i/n; ij = i%n; |
| 25 | + for(int j=0; j<n; j++){ |
| 26 | + if( X(j,0)<= X(ii,0) & X(j,1)<= X(ij,1)){ |
| 27 | + Pn11(i) = Pn11(i)+1; |
| 28 | + } |
| 29 | + if( X(j,0)<= X(ii,0) & X(j,1)> X(ij,1)){ |
| 30 | + Pn12(i) = Pn12(i)+1; |
| 31 | + } |
| 32 | + if( X(j,0)> X(ii,0) & X(j,1)<= X(ij,1)){ |
| 33 | + Pn21(i) = Pn21(i)+1; |
| 34 | + } |
| 35 | + if( X(j,0)> X(ii,0) & X(j,1)> X(ij,1)){ |
| 36 | + Pn22(i) = Pn22(i)+1; |
| 37 | + } |
| 38 | + } |
| 39 | + Pn11(i) = Pn11(i)/n; Pn12(i) = Pn12(i)/n; |
| 40 | + Pn21(i) = Pn21(i)/n; Pn22(i) = Pn22(i)/n; |
| 41 | + Fn(i) = Pn11(i)+Pn12(i); Gn(i) = Pn11(i)+Pn21(i); |
| 42 | + if(Pn11(i)>Fn(i)*Gn(i)){ |
| 43 | + if(Pn11(i)*Fn(i)*Gn(i)>0){ |
| 44 | + Tn = Tn + n*Pn11(i)*log(Fn(i)*Gn(i)/Pn11(i)); |
| 45 | + } |
| 46 | + if(Pn12(i)*Fn(i)*(1-Gn(i))>0){ |
| 47 | + Tn = Tn + n*Pn12(i)*log(Fn(i)*(1-Gn(i))/Pn12(i)); |
| 48 | + } |
| 49 | + if(Pn21(i)*(1-Fn(i))*Gn(i)>0){ |
| 50 | + Tn = Tn + n*Pn21(i)*log((1-Fn(i))*Gn(i)/Pn21(i)); |
| 51 | + } |
| 52 | + if(Pn22(i)*(1-Fn(i))*(1-Gn(i))>0){ |
| 53 | + Tn = Tn + n*Pn22(i)*log((1-Fn(i))*(1-Gn(i))/Pn22(i)); |
| 54 | + } |
| 55 | + } |
| 56 | + } |
| 57 | + return -2*Tn/(n*n); |
| 58 | + }") |
| 59 | + |
| 60 | +#***********************************************************************# |
| 61 | +# Function to produce EL-based test statistic of omnibus independence |
| 62 | +# test proposed in Einmahl and McKeague (2003) |
| 63 | +cppFunction(" double Rcpp_Ind_FF(NumericMatrix X, int n){ |
| 64 | + NumericVector Pn11(n*n); |
| 65 | + NumericVector Pn12(n*n); |
| 66 | + NumericVector Pn21(n*n); |
| 67 | + NumericVector Pn22(n*n); |
| 68 | + NumericVector Fn(n*n); |
| 69 | + NumericVector Gn(n*n); |
| 70 | + double Tn=0; |
| 71 | + int ii; int ij; |
| 72 | + for(int i=0; i<n*n; i++){ |
| 73 | + ii = i/n; ij = i%n; |
| 74 | + for(int j=0; j<n; j++){ |
| 75 | + if( X(j,0)<= X(ii,0) & X(j,1)<= X(ij,1)){ |
| 76 | + Pn11(i) = Pn11(i)+1; |
| 77 | + } |
| 78 | + if( X(j,0)<= X(ii,0) & X(j,1)> X(ij,1)){ |
| 79 | + Pn12(i) = Pn12(i)+1; |
| 80 | + } |
| 81 | + if( X(j,0)> X(ii,0) & X(j,1)<= X(ij,1)){ |
| 82 | + Pn21(i) = Pn21(i)+1; |
| 83 | + } |
| 84 | + if( X(j,0)> X(ii,0) & X(j,1)> X(ij,1)){ |
| 85 | + Pn22(i) = Pn22(i)+1; |
| 86 | + } |
| 87 | + } |
| 88 | + Pn11(i) = Pn11(i)/n; Pn12(i) = Pn12(i)/n; |
| 89 | + Pn21(i) = Pn21(i)/n; Pn22(i) = Pn22(i)/n; |
| 90 | + Fn(i) = Pn11(i)+Pn12(i); Gn(i) = Pn11(i)+Pn21(i); |
| 91 | + if(Pn11(i)*Fn(i)*Gn(i)>0){ |
| 92 | + Tn = Tn + n*Pn11(i)*log(Fn(i)*Gn(i)/Pn11(i)); |
| 93 | + } |
| 94 | + if(Pn12(i)*Fn(i)*(1-Gn(i))>0){ |
| 95 | + Tn = Tn + n*Pn12(i)*log(Fn(i)*(1-Gn(i))/Pn12(i)); |
| 96 | + } |
| 97 | + if(Pn21(i)*(1-Fn(i))*Gn(i)>0){ |
| 98 | + Tn = Tn + n*Pn21(i)*log((1-Fn(i))*Gn(i)/Pn21(i)); |
| 99 | + } |
| 100 | + if(Pn22(i)*(1-Fn(i))*(1-Gn(i))>0){ |
| 101 | + Tn = Tn + n*Pn22(i)*log((1-Fn(i))*(1-Gn(i))/Pn22(i)); |
| 102 | + } |
| 103 | + } |
| 104 | + return -2*Tn/(n*n); |
| 105 | + }") |
| 106 | + |
| 107 | +#***********************************************************************# |
| 108 | +# Function to produce the Kolmogorov–Smirnov, Cramer-von-Mises, and |
| 109 | +# Anderson-Darling functional distance between empirical copula and |
| 110 | +# independence copula for testing independence versus positive quadrant |
| 111 | +# dependence |
| 112 | +cppFunction(" NumericVector Rcpp_KCA_01(NumericMatrix X, int n){ |
| 113 | + NumericVector Fn(n); |
| 114 | + NumericVector Gn(n); |
| 115 | + for(int i=0; i<n; i++){ |
| 116 | + for(int j=0; j<n; j++){ |
| 117 | + if( X(j,0)<= X(i,0)){ |
| 118 | + Fn(i) = Fn(i)+(double)1/(n+1); |
| 119 | + } |
| 120 | + if( X(j,1)<= X(i,1)){ |
| 121 | + Gn(i) = Gn(i)+(double)1/(n+1); |
| 122 | + } |
| 123 | + } |
| 124 | + } |
| 125 | + NumericVector Cn(n); |
| 126 | + for(int i=0; i<n; i++){ |
| 127 | + for(int j=0; j<n; j++){ |
| 128 | + if( Fn(j) <= Fn(i) & Gn(j) <= Gn(i)){ |
| 129 | + Cn(i) = Cn(i) + (double)1/n; |
| 130 | + } |
| 131 | + } |
| 132 | + } |
| 133 | + double KS = 0; double CvM = 0; double AD = 0; |
| 134 | + for(int i=0; i<n; i++){ |
| 135 | + if(Cn(i)>Fn(i)*Gn(i)){ |
| 136 | + KS = std::max( pow(n,0.5)*(Cn(i)-Fn(i)*Gn(i)), KS); |
| 137 | + CvM = pow(Cn(i)-Fn(i)*Gn(i),2) + CvM; |
| 138 | + AD = pow(Cn(i)-Fn(i)*Gn(i),2)/(Fn(i)*Gn(i)*(1-Fn(i))*(1-Gn(i))) + AD; |
| 139 | + } |
| 140 | + } |
| 141 | + NumericVector TTS(3); |
| 142 | + TTS(0) = KS; TTS(1) = CvM; TTS(2) = AD; |
| 143 | + return TTS; |
| 144 | + }") |
| 145 | + |
| 146 | +#***********************************************************************# |
| 147 | +# Function to generate a random sample from Frank, Clayton, or Gumbel copula |
| 148 | +# according to Kandell's tau |
| 149 | +RV_CopTau = function(n=10,Tau=0,Copula="Frank"){ |
| 150 | + if(Copula == "Frank"){ |
| 151 | + FrankPara = iTau(frankCopula(2),Tau); |
| 152 | + myCop.Frank = archmCopula(family = "frank", dim = 2, param = FrankPara) |
| 153 | + X = rCopula(n, myCop.Frank) |
| 154 | + } |
| 155 | + if(Copula == "Clayton"){ |
| 156 | + ClayPara = iTau(claytonCopula(2),Tau); |
| 157 | + myCop.Clay = archmCopula(family = "clayton", dim = 2, param = ClayPara) |
| 158 | + X = rCopula(n, myCop.Clay) |
| 159 | + } |
| 160 | + if(Copula == "Gumbel"){ |
| 161 | + GumPara = iTau(gumbelCopula(2),Tau); |
| 162 | + myCop.Gumbel = archmCopula(family = "gumbel", dim = 2, param = GumPara) |
| 163 | + X = rCopula(n, myCop.Gumbel) |
| 164 | + } |
| 165 | + return(X) |
| 166 | +} |
| 167 | + |
| 168 | +#***********************************************************************# |
| 169 | +# Function to generate a random sample from the Gaussian copula |
| 170 | +# according to Pearson's rho |
| 171 | +RV_CopGaussian = function(n=10,Rho=0){ |
| 172 | + myCop.Gaussian = normalCopula(Rho) |
| 173 | + X = rCopula(n, myCop.Gaussian) |
| 174 | + return(X) |
| 175 | +} |
| 176 | + |
| 177 | +#***********************************************************************# |
| 178 | +# Main function to perform the EL, KS, CvM, AD, one-sided Spearman's and |
| 179 | +# Kendall's rank tests |
| 180 | +# Arguments: |
| 181 | +# X = First part of the paired data. |
| 182 | +# Y = Second part of the paired data. |
| 183 | +# alapha = Significance level (default 0.05). |
| 184 | +# graph = TRUE: to produce the scatterplot of the data and the pseudo-observations |
| 185 | +# or FALSE (default): no scatterplot |
| 186 | +# |
| 187 | +# Value: |
| 188 | +# IndvsPQD = A table in data frame provides the test statistic, decision |
| 189 | +# of rejecting independence (1: reject independence; 0: do not |
| 190 | +# reject independence), critical value, and p-value for each |
| 191 | +# following test: EL, KS, CvM, AD, one-sided Spearman and |
| 192 | +# Kendall's rank tests |
| 193 | +# (Large sample size (n=100) may take some time) |
| 194 | +IndvsPQD = function(X, Y, alpha=0.05, graph=FALSE){ |
| 195 | + n = length(X); |
| 196 | + if(graph==TRUE) |
| 197 | + { |
| 198 | + par(mar=c(4.5,5,3,0.5)) |
| 199 | + par(mfrow=c(1,2)) |
| 200 | + plot(X,Y,xlab="X", ylab="Y",xlim=range(X),ylim=range(Y),main="Scatterplot of the data") |
| 201 | + plot(rank(X)/(n+1),rank(Y)/(n+1),xlab="X", ylab="Y",xlim=c(0,1),ylim=c(0,1),main="Scatterplot of pseudo-observations") |
| 202 | + } |
| 203 | + Tn = array(,6); |
| 204 | + Tn[1] = Rcpp_IP_FF(cbind(X,Y), n); |
| 205 | + Tn[2:4] = Rcpp_KCA_01(cbind(X,Y), n); |
| 206 | + Tn[5] = cor(X,Y,method="spearman"); |
| 207 | + Tn[6] = cor(X,Y,method="kendall"); |
| 208 | + B = 10000; Tn0 = array(,c(B,6)); |
| 209 | + for(b in 1:B){ |
| 210 | + X0 = array(runif(2*n),c(n,2)); |
| 211 | + Tn0[b,1] = Rcpp_IP_FF(X0, n); |
| 212 | + Tn0[b,2:4] = Rcpp_KCA_01(X0, n); |
| 213 | + Tn0[b,5] = cor(X0[,1],X0[,2],method="spearman"); |
| 214 | + Tn0[b,6] = cor(X0[,1],X0[,2],method="kendall"); |
| 215 | + } |
| 216 | + |
| 217 | + CV = array(,6); p_values = array(,6); Reject_Indep = array(,6) |
| 218 | + CV = apply(Tn0,2,quantile,1-alpha); |
| 219 | + for(j in 1:6){ |
| 220 | + p_values[j] = mean(Tn0[,j]>Tn[j]) |
| 221 | + Reject_Indep[j] = (Tn[j]>CV[j]) |
| 222 | + } |
| 223 | + IndvsPQD = data.frame(cbind(Tn,p_values,Reject_Indep,CV)) |
| 224 | + colnames(IndvsPQD) = c("test_statistic", "p-value", "reject_independence", "critical_value") |
| 225 | + rownames(IndvsPQD) = c("EL", "KS", "CvM", "AD", "spearman", "kendall"); |
| 226 | + print("1: reject independence; 0: do not rejct independence") |
| 227 | + return(IndvsPQD) |
| 228 | +} |
| 229 | + |
| 230 | + |
0 commit comments