Skip to content

Commit 7f50b1f

Browse files
authored
Add files via upload
0 parents  commit 7f50b1f

File tree

9 files changed

+973
-0
lines changed

9 files changed

+973
-0
lines changed

EL_PQD_Library.R

Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
#########################################################################
2+
# R functions for testing independence versus positive quadrant #
3+
# dependence corresponding to the manuscript titled, #
4+
# "Testing for positive quadrant dependence." #
5+
# Date: 06/17/2018 #
6+
#########################################################################
7+
8+
library(Rcpp)
9+
library(copula)
10+
11+
#***********************************************************************#
12+
# Function to produce EL-based test statistic for independence versus
13+
# positive quadrant dependence.
14+
cppFunction(" double Rcpp_IP_FF(NumericMatrix X, int n){
15+
NumericVector Pn11(n*n);
16+
NumericVector Pn12(n*n);
17+
NumericVector Pn21(n*n);
18+
NumericVector Pn22(n*n);
19+
NumericVector Fn(n*n);
20+
NumericVector Gn(n*n);
21+
double Tn=0;
22+
int ii; int ij;
23+
for(int i=0; i<n*n; i++){
24+
ii = i/n; ij = i%n;
25+
for(int j=0; j<n; j++){
26+
if( X(j,0)<= X(ii,0) & X(j,1)<= X(ij,1)){
27+
Pn11(i) = Pn11(i)+1;
28+
}
29+
if( X(j,0)<= X(ii,0) & X(j,1)> X(ij,1)){
30+
Pn12(i) = Pn12(i)+1;
31+
}
32+
if( X(j,0)> X(ii,0) & X(j,1)<= X(ij,1)){
33+
Pn21(i) = Pn21(i)+1;
34+
}
35+
if( X(j,0)> X(ii,0) & X(j,1)> X(ij,1)){
36+
Pn22(i) = Pn22(i)+1;
37+
}
38+
}
39+
Pn11(i) = Pn11(i)/n; Pn12(i) = Pn12(i)/n;
40+
Pn21(i) = Pn21(i)/n; Pn22(i) = Pn22(i)/n;
41+
Fn(i) = Pn11(i)+Pn12(i); Gn(i) = Pn11(i)+Pn21(i);
42+
if(Pn11(i)>Fn(i)*Gn(i)){
43+
if(Pn11(i)*Fn(i)*Gn(i)>0){
44+
Tn = Tn + n*Pn11(i)*log(Fn(i)*Gn(i)/Pn11(i));
45+
}
46+
if(Pn12(i)*Fn(i)*(1-Gn(i))>0){
47+
Tn = Tn + n*Pn12(i)*log(Fn(i)*(1-Gn(i))/Pn12(i));
48+
}
49+
if(Pn21(i)*(1-Fn(i))*Gn(i)>0){
50+
Tn = Tn + n*Pn21(i)*log((1-Fn(i))*Gn(i)/Pn21(i));
51+
}
52+
if(Pn22(i)*(1-Fn(i))*(1-Gn(i))>0){
53+
Tn = Tn + n*Pn22(i)*log((1-Fn(i))*(1-Gn(i))/Pn22(i));
54+
}
55+
}
56+
}
57+
return -2*Tn/(n*n);
58+
}")
59+
60+
#***********************************************************************#
61+
# Function to produce EL-based test statistic of omnibus independence
62+
# test proposed in Einmahl and McKeague (2003)
63+
cppFunction(" double Rcpp_Ind_FF(NumericMatrix X, int n){
64+
NumericVector Pn11(n*n);
65+
NumericVector Pn12(n*n);
66+
NumericVector Pn21(n*n);
67+
NumericVector Pn22(n*n);
68+
NumericVector Fn(n*n);
69+
NumericVector Gn(n*n);
70+
double Tn=0;
71+
int ii; int ij;
72+
for(int i=0; i<n*n; i++){
73+
ii = i/n; ij = i%n;
74+
for(int j=0; j<n; j++){
75+
if( X(j,0)<= X(ii,0) & X(j,1)<= X(ij,1)){
76+
Pn11(i) = Pn11(i)+1;
77+
}
78+
if( X(j,0)<= X(ii,0) & X(j,1)> X(ij,1)){
79+
Pn12(i) = Pn12(i)+1;
80+
}
81+
if( X(j,0)> X(ii,0) & X(j,1)<= X(ij,1)){
82+
Pn21(i) = Pn21(i)+1;
83+
}
84+
if( X(j,0)> X(ii,0) & X(j,1)> X(ij,1)){
85+
Pn22(i) = Pn22(i)+1;
86+
}
87+
}
88+
Pn11(i) = Pn11(i)/n; Pn12(i) = Pn12(i)/n;
89+
Pn21(i) = Pn21(i)/n; Pn22(i) = Pn22(i)/n;
90+
Fn(i) = Pn11(i)+Pn12(i); Gn(i) = Pn11(i)+Pn21(i);
91+
if(Pn11(i)*Fn(i)*Gn(i)>0){
92+
Tn = Tn + n*Pn11(i)*log(Fn(i)*Gn(i)/Pn11(i));
93+
}
94+
if(Pn12(i)*Fn(i)*(1-Gn(i))>0){
95+
Tn = Tn + n*Pn12(i)*log(Fn(i)*(1-Gn(i))/Pn12(i));
96+
}
97+
if(Pn21(i)*(1-Fn(i))*Gn(i)>0){
98+
Tn = Tn + n*Pn21(i)*log((1-Fn(i))*Gn(i)/Pn21(i));
99+
}
100+
if(Pn22(i)*(1-Fn(i))*(1-Gn(i))>0){
101+
Tn = Tn + n*Pn22(i)*log((1-Fn(i))*(1-Gn(i))/Pn22(i));
102+
}
103+
}
104+
return -2*Tn/(n*n);
105+
}")
106+
107+
#***********************************************************************#
108+
# Function to produce the Kolmogorov–Smirnov, Cramer-von-Mises, and
109+
# Anderson-Darling functional distance between empirical copula and
110+
# independence copula for testing independence versus positive quadrant
111+
# dependence
112+
cppFunction(" NumericVector Rcpp_KCA_01(NumericMatrix X, int n){
113+
NumericVector Fn(n);
114+
NumericVector Gn(n);
115+
for(int i=0; i<n; i++){
116+
for(int j=0; j<n; j++){
117+
if( X(j,0)<= X(i,0)){
118+
Fn(i) = Fn(i)+(double)1/(n+1);
119+
}
120+
if( X(j,1)<= X(i,1)){
121+
Gn(i) = Gn(i)+(double)1/(n+1);
122+
}
123+
}
124+
}
125+
NumericVector Cn(n);
126+
for(int i=0; i<n; i++){
127+
for(int j=0; j<n; j++){
128+
if( Fn(j) <= Fn(i) & Gn(j) <= Gn(i)){
129+
Cn(i) = Cn(i) + (double)1/n;
130+
}
131+
}
132+
}
133+
double KS = 0; double CvM = 0; double AD = 0;
134+
for(int i=0; i<n; i++){
135+
if(Cn(i)>Fn(i)*Gn(i)){
136+
KS = std::max( pow(n,0.5)*(Cn(i)-Fn(i)*Gn(i)), KS);
137+
CvM = pow(Cn(i)-Fn(i)*Gn(i),2) + CvM;
138+
AD = pow(Cn(i)-Fn(i)*Gn(i),2)/(Fn(i)*Gn(i)*(1-Fn(i))*(1-Gn(i))) + AD;
139+
}
140+
}
141+
NumericVector TTS(3);
142+
TTS(0) = KS; TTS(1) = CvM; TTS(2) = AD;
143+
return TTS;
144+
}")
145+
146+
#***********************************************************************#
147+
# Function to generate a random sample from Frank, Clayton, or Gumbel copula
148+
# according to Kandell's tau
149+
RV_CopTau = function(n=10,Tau=0,Copula="Frank"){
150+
if(Copula == "Frank"){
151+
FrankPara = iTau(frankCopula(2),Tau);
152+
myCop.Frank = archmCopula(family = "frank", dim = 2, param = FrankPara)
153+
X = rCopula(n, myCop.Frank)
154+
}
155+
if(Copula == "Clayton"){
156+
ClayPara = iTau(claytonCopula(2),Tau);
157+
myCop.Clay = archmCopula(family = "clayton", dim = 2, param = ClayPara)
158+
X = rCopula(n, myCop.Clay)
159+
}
160+
if(Copula == "Gumbel"){
161+
GumPara = iTau(gumbelCopula(2),Tau);
162+
myCop.Gumbel = archmCopula(family = "gumbel", dim = 2, param = GumPara)
163+
X = rCopula(n, myCop.Gumbel)
164+
}
165+
return(X)
166+
}
167+
168+
#***********************************************************************#
169+
# Function to generate a random sample from the Gaussian copula
170+
# according to Pearson's rho
171+
RV_CopGaussian = function(n=10,Rho=0){
172+
myCop.Gaussian = normalCopula(Rho)
173+
X = rCopula(n, myCop.Gaussian)
174+
return(X)
175+
}
176+
177+
#***********************************************************************#
178+
# Main function to perform the EL, KS, CvM, AD, one-sided Spearman's and
179+
# Kendall's rank tests
180+
# Arguments:
181+
# X = First part of the paired data.
182+
# Y = Second part of the paired data.
183+
# alapha = Significance level (default 0.05).
184+
# graph = TRUE: to produce the scatterplot of the data and the pseudo-observations
185+
# or FALSE (default): no scatterplot
186+
#
187+
# Value:
188+
# IndvsPQD = A table in data frame provides the test statistic, decision
189+
# of rejecting independence (1: reject independence; 0: do not
190+
# reject independence), critical value, and p-value for each
191+
# following test: EL, KS, CvM, AD, one-sided Spearman and
192+
# Kendall's rank tests
193+
# (Large sample size (n=100) may take some time)
194+
IndvsPQD = function(X, Y, alpha=0.05, graph=FALSE){
195+
n = length(X);
196+
if(graph==TRUE)
197+
{
198+
par(mar=c(4.5,5,3,0.5))
199+
par(mfrow=c(1,2))
200+
plot(X,Y,xlab="X", ylab="Y",xlim=range(X),ylim=range(Y),main="Scatterplot of the data")
201+
plot(rank(X)/(n+1),rank(Y)/(n+1),xlab="X", ylab="Y",xlim=c(0,1),ylim=c(0,1),main="Scatterplot of pseudo-observations")
202+
}
203+
Tn = array(,6);
204+
Tn[1] = Rcpp_IP_FF(cbind(X,Y), n);
205+
Tn[2:4] = Rcpp_KCA_01(cbind(X,Y), n);
206+
Tn[5] = cor(X,Y,method="spearman");
207+
Tn[6] = cor(X,Y,method="kendall");
208+
B = 10000; Tn0 = array(,c(B,6));
209+
for(b in 1:B){
210+
X0 = array(runif(2*n),c(n,2));
211+
Tn0[b,1] = Rcpp_IP_FF(X0, n);
212+
Tn0[b,2:4] = Rcpp_KCA_01(X0, n);
213+
Tn0[b,5] = cor(X0[,1],X0[,2],method="spearman");
214+
Tn0[b,6] = cor(X0[,1],X0[,2],method="kendall");
215+
}
216+
217+
CV = array(,6); p_values = array(,6); Reject_Indep = array(,6)
218+
CV = apply(Tn0,2,quantile,1-alpha);
219+
for(j in 1:6){
220+
p_values[j] = mean(Tn0[,j]>Tn[j])
221+
Reject_Indep[j] = (Tn[j]>CV[j])
222+
}
223+
IndvsPQD = data.frame(cbind(Tn,p_values,Reject_Indep,CV))
224+
colnames(IndvsPQD) = c("test_statistic", "p-value", "reject_independence", "critical_value")
225+
rownames(IndvsPQD) = c("EL", "KS", "CvM", "AD", "spearman", "kendall");
226+
print("1: reject independence; 0: do not rejct independence")
227+
return(IndvsPQD)
228+
}
229+
230+

EducationData.R

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#########################################################################
2+
# R functions for testing independence versus positive quadrant #
3+
# dependence corresponding to the manuscript titled, #
4+
# "Testing for positive quadrant dependence." #
5+
# Date: 06/17/2018 #
6+
###########################################################################
7+
source("http://people.stat.sc.edu/wang528/PQD/EL_PQD_Library.R")
8+
EducationData = read.csv("http://people.stat.sc.edu/wang528/PQD/EducationData.csv")
9+
# sample size n=51;
10+
n = length(EducationData$GraRate);
11+
12+
###########################################################################
13+
# Here we provide the scatterplot of the data and pseudo-observations
14+
# to roughly visualize the dependence structure between
15+
# graduation rate and amount spent per person.
16+
par(mar=c(4.5,5,3,0.5))
17+
par(mfrow=c(1,2))
18+
plot(EducationData$GraRate, EducationData$SpentStud,xlab="Graduation rate",ylab="Amount spent per student",main="Scatterplot of the data")
19+
plot(rank(EducationData$GraRate)/(n+1),rank(EducationData$SpentStud)/(n+1),xlab="Graduation rate",ylab="Amount spent per student",main="Scatterplot of pseudo-observations");
20+
21+
# Perform all considered tests:
22+
# It takes less than 2 mins
23+
set.seed(100)
24+
IndvsPQD(EducationData$GraRate,EducationData$SpentStud)
25+
26+
# [1] "1: reject independence; 0: do not rejct independence"
27+
# test_statistic p-value reject_independence critical_value
28+
# EL 1.14408178 0.0958 0 1.43299162
29+
# KS 0.47067846 0.3301 0 0.67176676
30+
# CvM 0.08260912 0.0678 0 0.09068779
31+
# AD 2.77333756 0.2244 0 4.71607090
32+
# spearman 0.21485303 0.0682 0 0.23475566
33+
# kendall 0.15015742 0.0615 0 0.15921569

EducationData.csv

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
State,GraRate,SpentStud
2+
Alabama,0.893,5191
3+
Alaska,0.756,11354
4+
Arizona,0.774,4016
5+
Arkansas,0.849,5455
6+
California,0.82,6255
7+
Colorado,0.773,5240
8+
Connecticut,0.872,11341
9+
Delaware,0.856,8718
10+
District of Columbia,0.685,10400
11+
Florida,0.779,5442
12+
Georgia,0.788,5765
13+
Hawaii,0.816,7554
14+
Idaho,0.789,4125
15+
Illinois,0.856,8331
16+
Indiana,0.871,5510
17+
Iowa,0.908,6697
18+
Kansas,0.857,6137
19+
Kentucky,0.88,5531
20+
Louisiana,0.775,6204
21+
Maine,0.875,7619
22+
Maryland,0.87,8793
23+
Massachusetts,0.873,9614
24+
Michigan,0.798,6722
25+
Minnesota,0.819,7754
26+
Mississippi,0.754,4796
27+
Missouri,0.878,6035
28+
Montana,0.86,6511
29+
Nebraska,0.889,7835
30+
Nevada,0.713,4945
31+
New Hampshire,0.881,9190
32+
New Jersey,0.897,10579
33+
New Mexico,0.686,5436
34+
New York,0.792,14769
35+
North Carolina,0.856,5433
36+
North Dakota,0.866,7783
37+
Ohio,0.807,6655
38+
Oklahoma,0.825,4466
39+
Oregon,0.738,6074
40+
Pennsylvania,0.848,8988
41+
Rhode Island,0.832,8874
42+
South Carolina,0.803,5476
43+
South Dakota,0.839,5254
44+
Tennessee,0.879,5373
45+
Texas,0.89,5344
46+
Utah,0.848,4125
47+
Vermont,0.877,10823
48+
Virginia,0.857,6838
49+
Washington,0.782,6070
50+
West Virginia,0.865,6501
51+
Wisconsin,0.884,6641
52+
Wyoming,0.793,9544

0 commit comments

Comments
 (0)