# inst/doc/my-vignette.R In mazeinda: Monotonic Association on Zero-Inflated Data

```## ---- echo=FALSE---------------------------------------------------------
numero=10000

## ----echo=FALSE----------------------------------------------------------
### DEFINITIONS OF FORMULAS

prop_11<-function(x,y){
return((length(which(apply(cbind(x, y), 1, function(row) all(row[1] > 0 & row[2] > 0)))))/length(x))
}

prop_01<-function(x,y){
return((length(which(apply(cbind(x, y), 1, function(row) all(row[1] == 0 & row[2] > 0)))))/length(x))
}

prop_10<-function(x,y){
return((length(which(apply(cbind(x, y), 1, function(row) all(row[1] > 0 & row[2] == 0)))))/length(x))
}

tau_T <- function(x, y, estimator = "values", p11 = 0, p01 = 0, p10 = 0) {

nz <- which(apply(cbind(x, y), 1, function(row) all(row[1] > 0 & row[2] > 0)))
nz_data <- cbind(x, y)[nz, ]

if (length(nz) > 1) {
if (length(which(!duplicated(nz_data[, 1]))) == 1 | length(which(!duplicated(nz_data[, 2]))) == 1) {
t_11 <- 0  #positive ties treated as in Kendall's tau-b; i.e. they bring no contribution
} else {
t_11 <- cor(nz_data[, 1], nz_data[, 2], method = "kendall")
}
} else {
t_11 <- 0  #in case of only one positive entry, it is impossible to measure concordance or discordance
}

if (estimator == "values") {
p11 <-prop_11(x,y)
p01 <-prop_01(x,y)
p10 <-prop_10(x,y)
}

p_00 <- 1 - (p11 + p01 + p10)
if (p11 == 1 | p11 == 0 | p01 == 1 | p01 == 0 | p10 == 1 | p10 == 0 | p_00 == 1 | p_00 == 0){
### cases when variance formula cannot be applied
return(cor(x, y, method = "kendall"))
}
return(p11^2 * t_11 + 2 * (p_00 * p11 - p01 * p10))
}

tau_P<-function(x,y){
data=cbind(x,y)
nz=which(apply(data,1,function(row) all(row[1]>0 & row[2]>0)))
p11=length(nz)/dim(data)[1]
nz_data=data[nz,]

Y0= which(apply(data,1,function(row) all(row[2]==0)))
Y1= which(apply(data,1,function(row) all(row[2]>0)))
X0= which(apply(data,1,function(row) all(row[1]==0)))
X1= which(apply(data,1,function(row) all(row[1]>0)))

if (length(Y0)>0 & length(Y1)>0){
count_1=0
for (i in data[Y0,1]){
count_1= count_1 + sum (sapply(data[Y1,1], function(x) X =if({x-i}<0){return (1)} else{return (0)}))
}
p_1= count_1 /(length(Y0)*length(Y1))
}
else{
p_1=0
}

if (length(X0) & length(X1)>0){
count_2=0
for (i in data[X0,2]){
count_2=count_2+sum(sapply(data[X0,2], function(x) if({x-i}<0){return (1)} else{return (0)}))
}
p_2= count_2/(length(X0)*length(X1))
}
else{
p_2=0
}

p01= (length(which(apply(data,1,function(row) all(row[1]==0 & row[2]>0)))))/dim(data)[1]
p10= (length(which(apply(data,1,function(row) all(row[2]==0 & row[1]>0)))))/dim(data)[1]
p_00= (length(which(apply(data,1,function(row) all(row==0)))))/dim(data)[1]

if (length(nz)>1){
if (sd(nz_data[,1])==0 | sd(nz_data[,2])==0) {t_11=0}
else {t_11= cor(nz_data[,1],nz_data[,2],method="kendall")}
}
else {t_11=0}

return (p11^2*t_11+2*(p_00*p11-p01*p10)+ 2*p11*(p10*((2*p_1)-1)+ p01*((2*p_2)-1)))
}

data=cbind(x,y)
nz=which(apply(data,1,function(row) all(row[1]>0 & row[2]>0)))
p11=length(nz)/dim(data)[1]
nz_data=data[nz,]

Y0= which(apply(data,1,function(row) all(row[2]==0)))
Y1= which(apply(data,1,function(row) all(row[2]>0)))
X0= which(apply(data,1,function(row) all(row[1]==0)))
X1= which(apply(data,1,function(row) all(row[1]>0)))

if (length(Y0)>0 & length(Y1)>0){
count_1=0
for (i in data[Y0,1]){
count_1= count_1 + sum (sapply(data[Y1,1], function(x) X =if({x-i}<0){return (1)} else{return (0)}))
}
p_1= count_1 /(length(Y0)*length(Y1))
}
else{
p_1=0
}

if (length(X0) & length(X1)>0){
count_2=0
for (i in data[X0,2]){
count_2=count_2+sum(sapply(data[X0,2], function(x) if({x-i}<0){return (1)} else{return (0)}))
}
p_2= count_2/(length(X0)*length(X1))
}
else{
p_2=0
}

p01= (length(which(apply(data,1,function(row) all(row[1]==0 & row[2]>0)))))/dim(data)[1]
p10= (length(which(apply(data,1,function(row) all(row[2]==0 & row[1]>0)))))/dim(data)[1]
p_00= (length(which(apply(data,1,function(row) all(row==0)))))/dim(data)[1]

return (2*p11*(p10*((2*p_1)-1)+ p01*((2*p_2)-1)))
}

## ----echo=FALSE,message=FALSE--------------------------------------------
# repeat loop in R or repeat function in r
compare_P<-function(infl, goal){
v=c()
sum= 0
while (sum<=goal){
library("gamlss.dist")
x=abs(rBEZI(30, mu = 0.9, sigma = 1, nu = infl))
y=abs(rBEZI(30, mu = 0.9, sigma = 1, nu = infl))
if (sd(x)!= 0 & sd(y)!= 0){ ### cannot test constant vectors, because Kendall's formula does not apply
sum = sum+1
C=cor(x,y,method='kendall')
v=c(v,(C-tau_P(x,y)))
}
}
return(v)
}

compare_T<-function(infl, goal){
v=c()
sum= 0
while (sum<goal){
library("gamlss.dist")
x=abs(rBEZI(30, mu = 0.9, sigma = 1, nu = infl))
y=abs(rBEZI(30, mu = 0.9, sigma = 1, nu = infl))
if (sd(x)!= 0 & sd(y)!= 0){ ### cannot test constant vectors, because Kendall's formula does not apply
sum = sum+1
C=cor(x,y,method='kendall')
v=c(v,(C-tau_T(x,y)))
}
}
return(v)
}

compare_d<-function(infl, goal){
v=c()
sum= 0
while (sum<goal){ ### cannot test constant vectors, because Kendall's formula does not apply
library("gamlss.dist")
x=abs(rBEZI(30, mu = 0.9, sigma = 1, nu = infl))
y=abs(rBEZI(30, mu = 0.9, sigma = 1, nu = infl))
if (sd(x)!= 0 & sd(y)!= 0){
sum = sum+1
}
}
return(v)
}

### histograms of difference of Kendall's Tau and proposed alternative formula

par(mfrow = c(1, 3))
hist(compare_P(0.3, numero),main=paste("tau_b-tau_P, infl=", 0.3),pin=c(1,1))
hist(compare_P(0.5, numero),main=paste("tau_b-tau_P, infl=", 0.5),pin=c(1,1))
hist(compare_P(0.8, numero),main=paste("tau_b-tau_P, infl=", 0.8),pin=c(1,1))

## ----echo=FALSE,message=FALSE--------------------------------------------
par(mfrow = c(1, 3))
hist(compare_T(0.3, numero),main=paste("tau_b-tau_T, infl=", 0.3),pin=c(1,1))
hist(compare_T(0.5, numero),main=paste("tau_b-tau_T, infl=", 0.5),pin=c(1,1))
hist(compare_T(0.8, numero),main=paste("tau_b-tau_T, infl=", 0.8),pin=c(1,1))

## ----echo=FALSE----------------------------------------------------------
### histograms of difference of two formulas
par(mfrow = c(1, 3))

hist(compare_d(0.3, numero),main=paste("tau_P-tau_T, infl=", 0.3),pin=c(5,5))
hist(compare_d(0.5, numero),main=paste("tau_P-tau_T, infl=", 0.5),pin=c(5,5))
hist(compare_d(0.8, numero),main=paste("tau_P-tau_T, infl=", 0.8),pin=c(5,5))

## ---- echo=FALSE, results='asis'-----------------------------------------
m=matrix(c(mean(compare_P(0.3, numero)),mean(compare_P(0.5, numero)),
mean(compare_P(0.8, numero)),mean(compare_T(0.3, numero)),
mean(compare_T(0.5, numero)),mean(compare_T(0.8, numero))),
3, byrow=FALSE)
colnames(m)<- c("tau_p", "tau_t")
row.names(m)<-c("infl=0.3", "infl=0.5", "infl=0.8")
knitr::kable(m, caption="means of the two statistics")

## ------------------------------------------------------------------------
library(mazeinda)

set.seed(234)

x=abs(rBEZI(50, mu = 0.9, sigma = 1, nu = 0.7))
y=abs(rBEZI(50, mu = 0.9, sigma = 1, nu = 0.2))
associate(x,y)
test_associations(x,y)
combine(x,y)

x=matrix(abs(rBEZI(50, mu = 0.9, sigma = 1, nu = 0.6)),5)
y=matrix(abs(rBEZI(30, mu = 0.9, sigma = 1, nu = 0.3)),5)
knitr::kable(associate(x,y), digits=3, caption="associate(x,y)")
knitr::kable(suppressWarnings(test_associations(x,y)),
digits=3, caption="test_associations(x,y)")
knitr::kable(suppressWarnings(combine(x,y)),
digits=3,caption="combine(x,y)")

knitr::kable(suppressWarnings(associate(x,x)),
digits=3, caption="associate all vectors in the matrix x pairwise")

## ------------------------------------------------------------------------
knitr::kable(associate(x,y, estimator="own", p11=0.1,p10=0.1, p01=0.1), digits=3,
caption="parameters specified")
m1=matrix(abs(rBEZI(50, mu = 0.9, sigma = 1, nu = 0.7)),5)
m2=matrix(abs(rBEZI(30, mu = 0.9, sigma = 1, nu = 0.3)),5)
knitr::kable(associate(x,y, estimator="mean", d1=m1,d2=m2), digits=3,
caption="parameters estimated as population mean")
```

## Try the mazeinda package in your browser

Any scripts or data that you put into this service are public.

mazeinda documentation built on May 2, 2019, 3:25 a.m.