# extras/opticut-simuls.R In psolymos/opticut: Likelihood Based Optimal Partitioning and Indicator Species Analysis

## Simulations: K=2 case

## The power of a statistical test is
## the probability that it correctly rejects the null hypothesis
## when the null hypothesis is false.

## Here H0 is: no significant diff among hab A and B (pval >= alpha).
## H1: hab A and B are different (pval < alpha).
## in our setup the H0 is false
## so we examine how often we found that H0 is rejected (p < alpha)
## and also that the habitat identified is the right one

library(mefa4)
library(opticut)
library(indicspecies)

B <- 200

oc_sim_K2 <-
function(b1=0.5, b2=0, b3=0.05, b4=0, mu0=10, n=100, pos=TRUE)
{
K <- 2
n <- 2*(n %/% 2)
i <- 1:n
h0 <- rep(LETTERS[1:2], each=n/2)
j <- sample.int(n/2, round((n/2)*(b4/2)))
h1 <- h0
h1[j] <- h0[j+n/2]
h1[j+n/2] <- h0[j]
mu <- c(mu0*(1-b1), mu0)
Mean <- rep(mu, each=n/2)
## this can remove group effects
## same pattern in each group
#    Conf <- runif(n, -0.5*diff(mu), 0.5*diff(mu))
## counteracts the habitat effect
Conf <- runif(n,0,1) * rep(c(diff(mu), -diff(mu)), each=n/2)
Noise <- rnorm(n, 0, 0.1*mu0)
Y <- Mean + b2*Conf + b3*Noise
if (pos)
Y[Y < 0] <- 0
out <- data.frame(Y=Y, h0=h0, h1=h1, i=i, x=i-mean(i),
Mean=Mean, Conf=Conf, Noise=Noise)
attr(out, "settings") <- list(b1=b1, b2=b2, b3=b3, b4=b4,
K=K, n=n, mu0=mu0, pos=pos)
out
}

est_fun1 <- function(..., R=999, level=0.95) {
dat <- oc_sim_K2(...)
m0 <- opticut(Y ~ 1, dat, strata=h1)\$species[[1]]
m1 <- opticut(Y ~ Conf, dat, strata=h1)\$species[[1]]
## IndVal
iv <- multipatt(data.frame(spp1=dat\$Y), dat\$h1, func = "IndVal.g",
duleg=TRUE, control = how(nperm=R))
## Phi coef
rg <- multipatt(data.frame(spp1=dat\$Y), dat\$h1, func = "r.g",
duleg=TRUE, control = how(nperm=R))
## F-ratio
fv <- anova(lm(Y ~ h1, dat))

out <- matrix(NA, 5, 2)
colnames(out) <- c("stat", "pass")
rownames(out) <- c("I0", "IX", "IV", "PH", "FR")
out[1,1] <- m0\$I[1]
out[1,2] <- ifelse(m0\$logLR >= 2, 1, 0)
out[2,1] <- m1\$I[1]
out[2,2] <- ifelse(m1\$logLR >= 2, 1, 0)
out[3,1] <- iv\$sign[1,"stat"]
out[3,2] <- ifelse(iv\$sign[1,"p.value"] <= 1-level, 1, 0)
out[4,1] <- rg\$sign[1,"stat"]
out[4,2] <- ifelse(rg\$sign[1,"p.value"] <= 1-level, 1, 0)
out[5,1] <- fv[1,"F value"]
out[5,2] <- ifelse(fv[1,"Pr(>F)"] <= 1-level, 1, 0)
if (rownames(m0) != "B")
out[1,2] <- -out[1,2]
if (rownames(m1) != "B")
out[2,2] <- -out[2,2]
if (iv\$sign[1,"s.B"] != 1)
out[3,2] <- -out[3,2]
if (rg\$sign[1,"s.B"] != 1)
out[4,2] <- -out[4,2]
## F-ratio cannot tell which is low/high
t(out)
}

vals <- expand.grid(
b2=seq(0, 1, by=0.1),
b4=seq(0, 1, by=0.1))

vals2 <- expand.grid(
b2=seq(0, 1, by=0.1),
b4=seq(0, 1, by=0.1),
b1=c(0.1, 0.5, 0.9),
b3=c(0.1, 0.5, 1))

library(parallel)
cl <- makeCluster(6)
clusterExport(cl, c("est_fun", "est_fun1", "oc_sim_K2", "B","vals","vals2"))
clusterEvalQ(cl, library(opticut))
clusterEvalQ(cl, library(indicspecies))

## contrast (I)
res1 <- parLapply(cl, seq(0.1, 0.9, by=0.1), function(z)
replicate(B, est_fun1(b1=z)))

## confounding
res2 <- parLapply(cl, seq(0, 1, by=0.1), function(z)
replicate(B, est_fun1(b2=z)))

## noise
res3 <- parLapply(cl, seq(0.1, 1, by=0.1), function(z)
replicate(B, est_fun1(b3=z)))

## mixing
res4 <- parLapply(cl, seq(0, 1, by=0.1), function(z)
replicate(B, est_fun1(b4=z)))

## conf & mixing
res5 <- parLapply(cl, 1:nrow(vals), function(z)
replicate(B, est_fun1(b2=vals[z,1], b4=vals[z,2])))

## all
res6 <- parLapply(cl, 1:nrow(vals2), function(z)
replicate(B, est_fun1(b2=vals2[z,1], b4=vals2[z,2],
b1=vals2[z,3], b3=vals2[z,4])))

stopCluster(cl)

#save(vals, vals2, res1, res2, res3, res4, res5, res6, B
#    file="~/Dropbox/collaborations/opticut/R/opticut-simuls.Rdata")

r1 <- sapply(res1, function(z) rowMeans(abs(z[2,,])))
r2 <- sapply(res2, function(z) rowMeans(abs(z[2,,])))
r3 <- sapply(res3, function(z) rowMeans(abs(z[2,,])))
r4 <- sapply(res4, function(z) rowMeans(abs(z[2,,])))
r5 <- sapply(res5, function(z) rowMeans(abs(z[2,,])))
r6 <- sapply(res6, function(z) rowMeans(abs(z[2,,])))

r1v <- sapply(res1, function(z) rowMeans(z[2,,]>0))
r2v <- sapply(res2, function(z) rowMeans(z[2,,]>0))
r3v <- sapply(res3, function(z) rowMeans(z[2,,]>0))
r4v <- sapply(res4, function(z) rowMeans(z[2,,]>0))
r5v <- sapply(res5, function(z) rowMeans(z[2,,]>0))
r6v <- sapply(res6, function(z) rowMeans(z[2,,]>0))
summary(t(r1-r1v))

op <- par(mfrow=c(2,5))
rr <- r6v[,vals2\$b1==0.1 & vals2\$b3==0.1]
for (i in 1:5)
image(unique(vals\$b2), unique(vals\$b4),
matrix(-rr[i,], length(unique(vals\$b2)), length(unique(vals\$b4))),
main=rownames(rr)[i],
xlab="Confounding", ylab="Misclassification")

rr <- r6v[,vals2\$b1==0.9 & vals2\$b3==1]
for (i in 1:5)
image(unique(vals\$b2), unique(vals\$b4),
matrix(-rr[i,], length(unique(vals\$b2)), length(unique(vals\$b4))),
main=rownames(rr)[i],
xlab="Confounding", ylab="Misclassification")
par(op)

r6c <- sapply(res6, function(z) rowSums(z[2,,]>0))
xx <- data.frame(success=as.numeric(t(r6c)),
failure=B-as.numeric(t(r6c)),
method=rep(rownames(r6c), nrow(vals2)), vals2)
xx\$method <- relevel(xx\$method, "IX")
mm <- glm(cbind(success, failure) ~ method + (b1 + b2 + b3 + b4), xx, family=binomial)
summary(mm)
av <- anova(mm)
av\$Perc <- round(100 * anova(mm)\$Deviance / 1041142, 2)
sum(av\$Perc, na.rm=TRUE)
av

## Gaussian example

library(opticut)
Y <- c(0, 0, 3, 0, 2, 3, 0, 5, 5, 6, 3, 4)
z <- as.factor(rep(LETTERS[1:3], each=4))

opticut1(Y, Z=z)
opticut1(ifelse(Y>0,1,0), Z=z, dist="binomial")

print(opticut1(Y, Z=allComb(z)), cut=-Inf)
print(opticut1(ifelse(Y>0,1,0), Z=allComb(z), dist="binomial"), cut=-Inf)

allComb(z)
rankComb(Y, matrix(1, length(Y), 1), z)

set.seed(123)
oc <- opticut(Y ~ 1, strata=z)
uc1 <- uncertainty(oc, 1, type="asymp", B=9999)
#uc2 <- uncertainty(oc, 1, type="boot", B=999)
#uc3 <- uncertainty(oc, 1, type="multi", B=999)

Y <- cbind(Sp1=c(4, 6, 3, 5, 5, 6, 3, 4, 4, 1, 3, 2),
Sp2=c(0, 0, 0, 0, 1, 0, 0, 1, 5, 2, 3, 4),
Sp3=c(0, 0, 3, 0, 2, 3, 0, 5, 5, 6, 3, 4))
oc <- opticut(Y ~ 1, strata=z)
plot(oc)
uc1 <- uncertainty(oc, 1, type="asymp", B=9999)
#uc2 <- uncertainty(oc, 1, type="boot", B=999)
#uc3 <- uncertainty(oc, 2, type="multi", B=999)

## Binomial & uncertainty / mixing

library(opticut)

set.seed(1234)
n <- 5*200
g <- as.factor(rep(LETTERS[1:5], each=n/5))
mu <- qlogis(c(0.2, 0.8))
p <- numeric(n)
p[g=="A"] <- plogis(mu[1])
p[g=="B"] <- plogis(sample(mu, n/5, replace=TRUE, prob=c(0.75, 0.25)))
p[g=="C"] <- plogis(sample(mu, n/5, replace=TRUE, prob=c(0.5, 0.5)))
p[g=="D"] <- plogis(sample(mu, n/5, replace=TRUE, prob=c(0.25, 0.75)))
p[g=="E"] <- plogis(mu[2])
Y <- rbinom(n, 1, p)

B <- 200
BB <- replicate(B, sample.int(n, n, replace=TRUE))

oc <- opticut(Y ~ 1, strata=g, dist="binomial")
uc <- uncertainty(oc, type="multi", B=BB)
uc\$uncertainty[[1]]
oc\$species[[1]]
wplot(uc\$uncertainty[[1]])
plot(uc\$uncertainty[[1]])
table(Y, g)
psolymos/opticut documentation built on April 29, 2018, 10:17 a.m.