### multi-split p-value for zeroinfl function (ZIP)
pval.zipath <- function(formula, data, weights, subset, na.action, offset, standardize=TRUE,
family = c("poisson", "negbin", "geometric"),
penalty = c("enet", "mnet", "snet"), gamma.count=3, gamma.zero=3, prop=0.5, trace=TRUE, B=10, ...){
fam <- match.arg(family)
pen <- match.arg(penalty)
j <- 1
count.pval <- zero.pval <- NULL
while(j <= B){
### 1. randomly split the original data into two disjoint groups, din and dout, of equal size
n <- dim(data)[1]
nhalf <- round(n*prop,0)
if(trace) cat("\nmulti-split", j, "\n")
set.seed(j)
s <- sample(n)[1:nhalf]
din <- data[s,]
dout <- data[-s,]
### 2. using only din to estimate the set of active predictors S
fitpen <- tuning.zipath(formula, data=din, family=fam, penalty=pen, gamma.count=gamma.count, gamma.zero=gamma.zero, ...)
rhs1.0 <- attr(fitpen$terms$count, "term.labels")
rhs2.0 <- attr(fitpen$terms$zero, "term.labels")
xsel <- coef(fitpen, which=which.min(fitpen$bic))
if(trace) print(xsel)
rhs1c <- which(abs(xsel$count)[-1] > 0)
rhs2z <- which(abs(xsel$zero)[-1] > 0)
rhs1 <- rhs1.0[rhs1c]
rhs2 <- rhs2.0[rhs2z]
### 3(a) using only dout to fit the selected variables in S with ordinary ZIP and calculate the corresponding p-values Q_j, for j \in S
### 3(b) set the remaining p-values to 1
if(length(rhs1)==0) rhs1tmp <- 1
else {
rhs1tmp <- rhs1[1]
if(length(rhs1) > 1)
for(i in 2:length(rhs1))
rhs1tmp <- paste(rhs1tmp, "+", rhs1[i])
}
if(length(rhs2)==0) rhs2tmp <- 1
else {
rhs2tmp <- rhs2[1]
if(length(rhs2) > 1)
for(i in 2:length(rhs2))
rhs2tmp <- paste(rhs2tmp, "+", rhs2[i])
}
res <- deparse(terms(fitpen$terms$count)[[2]]) ### response variable
#rhs1 <- attr(fit$terms$count, "term.labels")
#rhs2 <- attr(fit$terms$zero, "term.labels")
out <- paste(res, "~", rhs1tmp, "|", rhs2tmp)
# set the environment of the formula (i.e. where should
# R look for variables when data aren't specified?)
environment(out) <- parent.frame()
#fit <- zipath(eval(parse(text=out)), data=data, penalty=penalty)
fit <- try(zeroinfl(eval(parse(text=out)), data=dout, dist=fam))
if(trace) print(summary(fit))
if(inherits(fit, "try-error")){
j <- j+1
B <- B+1
next
}
coef <- summary(fit)$coef
### excluding intercept
if(length(rhs1c) > 0){
pc <- rep(1, length(rhs1.0))
pc[rhs1c] <- coef$count[2:(1+length(rhs1c)),4]
### 4. Define the adjusted p-values as
pc <- pc*(length(rhs1c)+length(rhs2z))
count.pval <- rbind(count.pval, pmin(1, pc))
}
if(length(rhs2z) > 0){
pz <- rep(1, length(rhs2.0))
pz[rhs2z] <- coef$zero[2:(1+length(rhs2z)),4]
pz <- pz*(length(rhs1c)+length(rhs2z))
zero.pval <- rbind(zero.pval, pmin(1, pz))
}
j <- j + 1
}
colnames(count.pval) <- rhs1.0
colnames(zero.pval) <- rhs2.0
Q <- function(x, gamma) min(1, quantile(x, gamma, na.rm=TRUE)/gamma)
P <- function(x){
gam <- seq(0.05, 0.99, by=0.01)
res <- rep(NA, length(gam))
for(i in 1:length(gam))
res[i] <- Q(x, gam[i])
res <- min(1, (1-log(min(gam)))*min(res))
}
count.pval.q <- apply(count.pval, 2, P)
zero.pval.q <- apply(zero.pval, 2, P)
return(list(count.pval=count.pval, zero.pval=zero.pval, count.pval.q=count.pval.q, zero.pval.q=zero.pval.q))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.