Nothing
mele <- function(data, ## dataset (data.frame)
response, ## response variable (binary)
treatment, ## treatment variable (binary) ### 1st phase
BaselineMarker, ## environment variable (continuous) ### 2nd phase
extra=NULL, ## extra variable(s)
phase, ## variable for phase indicator
ind=TRUE, ## independent or non-indepentent
maxit=2000 ## max iteration
){
## check if the data argument is missing
call <- match.call()
if (missing(data)){
data <- environment(formula)
}
## argument validation
if(!is.data.frame(data)){
stop("Argument data must be a data.frame object.")
}else{
colNames <- colnames(data)
if(!(response %in% colNames)){
stop("Response variable not found in the data.")
}else{
if(any(levels(factor(data[, response])) != c("0", "1"))){
warning("Response variable must be either 0 or 1 only.")
}
}
if(!(treatment %in% colNames)){
stop("Treatment variable not found in the data.")
}else{
if(any(levels(factor(data[, treatment])) != c("0", "1"))){
warning("Treatment variable must be either 0 or 1 only.")
}
}
if(!(BaselineMarker %in% colNames)){
stop("BaselineMarker variable not found in the data.")
}
if(!is.null(extra)){
if(!any(extra %in% colNames)){
extraNotFound <- paste(extra[!(extra %in% colNames)], sep="", collapse=", ")
stop(paste("Extra variable(s) not found in the data:", extraNotFound))
}
tmp <- remove_rarevariants(data[,extra])
if (any(tmp))
{
idx <- tmp==TRUE
toremove=NULL
for (i in 1:length(idx))
{
if (idx[i]) toremove <- c(toremove,extra[idx[i]])
}
warnings(paste0(paste(toremove,sep=", "), " were removed due to rare vairant"))
extra <- extra[!idx]
if (length(extra)==0) extra <- NULL
}
}
if(!(phase %in% colNames)){
stop("Phase variable not found in the data.")
}else{
if(any(levels(factor(data[, phase])) != c("1", "2"))){
stop("Phase variable must be either 1 or 2 only.")
}
}
}
##check response vaiable
idx <- data[,response]==0 | data[,response]==1
if (any(idx==FALSE))
{
data <- data[idx,]
warning(paste0(sum(!idx)," rows were removed, whose response !=0 or 1"))
}
##check treatment vaiable
idx <- data[,treatment]==0 | data[,treatment]==1
if (any(idx==FALSE))
{
data <- data[idx,]
warning(paste0(sum(!idx)," rows were removed, whose treatment !=0 or 1"))
}
##check phase variable
idx <- data[,phase]==1 | data[,phase]==2
if (any(idx==FALSE))
{
data <- data[idx,]
warning(paste0(sum(!idx)," rows were removed, whose phase != 1 or 2"))
}
##check if extra variables in phase2 are missing
if (!is.null(extra))
{
nextra_remove <- 0
vars=NULL
for (var in extra)
{
idx <- data[,phase]==2 & (is.na(data[,var]) | is.null(data[,var]))
if (any(idx==TRUE))
{
nextra_remove <- nextra_remove + sum(idx)
data[idx,phase]=3
vars=c(vars,var)
}
}
idx <- data[,phase]==3
if (any(idx==TRUE))
{
data <- data[!idx,]
missingcols=NULL
for (i in 1:length(vars))
{
missingcols=paste0(missingcols," ",vars[i])
}
warning(paste0(nextra_remove, "rows in phase2 were removed due to missing data in",missingcols))
}
}
##check BaselineMarker
idx <- data[,phase]==2 & (is.na(data[,BaselineMarker]) | is.null(data[,BaselineMarker]))
if (any(idx==TRUE))
{
data <- data[!idx,]
warning(paste0(sum(idx)," rows were removed, whose BaselineMarker is missing in phase2"))
}
#indicator of divergence
converged=1
if (remove_rarevariants(data[, BaselineMarker]))
{
warnings("BaselineMarker variable is rare variant")
tmpResult <- data.frame(beta <- rep(NA,length(extra)+4), stder=rep(NA,length(extra)+4), pVal=rep(NA,length(extra)+4))
rownames(tmpResult)[1:4] <- c("(Intercept)","x1","z1","x1:z1")
rownames(tmpResult)[5:nrow(tmpResult)] <- extra
}else
{
isPhase1 <- data[, phase] == 1
isPhase2 <- data[, phase] == 2
a <- data[, response]
b <- data[, treatment]
N1 <- sum(a==1)
N0 <- sum(a==0)
N <- N1 + N0
NXY1 <- sum(b==0 & a==0)
NXY2 <- sum(b==1 & a==0)
NXY3 <- sum(b==0 & a==1)
NXY4 <- sum(b==1 & a==1)
if (NXY1==0 | NXY2==0 | NXY3==0 |NXY4==0)
{
warning(paste0("(x==0 & y==0):",NXY1,
" (x==1 & y==0):",NXY2,
" (x==0 & y==1):",NXY3,
" (x==1 & y==1):",NXY4))
stop("A treatment-response category has zero counts!")
}
y <- data[isPhase2, response]
x <- data[isPhase2, treatment]
z <- data[isPhase2, BaselineMarker]
n0 <- sum(y==0)
n1 <- sum(y==1)
n <- n0 + n1
if(!ind){
## using independence (start)
n11 <- sum(y==1 & x==1)
n10 <- sum(y==1 & x==0)
n01 <- sum(y==0 & x==1)
n00 <- sum(y==0 & x==0)
z0 <- z[x==0]
z1 <- z[x==1]
nz0 <- length(z0)
nz1 <- length(z1)
z1 <- c(z1,z0,z1,z0)
from.y <- c(y[x==1],y[x==0],y[x==1],y[x==0])
from.x <- c(x[x==1],x[x==0],x[x==1],x[x==0])
x1 <- c(rep(1,nz1),rep(0,nz0),rep(1,nz1),rep(0,nz0))
y1 <- c(rep(1,n),rep(0,n))
pzx <- rep(0,2*n)
Nyx <- pzx
pyx <- pzx
Nyx[x1==0 & y1==0] <- NXY1 - sum(x==0 & y==0)
pyx[x1==0 & y1==0] <- NXY1/(NXY1+NXY3)
Nyx[x1==1 & y1==0] <- NXY2 - sum(x==1 & y==0)
pyx[x1==1 & y1==0] <- NXY2/(NXY2+NXY4)
Nyx[x1==0 & y1==1] <- NXY3 - sum(x==0 & y==1)
pyx[x1==0 & y1==1] <- NXY3/(NXY1+NXY3)
Nyx[x1==1 & y1==1] <- NXY4 - sum(x==1 & y==1)
pyx[x1==1 & y1==1] <- NXY4/(NXY2 + NXY4)
weight <- rep(0,length(y1))
for (i in 1:length(y1)) weight[i] <- 1/sum(x==from.x[i] & y==from.y[i])
pzx[from.y==1 & from.x==1] <- weight[from.y==1 & from.x==1] * unique(pyx[y1==1 & x1==1])
pzx[from.y==1 & from.x==0] <- weight[from.y==1 & from.x==0] * unique(pyx[y1==1 & x1==0])
pzx[from.y==0 & from.x==1] <- weight[from.y==0 & from.x==1] * unique(pyx[y1==0 & x1==1])
pzx[from.y==0 & from.x==0] <- weight[from.y==0 & from.x==0] * unique(pyx[y1==0 & x1==0])
## using independence (stop)
}else{
## without using independence (start)
z.from.y <- ifelse(y==1,1,0)
x1 <- c(rep(1,n),rep(0,n),rep(1,n),rep(0,n))
y1 <- c(rep(1,2*n),rep(0,2*n))
z1 <- rep(z,4)
z.from.y<- rep(z.from.y,4)
pzw <- ifelse(z.from.y==1,1/n1 *(N1/N), 1/n0 *(N0/N) )
Nyx <- rep(0,length(y1))
for (i in 1:length(y1)) {
if (x1[i]==0 & y1[i]==0) {
Nyx[i] <- NXY1 - sum(x==0 & y==0)
}
if (x1[i]==1 & y1[i]==0) {
Nyx[i] <- NXY2 - sum(x==1 & y==0)
}
if (x1[i]==0 & y1[i]==1) {
Nyx[i] <- NXY3 - sum(x==0 & y==1)
}
if (x1[i]==1 & y1[i]==1) {
Nyx[i] <- NXY4 - sum(x==1 & y==1)
}
}
## without using independence (end)
}
X1 <- model.matrix(~ x1 + z1 + x1*z1) ## 1st phase
X <- model.matrix(~ x + z + x*z) ## 2nd phase
if(is.null(extra)){
## without adjusting the covariates
glmData <- data.frame(cbind(y, x, z, x*z))
}else{
## adjusting the covariates
covar <- data[isPhase2, extra]
for(var in extra){
if(is.factor(covar[, var])){
covar[, var] <- factor(covar[, var])
}
}
if(!ind){
covar1 <- rbind(covar[x==1,],covar[x==0,],covar[x==1,],covar[x==0,])
}else{
covar1 <- rbind(covar, covar, covar, covar)
}
#### example from JD's code
##X1 <- model.matrix(~ x1 + z1 + x1*z1 +
## covar1$age +
## factor(covar1$lmsepi)
## )
## generalized form TYL
tmpX1 <- model.matrix(as.formula(paste("~", paste(extra, collapse=" + "))), data=covar1)
toDrop <- match("(Intercept)", colnames(tmpX1))
X1 <- cbind(X1, tmpX1[, -toDrop])
#### example from JD's code
##X <- model.matrix(~ x + z + x*z +
## covar$age +
## factor(covar$lmsepi) +
## )
tmpX <- model.matrix(as.formula(paste("~", paste(extra, collapse=" + "))), data=covar)
toDrop <- match("(Intercept)", colnames(tmpX))
X <- cbind(X, tmpX[, -toDrop])
glmData <- data.frame(cbind(y, x, z, x*z, data[isPhase2, extra]))
}
wgt0 <- ifelse(y==1,N1/n1,N0/n0)
##sfit <- glm(y~x+z + x*z+ covar$age + factor(covar$lmsepi) + factor(covar$diabtrt) + factor(covar$hyp) + covar$syst+covar$dias, family=binomial,weights=wgt0)
#*options(warn=-1) #remvove message:non-integer #successes in a binomial glm!
suppressWarnings(sfit <- glm(y~., data=glmData, family=binomial, weights=wgt0))
betastart <- sfit$coef
#*options(warn=0)
diff <- 1
tol <- 1e-7
it <- 0
beta <- betastart
if(!ind){
out <- scoreMat(X1,X,y1,x1,y,pzx,beta,Nyx)
## with IND assumption (start)
while (diff > tol & it<=maxit & is.finite(sum(sum(out$cheese))) & !is.na(sum(sum(out$cheese)))) {
it <- it +1
#cat(it,"..")
out <- scoreMat(X1,X,y1,x1,y,pzx,beta,Nyx)
if (is.finite(sum(sum(out$cheese))) & !is.na(sum(sum(out$cheese))))
{
betanew <- beta + qr.solve(out$cheese,tol=1e-21) %*% out$score
diff <- max(abs(beta-betanew))
beta <- betanew
}else converged=0
}
if (it >maxit) {
converged=0
}
if (converged==0) warning("function diverges!")
if (converged==1)
{
cheese <- out$cheese
fit1 <- drop(exp(X1 %*% beta)/(1+ exp(X1 %*% beta)))
pfit1 <- ifelse(y1==1,fit1,1-fit1)
D1 <- ifelse(y1==1,pfit1*(1-pfit1),-pfit1*(1-pfit1))*X1
DZW1 <- D1*pzx
p <- ncol(X1)
D2 <- matrix(0,length(y1),p)
D2[x1==0 & y1==0,] <- matrix(rep(apply(DZW1[x1==0 & y1==0,],2,sum),nz0),nrow=nz0,byrow=TRUE)
D2[x1==1 & y1==0,] <- matrix(rep(apply(DZW1[x1==1 & y1==0,],2,sum),nz1),nrow=nz1,byrow=TRUE)
D2[x1==0 & y1==1,] <- matrix(rep(apply(DZW1[x1==0 & y1==1,],2,sum),nz0),nrow=nz0,byrow=TRUE)
D2[x1==1 & y1==1,] <- matrix(rep(apply(DZW1[x1==1 & y1==1,],2,sum),nz1),nrow=nz1,byrow=TRUE)
pyx <- rep(0,length(y1))
pyxz<- pfit1*pzx
pyx[x1==0 & y1==0] <- sum(pyxz[x1==0 & y1==0])
pyx[x1==1 & y1==0] <- sum(pyxz[x1==1 & y1==0])
pyx[x1==0 & y1==1] <- sum(pyxz[x1==0 & y1==1])
pyx[x1==1 & y1==1] <- sum(pyxz[x1==1 & y1==1])
W <- (D1/pyx - D2/(pyx)^2 *pfit1)*Nyx * weight
Wbar <- matrix(0,n,p)
num <- rep(1:n,2)
for (i in 1:n) Wbar[i,] <- apply(W[num==i,],2,sum)
from.y <- c(y[x==1],y[x==0])
from.x <- c(x[x==1],x[x==0])
m1 <- apply( Wbar[from.y==1 & from.x==1,],2,mean)*NXY4/(NXY2+NXY4) + apply( Wbar[from.y==1 & from.x==0,],2,mean)*NXY3/(NXY1+NXY3)
nxy <- sum(from.y==1 & from.x==1)
d1 <- NXY4/(NXY2+NXY4)*(Wbar[from.y==1 & from.x==1,] - matrix(m1,nrow=nxy,ncol=p,byrow=TRUE))
nxy <- sum(from.y==1 & from.x==0)
d2 <- NXY3/(NXY1+NXY3)*(Wbar[from.y==1 & from.x==0,] - matrix(m1,nrow=nxy,ncol=p,byrow=TRUE))
I1 <- t(rbind(d1,d2)) %*% (rbind(d1,d2))
m1 <- apply( Wbar[from.y==0 & from.x==1,],2,mean)*NXY2/(NXY2+NXY4) + apply( Wbar[from.y==0 & from.x==0,],2,mean)*NXY1/(NXY1+NXY3)
nxy <- sum(from.y==0 & from.x==1)
d3 <- NXY2/(NXY2+NXY4)*(Wbar[from.y==0 & from.x==1,] - matrix(m1,nrow=nxy,ncol=p,byrow=TRUE))
nxy <- sum(from.y==0 & from.x==0)
d4 <- NXY1/(NXY1+NXY3)*(Wbar[from.y==0 & from.x==0,] - matrix(m1,nrow=nxy,ncol=p,byrow=TRUE))
I2 <- t(rbind(d3,d4)) %*% (rbind(d3,d4))
I <- I1 + I2
stder <- round(sqrt(diag(qr.solve(cheese,tol=1e-21) %*% (cheese+I) %*% qr.solve(cheese,tol=1e-21))),4)
##beta <- round(beta,4)
## with IND assumption (stop)
}
}else{
## without IND assumption (start)
it <- 0
out <- scoreMat(X1,X,y1,x1,y,pzw,beta,Nyx)
while (diff > tol & it<=maxit & is.finite(sum(sum(out$cheese))) & !is.na(sum(sum(out$cheese)))) {
it <- it +1
out <- scoreMat(X1,X,y1,x1,y,pzw,beta,Nyx)
if (it<=maxit & is.finite(sum(sum(out$cheese))) & !is.na(sum(sum(out$cheese))))
{
betanew <- beta + qr.solve(out$cheese,tol=1e-21) %*% out$score
diff <- max(abs(beta-betanew))
beta <- betanew
}else converged=0
}
if (it > maxit) {
converged=0
}
if (converged==0) warning("function diverges!")
if (converged==1)
{
cheese <- out$cheese
I3 <- VarDensity(beta,X1,y1,x1,pzw,Nyx,z.from.y,n1,n0,n, N1, N0, N)
CovMat <- qr.solve(cheese,tol=1e-21) %*% (cheese+I3) %*% qr.solve(cheese,tol=1e-21)
##stder <- round(sqrt(diag(CovMat)),3)
##beta <- round(beta,3)
stder <- sqrt(diag(CovMat))
## without IND assumption (start)
}
}
#in divergent cases,generate NAs
if (converged==0)
{
tmpResult <- data.frame(beta <- rep(NA,length(extra)+4), stder=rep(NA,length(extra)+4), pVal=rep(NA,length(extra)+4))
rownames(tmpResult)[1:4] <- c("(Intercept)","x1","z1","x1:z1")
rownames(tmpResult)[5:nrow(tmpResult)] <- extra
}else
{
pVal <- 2*(1-pnorm(abs(beta/stder)))
##pval1 <- round(2*(1-pnorm(abs(beta[2]/stder[2]))),4)
##pval2 <- round(2*(1-pnorm(abs(beta[3]/stder[3]))),4)
##pval3 <- round(2*(1-pnorm(abs(beta[4]/stder[4]))),4)
##cat(beta[2],"(",stder[2],")","&",pval1, "&", beta[3],"(",stder[3],")","&",pval2, "&",beta[4],"(",stder[4],")","&",pval3,"\n")
tmpResult <- data.frame(beta=round(beta, 4), stder=round(stder, 4), pVal=pVal)
}
}
rowName <- rownames(tmpResult)
toRename <- match(c("x1", "z1", "x1:z1"), rowName)
rowName[toRename] <- c(paste(treatment, "(Treatment)"),
paste(BaselineMarker, "(BaselineMarker)"),
paste(treatment, BaselineMarker, sep=":")
)
rownames(tmpResult) <- rowName
return(tmpResult)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.