GEEMiHC <- function(y, id, covs = NULL, otu.tab, tree = NULL, model, Gamma = c(1,3,5,7,9), Lamda = matrix(c(1, rep(0, 8), rep(1/3, 3), rep(0, 6), rep(1/5, 5), rep(0, 4), rep(1/7, 7), rep(0, 2), rep(1/9, 9)), 5, 9, byrow = T), comp = FALSE, CLR = FALSE, opt.ncl = 30, n.perm = 5000) { if (length(Gamma) != nrow(Lamda) | max(Gamma) != ncol(Lamda) | length(Gamma) > 100 | length(Gamma) < 1) { stop("The number(maximum value) of the Gamma is out of range or not equal to the number of rows(columns) in Lamda. Please enter the correct Gamma or Lamda!") } for (i in 1:nrow(Lamda)) { if (sum(Lamda[i,]) != 1){ stop(paste(paste("The sum of Lamda's", i, sep = " "), paste("th row isn't 1. Please enter the correct Lamda!"), sep = "")) } if (i < nrow(Lamda) && max(Lamda[i, as.numeric((Gamma[i]+1):max(Gamma))]) != 0){ stop("Non-zero values occupy the position of the 0 element in Lamda. Please enter the correct Lamda!") } } if (!comp) { otu.tab <- t(apply(otu.tab, 1, function(x)x/sum(x))) } com.otu.tab <- otu.tab if (CLR) { otu.tab <- t(apply(com.otu.tab, 1, clr)) } n <- length(y) p <- ncol(otu.tab) options(warn=-1) if (is.null(covs)) { fit.GEE.AR <- MGEE(y ~ 1, id=id , family = model, corstr = "AR-1") f.y.GEE.AR <- fitted.values(fit.GEE.AR) r.GEE.AR <- y - f.y.GEE.AR fit.GEE.EX <- MGEE(y ~ 1, id=id , family = model, corstr = "exchangeable") f.y.GEE.EX <- fitted.values(fit.GEE.EX) r.GEE.EX <- y - f.y.GEE.EX fit.GEE.IN <- MGEE(y ~ 1, id=id , family = model, corstr = "independence") f.y.GEE.IN <- fitted.values(fit.GEE.IN) r.GEE.IN <- y - f.y.GEE.IN } else { fit.GEE.AR <- MGEE(y ~ ., id=id , family = model, corstr = "AR-1", as.data.frame(covs)) f.y.GEE.AR <- fitted.values(fit.GEE.AR) r.GEE.AR <- y - f.y.GEE.AR fit.GEE.EX <- MGEE(y ~ ., id=id , family = model, corstr = "exchangeable", as.data.frame(covs)) f.y.GEE.EX <- fitted.values(fit.GEE.EX) r.GEE.EX <- y - f.y.GEE.EX fit.GEE.IN <- MGEE(y ~ ., id=id , family = model, corstr = "independence", as.data.frame(covs)) f.y.GEE.IN <- fitted.values(fit.GEE.IN) r.GEE.IN <- y - f.y.GEE.IN } r.s.GEE.AR <- list() r.s.GEE.EX <- list() r.s.GEE.IN <- list() for (j in 1:n.perm) { ind_r.s <- shuffle(n) r.s.GEE.AR[[j]] <- r.GEE.AR[ind_r.s] r.s.GEE.EX[[j]] <- r.GEE.EX[ind_r.s] r.s.GEE.IN[[j]] <- r.GEE.IN[ind_r.s] } n.Zs.GEE.AR <- as.numeric(r.GEE.AR) %*% otu.tab n.Zs.GEE.EX <- as.numeric(r.GEE.EX) %*% otu.tab n.Zs.GEE.IN <- as.numeric(r.GEE.IN) %*% otu.tab n.Z0s.GEE.AR <- lapply(r.s.GEE.AR, function(x) as.numeric(x %*% otu.tab)) n.Z0s.GEE.EX <- lapply(r.s.GEE.EX, function(x) as.numeric(x %*% otu.tab)) n.Z0s.GEE.IN <- lapply(r.s.GEE.IN, function(x) as.numeric(x %*% otu.tab)) U0s.GEE.AR <- lapply(apply(sapply(r.s.GEE.AR, function(x) return(x %*% otu.tab)),1,list),unlist) U0s.GEE.EX <- lapply(apply(sapply(r.s.GEE.EX, function(x) return(x %*% otu.tab)),1,list),unlist) U0s.GEE.IN <- lapply(apply(sapply(r.s.GEE.IN, function(x) return(x %*% otu.tab)),1,list),unlist) se.GEE.AR <- sapply(U0s.GEE.AR, sd) se.GEE.EX <- sapply(U0s.GEE.EX, sd) se.GEE.IN <- sapply(U0s.GEE.IN, sd) Zs.GEE.AR <- n.Zs.GEE.AR/se.GEE.AR Zs.GEE.EX <- n.Zs.GEE.EX/se.GEE.EX Zs.GEE.IN <- n.Zs.GEE.IN/se.GEE.IN Z0s.GEE.AR <- lapply(n.Z0s.GEE.AR, function(x) x/se.GEE.AR) Z0s.GEE.EX <- lapply(n.Z0s.GEE.EX, function(x) x/se.GEE.EX) Z0s.GEE.IN <- lapply(n.Z0s.GEE.IN, function(x) x/se.GEE.IN) if (class(tree) == "phylo") { CDis <- cophenetic(tree) for (j in 1:nrow(CDis)) { ind.t <- which(CDis[j,] == 0) ind.d <- which(ind.t == j) ind.r <- ind.t[-ind.d] CDis[j,ind.r] <- min(CDis[j,-ind.t])/2 } if (opt.ncl == "gmx") { asw <- numeric(p-1) for (j in 2:(p-1)) { asw[j] <- pam(CDis, j)$silinfo$avg.width } k.best <- which.max(asw) clust.pam <- pam(CDis, k.best, cluster.only=TRUE) } if (opt.ncl == "fmx") { asw <- numeric(p-1) j <- 2 asw[j] <- pam(CDis, j)$silinfo$avg.width while (asw[j-1] < asw[j] & j <= p-1) { j <- j + 1 asw[j] <- pam(CDis, j)$silinfo$avg.width } k.best <- j-1 clust.pam <- pam(CDis, k.best, cluster.only=TRUE) } if (opt.ncl != "gmx" & opt.ncl != "fmx") { asw <- numeric(p-1) for (j in 2:opt.ncl) { asw[j] <- pam(CDis, j)$silinfo$avg.width } k.best <- which.max(asw) clust.pam <- pam(CDis, k.best, cluster.only=TRUE) } Ws.GEE.AR <- rep(NA, p) Ws.GEE.EX <- rep(NA, p) Ws.GEE.IN <- rep(NA, p) for (j in 1:p) { ind.1 <- match(colnames(CDis)[j], names(clust.pam)) ind.2 <- which(clust.pam == as.numeric(clust.pam[ind.1])) ind.3 <- which(ind.2 == ind.1) ind <- ind.2[-ind.3] if (length(ind) == 0) { Ws.GEE.AR[j] <- 1 Ws.GEE.EX[j] <- 1 Ws.GEE.IN[j] <- 1 } else { inv.Ds <- 1/(CDis[j,ind]) abs.Zs.GEE.AR <- abs(Zs.GEE.AR[ind]) abs.Zs.GEE.EX <- abs(Zs.GEE.EX[ind]) abs.Zs.GEE.IN <- abs(Zs.GEE.IN[ind]) Ws.GEE.AR[j] <- sum(inv.Ds*abs.Zs.GEE.AR)/sum(inv.Ds) + 1 Ws.GEE.EX[j] <- sum(inv.Ds*abs.Zs.GEE.EX)/sum(inv.Ds) + 1 Ws.GEE.IN[j] <- sum(inv.Ds*abs.Zs.GEE.IN)/sum(inv.Ds) + 1 } } Ws.GEE.AR <- Ws.GEE.AR/sum(Ws.GEE.AR) Ws.GEE.EX <- Ws.GEE.EX/sum(Ws.GEE.EX) Ws.GEE.IN <- Ws.GEE.IN/sum(Ws.GEE.IN) ahc.o.GEE.AR <- as.list(GEEMiHC.stat(Zs=Zs.GEE.AR, Gamma=Gamma, Lamda=Lamda, Ws=Ws.GEE.AR)) ahc.o.GEE.EX <- as.list(GEEMiHC.stat(Zs=Zs.GEE.EX, Gamma=Gamma, Lamda=Lamda, Ws=Ws.GEE.EX)) ahc.o.GEE.IN <- as.list(GEEMiHC.stat(Zs=Zs.GEE.IN, Gamma=Gamma, Lamda=Lamda, Ws=Ws.GEE.IN)) ahc.p.GEE.AR <- lapply(apply(sapply(Z0s.GEE.AR, function(x) GEEMiHC.stat(Zs=x, Gamma=Gamma, Lamda=Lamda, Ws=Ws.GEE.AR)), 1, list), unlist) ahc.p.GEE.EX <- lapply(apply(sapply(Z0s.GEE.EX, function(x) GEEMiHC.stat(Zs=x, Gamma=Gamma, Lamda=Lamda, Ws=Ws.GEE.EX)), 1, list), unlist) ahc.p.GEE.IN <- lapply(apply(sapply(Z0s.GEE.IN, function(x) GEEMiHC.stat(Zs=x, Gamma=Gamma, Lamda=Lamda, Ws=Ws.GEE.IN)), 1, list), unlist) pvs.GEE.AR <- mapply(function(x, y) (length(which(x < y))+0.01)/(n.perm+0.01), ahc.o.GEE.AR, ahc.p.GEE.AR) pvs.GEE.EX <- mapply(function(x, y) (length(which(x < y))+0.01)/(n.perm+0.01), ahc.o.GEE.EX, ahc.p.GEE.EX) pvs.GEE.IN <- mapply(function(x, y) (length(which(x < y))+0.01)/(n.perm+0.01), ahc.o.GEE.IN, ahc.p.GEE.IN) ind.pvs.GEE.AR <- 1-pchisq(Zs.GEE.AR^2, df=1) ind.pvs.GEE.EX <- 1-pchisq(Zs.GEE.EX^2, df=1) ind.pvs.GEE.IN <- 1-pchisq(Zs.GEE.IN^2, df=1) simes.pv.GEE.AR <- round(min(length(ind.pvs.GEE.AR)*ind.pvs.GEE.AR/rank(ind.pvs.GEE.AR)),4) simes.pv.GEE.EX <- round(min(length(ind.pvs.GEE.EX)*ind.pvs.GEE.EX/rank(ind.pvs.GEE.EX)),4) simes.pv.GEE.IN <- round(min(length(ind.pvs.GEE.IN)*ind.pvs.GEE.IN/rank(ind.pvs.GEE.IN)),4) simes.pv0s.ori.GEE.AR <- unlist(lapply(Z0s.GEE.AR, function(x) {xx <- 1-pchisq(x^2, df=1); min(length(xx)*xx/rank(xx))})) simes.pv0s.ori.GEE.EX <- unlist(lapply(Z0s.GEE.EX, function(x) {xx <- 1-pchisq(x^2, df=1); min(length(xx)*xx/rank(xx))})) simes.pv0s.ori.GEE.IN <- unlist(lapply(Z0s.GEE.IN, function(x) {xx <- 1-pchisq(x^2, df=1); min(length(xx)*xx/rank(xx))})) simes.pv0s.GEE.AR <- rep(NA, n.perm) simes.pv0s.GEE.EX <- rep(NA, n.perm) simes.pv0s.GEE.IN <- rep(NA, n.perm) for (j in 1:n.perm) { simes.pv0s.GEE.AR[j] <- (length(which(simes.pv0s.ori.GEE.AR[-j] < simes.pv0s.ori.GEE.AR[j]))+0.01)/(n.perm+0.01) simes.pv0s.GEE.EX[j] <- (length(which(simes.pv0s.ori.GEE.EX[-j] < simes.pv0s.ori.GEE.EX[j]))+0.01)/(n.perm+0.01) simes.pv0s.GEE.IN[j] <- (length(which(simes.pv0s.ori.GEE.IN[-j] < simes.pv0s.ori.GEE.IN[j]))+0.01)/(n.perm+0.01) } simes.pv.GEE.AR <- (length(which(simes.pv0s.GEE.AR < simes.pv.GEE.AR))+0.01)/(n.perm+0.01) simes.pv.GEE.EX <- (length(which(simes.pv0s.GEE.EX < simes.pv.GEE.EX))+0.01)/(n.perm+0.01) simes.pv.GEE.IN <- (length(which(simes.pv0s.GEE.IN < simes.pv.GEE.IN))+0.01)/(n.perm+0.01) l.Gamma <- length(Gamma) Tu.GEE.AR <- min(pvs.GEE.AR[1:l.Gamma], simes.pv.GEE.AR) Tu.GEE.EX <- min(pvs.GEE.EX[1:l.Gamma], simes.pv.GEE.EX) Tu.GEE.IN <- min(pvs.GEE.IN[1:l.Gamma], simes.pv.GEE.IN) T0u.GEE.AR <- rep(NA, n.perm) T0u.GEE.EX <- rep(NA, n.perm) T0u.GEE.IN <- rep(NA, n.perm) Tw.GEE.AR <- min(pvs.GEE.AR[(l.Gamma+1):(l.Gamma*2)], simes.pv.GEE.AR) Tw.GEE.EX <- min(pvs.GEE.EX[(l.Gamma+1):(l.Gamma*2)], simes.pv.GEE.EX) Tw.GEE.IN <- min(pvs.GEE.IN[(l.Gamma+1):(l.Gamma*2)], simes.pv.GEE.IN) T0w.GEE.AR <- rep(NA, n.perm) T0w.GEE.EX <- rep(NA, n.perm) T0w.GEE.IN <- rep(NA, n.perm) for (j in 1:n.perm) { T0u.o.GEE.AR <- lapply(ahc.p.GEE.AR[1:l.Gamma], function(x) x[j]) T0u.o.GEE.EX <- lapply(ahc.p.GEE.EX[1:l.Gamma], function(x) x[j]) T0u.o.GEE.IN <- lapply(ahc.p.GEE.IN[1:l.Gamma], function(x) x[j]) T0u.p.GEE.AR <- lapply(ahc.p.GEE.AR[1:l.Gamma], function(x) x[-j]) T0u.p.GEE.EX <- lapply(ahc.p.GEE.EX[1:l.Gamma], function(x) x[-j]) T0u.p.GEE.IN <- lapply(ahc.p.GEE.IN[1:l.Gamma], function(x) x[-j]) T0u.GEE.AR[j] <- min(mapply(function(x, y) (length(which(x < y))+0.01)/(n.perm+0.01), T0u.o.GEE.AR, T0u.p.GEE.AR)) T0u.GEE.EX[j] <- min(mapply(function(x, y) (length(which(x < y))+0.01)/(n.perm+0.01), T0u.o.GEE.EX, T0u.p.GEE.EX)) T0u.GEE.IN[j] <- min(mapply(function(x, y) (length(which(x < y))+0.01)/(n.perm+0.01), T0u.o.GEE.IN, T0u.p.GEE.IN)) } for (j in 1:n.perm) { T0w.o.GEE.AR <- lapply(ahc.p.GEE.AR[(l.Gamma+1):(l.Gamma*2)], function(x) x[j]) T0w.o.GEE.EX <- lapply(ahc.p.GEE.EX[(l.Gamma+1):(l.Gamma*2)], function(x) x[j]) T0w.o.GEE.IN <- lapply(ahc.p.GEE.IN[(l.Gamma+1):(l.Gamma*2)], function(x) x[j]) T0w.p.GEE.AR <- lapply(ahc.p.GEE.AR[(l.Gamma+1):(l.Gamma*2)], function(x) x[-j]) T0w.p.GEE.EX <- lapply(ahc.p.GEE.EX[(l.Gamma+1):(l.Gamma*2)], function(x) x[-j]) T0w.p.GEE.IN <- lapply(ahc.p.GEE.IN[(l.Gamma+1):(l.Gamma*2)], function(x) x[-j]) T0w.GEE.AR[j] <- min(mapply(function(x, y) (length(which(x < y))+0.01)/(n.perm+0.01), T0w.o.GEE.AR, T0w.p.GEE.AR)) T0w.GEE.EX[j] <- min(mapply(function(x, y) (length(which(x < y))+0.01)/(n.perm+0.01), T0w.o.GEE.EX, T0w.p.GEE.EX)) T0w.GEE.IN[j] <- min(mapply(function(x, y) (length(which(x < y))+0.01)/(n.perm+0.01), T0w.o.GEE.IN, T0w.p.GEE.IN)) } T.GEE.AR <- min(pvs.GEE.AR, simes.pv.GEE.AR) T0.GEE.AR <- apply(cbind(T0u.GEE.AR, T0w.GEE.AR, simes.pv0s.GEE.AR),1,min) pv.opt.GEE.AR <- (length(which(T0.GEE.AR < T.GEE.AR))+0.01)/(n.perm+0.01) T.GEE.EX <- min(pvs.GEE.EX, simes.pv.GEE.EX) T0.GEE.EX <- apply(cbind(T0u.GEE.EX, T0w.GEE.EX, simes.pv0s.GEE.EX),1,min) pv.opt.GEE.EX <- (length(which(T0.GEE.EX < T.GEE.EX))+0.01)/(n.perm+0.01) T.GEE.IN <- min(pvs.GEE.IN, simes.pv.GEE.IN) T0.GEE.IN <- apply(cbind(T0u.GEE.IN, T0w.GEE.IN, simes.pv0s.GEE.IN),1,min) pv.opt.GEE.IN <- (length(which(T0.GEE.IN < T.GEE.IN))+0.01)/(n.perm+0.01) T <- min(T.GEE.AR, T.GEE.EX, T.GEE.IN) T0 <- apply(cbind(T0.GEE.AR, T0.GEE.EX, T0.GEE.IN),1,min) pv.opt <- (length(which(T0 < T))+0.01)/(n.perm+0.01) names(simes.pv.GEE.AR) <- "Simes.GEE(AR)" names(simes.pv.GEE.EX) <- "Simes.GEE(EX)" names(simes.pv.GEE.IN) <- "Simes.GEE(IN)" item.pvs.GEE.AR <- cbind(pvs.GEE.AR[1:l.Gamma], pvs.GEE.AR[(l.Gamma+1):(l.Gamma*2)]) item.pvs.GEE.EX <- cbind(pvs.GEE.EX[1:l.Gamma], pvs.GEE.EX[(l.Gamma+1):(l.Gamma*2)]) item.pvs.GEE.IN <- cbind(pvs.GEE.IN[1:l.Gamma], pvs.GEE.IN[(l.Gamma+1):(l.Gamma*2)]) colnames(item.pvs.GEE.AR) <- c("uGEEHC(h).AR", "wGEEHC(h).AR") colnames(item.pvs.GEE.EX) <- c("uGEEHC(h).EX", "wGEEHC(h).EX") colnames(item.pvs.GEE.IN) <- c("uGEEHC(h).IN", "wGEEHC(h).IN") rownames(item.pvs.GEE.AR) <- as.character(Gamma) rownames(item.pvs.GEE.EX) <- as.character(Gamma) rownames(item.pvs.GEE.IN) <- as.character(Gamma) ada.pvs <- c(pv.opt.GEE.AR, pv.opt.GEE.EX, pv.opt.GEE.IN, pv.opt) names(ada.pvs) <- c("GEEMiHC(AR)", "GEEMiHC(EX)", "GEEMiHC(IN)", "aGEEMiHC") otu.ids <- colnames(otu.tab) otu.ids <- sub("New.ReferenceOTU", "N", otu.ids) otu.ids <- sub("New.CleanUp.ReferenceOTU", "NC", otu.ids) pvs.GEE.AR <- ind.pvs.GEE.AR pvs.GEE.EX <- ind.pvs.GEE.EX pvs.GEE.IN <- ind.pvs.GEE.IN i0.GEE.AR <- which(pvs.GEE.AR < 0.00000001) i0.GEE.EX <- which(pvs.GEE.EX < 0.00000001) i0.GEE.IN <- which(pvs.GEE.IN < 0.00000001) i1.GEE.AR <- which(pvs.GEE.AR > 0.99999999) i1.GEE.EX <- which(pvs.GEE.EX > 0.99999999) i1.GEE.IN <- which(pvs.GEE.IN > 0.99999999) pvs.GEE.AR[i0.GEE.AR] <- 0.00000001 pvs.GEE.EX[i0.GEE.EX] <- 0.00000001 pvs.GEE.IN[i0.GEE.IN] <- 0.00000001 pvs.GEE.AR[i1.GEE.AR] <- 0.99999999 pvs.GEE.EX[i1.GEE.EX] <- 0.99999999 pvs.GEE.IN[i1.GEE.IN] <- 0.99999999 Is.GEE.AR <- order(order(pvs.GEE.AR)) Is.GEE.EX <- order(order(pvs.GEE.EX)) Is.GEE.IN <- order(order(pvs.GEE.IN)) exp.HC.GEE.AR <- (Is.GEE.AR/p)/sqrt(pvs.GEE.AR*(1-pvs.GEE.AR)/p) exp.HC.GEE.EX <- (Is.GEE.EX/p)/sqrt(pvs.GEE.EX*(1-pvs.GEE.EX)/p) exp.HC.GEE.IN <- (Is.GEE.IN/p)/sqrt(pvs.GEE.IN*(1-pvs.GEE.IN)/p) obs.HC.GEE.AR <- pvs.GEE.AR/sqrt(pvs.GEE.AR*(1-pvs.GEE.AR)/p) obs.HC.GEE.EX <- pvs.GEE.EX/sqrt(pvs.GEE.EX*(1-pvs.GEE.EX)/p) obs.HC.GEE.IN <- pvs.GEE.IN/sqrt(pvs.GEE.IN*(1-pvs.GEE.IN)/p) exp.wHC.GEE.AR <- Ws.GEE.AR*(Is.GEE.AR/p)/sqrt(pvs.GEE.AR*(1-pvs.GEE.AR)/p) exp.wHC.GEE.EX <- Ws.GEE.EX*(Is.GEE.EX/p)/sqrt(pvs.GEE.EX*(1-pvs.GEE.EX)/p) exp.wHC.GEE.IN <- Ws.GEE.IN*(Is.GEE.IN/p)/sqrt(pvs.GEE.IN*(1-pvs.GEE.IN)/p) obs.wHC.GEE.AR <- Ws.GEE.AR*pvs.GEE.AR/sqrt(pvs.GEE.AR*(1-pvs.GEE.AR)/p) obs.wHC.GEE.EX <- Ws.GEE.EX*pvs.GEE.EX/sqrt(pvs.GEE.EX*(1-pvs.GEE.EX)/p) obs.wHC.GEE.IN <- Ws.GEE.IN*pvs.GEE.IN/sqrt(pvs.GEE.IN*(1-pvs.GEE.IN)/p) deviation.uGEEHC.AR <- abs(exp.HC.GEE.AR - obs.HC.GEE.AR) deviation.uGEEHC.EX <- abs(exp.HC.GEE.EX - obs.HC.GEE.EX) deviation.uGEEHC.IN <- abs(exp.HC.GEE.IN - obs.HC.GEE.IN) deviation.wGEEHC.AR <- abs(exp.wHC.GEE.AR - obs.wHC.GEE.AR) deviation.wGEEHC.EX <- abs(exp.wHC.GEE.EX - obs.wHC.GEE.EX) deviation.wGEEHC.IN <- abs(exp.wHC.GEE.IN - obs.wHC.GEE.IN) rank.uGEEHC.AR <- otu.ids[order(deviation.uGEEHC.AR, decreasing = T)] rank.uGEEHC.EX <- otu.ids[order(deviation.uGEEHC.EX, decreasing = T)] rank.uGEEHC.IN <- otu.ids[order(deviation.uGEEHC.IN, decreasing = T)] rank.wGEEHC.AR <- otu.ids[order(deviation.wGEEHC.AR, decreasing = T)] rank.wGEEHC.EX <- otu.ids[order(deviation.wGEEHC.EX, decreasing = T)] rank.wGEEHC.IN <- otu.ids[order(deviation.wGEEHC.IN, decreasing = T)] return(list(graph.els = list(otu.ids = otu.ids, exp.GEEHC.AR = exp.HC.GEE.AR, exp.GEEHC.EX = exp.HC.GEE.EX, exp.GEEHC.IN = exp.HC.GEE.IN, obs.GEEHC.AR = obs.HC.GEE.AR, obs.GEEHC.EX = obs.HC.GEE.EX, obs.GEEHC.IN = obs.HC.GEE.IN, exp.wGEEHC.AR = exp.wHC.GEE.AR, exp.wGEEHC.EX = exp.wHC.GEE.EX, exp.wGEEHC.IN = exp.wHC.GEE.IN, obs.wGEEHC.AR = obs.wHC.GEE.AR, obs.wGEEHC.EX = obs.wHC.GEE.EX, obs.wGEEHC.IN = obs.wHC.GEE.IN), opt.nclust = k.best, rank.order = list(uGEEHC.AR = rank.uGEEHC.AR, uGEEHC.EX = rank.uGEEHC.EX, uGEEHC.IN = rank.uGEEHC.IN, wGEEHC.AR = rank.wGEEHC.AR, wGEEHC.EX = rank.wGEEHC.EX, wGEEHC.IN = rank.wGEEHC.IN), simes.pv.GEE.AR = simes.pv.GEE.AR, simes.pv.GEE.EX = simes.pv.GEE.EX, simes.pv.GEE.IN = simes.pv.GEE.IN, ind.pvs.GEEMiHC.AR = item.pvs.GEE.AR, ind.pvs.GEEMiHC.EX = item.pvs.GEE.EX, ind.pvs.GEEMiHC.IN = item.pvs.GEE.IN, ada.pvs=ada.pvs)) } if (class(tree) != "phylo") { ahc.o.GEE.AR <- as.list(GEEMiHC.stat(Zs=Zs.GEE.AR, Gamma=Gamma, Lamda=Lamda, Ws=NULL)) ahc.o.GEE.EX <- as.list(GEEMiHC.stat(Zs=Zs.GEE.EX, Gamma=Gamma, Lamda=Lamda, Ws=NULL)) ahc.o.GEE.IN <- as.list(GEEMiHC.stat(Zs=Zs.GEE.IN, Gamma=Gamma, Lamda=Lamda, Ws=NULL)) ahc.p.GEE.AR <- lapply(apply(sapply(Z0s.GEE.AR, function(x) GEEMiHC.stat(Zs=x, Gamma=Gamma, Lamda=Lamda, Ws=NULL)), 1, list), unlist) ahc.p.GEE.EX <- lapply(apply(sapply(Z0s.GEE.EX, function(x) GEEMiHC.stat(Zs=x, Gamma=Gamma, Lamda=Lamda, Ws=NULL)), 1, list), unlist) ahc.p.GEE.IN <- lapply(apply(sapply(Z0s.GEE.IN, function(x) GEEMiHC.stat(Zs=x, Gamma=Gamma, Lamda=Lamda, Ws=NULL)), 1, list), unlist) pvs.GEE.AR <- mapply(function(x, y) (length(which(x < y))+0.01)/(n.perm+0.01), ahc.o.GEE.AR, ahc.p.GEE.AR) pvs.GEE.EX <- mapply(function(x, y) (length(which(x < y))+0.01)/(n.perm+0.01), ahc.o.GEE.EX, ahc.p.GEE.EX) pvs.GEE.IN <- mapply(function(x, y) (length(which(x < y))+0.01)/(n.perm+0.01), ahc.o.GEE.IN, ahc.p.GEE.IN) ind.pvs.GEE.AR <- 1-pchisq(Zs.GEE.AR^2, df=1) ind.pvs.GEE.EX <- 1-pchisq(Zs.GEE.EX^2, df=1) ind.pvs.GEE.IN <- 1-pchisq(Zs.GEE.IN^2, df=1) simes.pv.GEE.AR <- min(length(ind.pvs.GEE.AR)*ind.pvs.GEE.AR/rank(ind.pvs.GEE.AR)) simes.pv.GEE.EX <- min(length(ind.pvs.GEE.EX)*ind.pvs.GEE.EX/rank(ind.pvs.GEE.EX)) simes.pv.GEE.IN <- min(length(ind.pvs.GEE.IN)*ind.pvs.GEE.IN/rank(ind.pvs.GEE.IN)) simes.pv0s.ori.GEE.AR <- unlist(lapply(Z0s.GEE.AR, function(x) {xx <- 1-pchisq(x^2, df=1); min(length(xx)*xx/rank(xx))})) simes.pv0s.ori.GEE.EX <- unlist(lapply(Z0s.GEE.EX, function(x) {xx <- 1-pchisq(x^2, df=1); min(length(xx)*xx/rank(xx))})) simes.pv0s.ori.GEE.IN <- unlist(lapply(Z0s.GEE.IN, function(x) {xx <- 1-pchisq(x^2, df=1); min(length(xx)*xx/rank(xx))})) simes.pv0s.GEE.AR <- rep(NA, n.perm) simes.pv0s.GEE.EX <- rep(NA, n.perm) simes.pv0s.GEE.IN <- rep(NA, n.perm) for (j in 1:n.perm) { simes.pv0s.GEE.AR[j] <- (length(which(simes.pv0s.ori.GEE.AR[-j] < simes.pv0s.ori.GEE.AR[j]))+0.01)/(n.perm+0.01) simes.pv0s.GEE.EX[j] <- (length(which(simes.pv0s.ori.GEE.EX[-j] < simes.pv0s.ori.GEE.EX[j]))+0.01)/(n.perm+0.01) simes.pv0s.GEE.IN[j] <- (length(which(simes.pv0s.ori.GEE.IN[-j] < simes.pv0s.ori.GEE.IN[j]))+0.01)/(n.perm+0.01) } simes.pv.GEE.AR <- (length(which(simes.pv0s.GEE.AR < simes.pv.GEE.AR))+0.01)/(n.perm+0.01) simes.pv.GEE.EX <- (length(which(simes.pv0s.GEE.EX < simes.pv.GEE.EX))+0.01)/(n.perm+0.01) simes.pv.GEE.IN <- (length(which(simes.pv0s.GEE.IN < simes.pv.GEE.IN))+0.01)/(n.perm+0.01) l.Gamma <- length(Gamma) Tu.GEE.AR <- min(pvs.GEE.AR, simes.pv.GEE.AR) Tu.GEE.EX <- min(pvs.GEE.EX, simes.pv.GEE.EX) Tu.GEE.IN <- min(pvs.GEE.IN, simes.pv.GEE.IN) T0u.GEE.AR <- rep(NA, n.perm) T0u.GEE.EX <- rep(NA, n.perm) T0u.GEE.IN <- rep(NA, n.perm) for (j in 1:n.perm) { T0u.o.GEE.AR <- lapply(ahc.p.GEE.AR, function(x) x[j]) T0u.o.GEE.EX <- lapply(ahc.p.GEE.EX, function(x) x[j]) T0u.o.GEE.IN <- lapply(ahc.p.GEE.IN, function(x) x[j]) T0u.p.GEE.AR <- lapply(ahc.p.GEE.AR, function(x) x[-j]) T0u.p.GEE.EX <- lapply(ahc.p.GEE.EX, function(x) x[-j]) T0u.p.GEE.IN <- lapply(ahc.p.GEE.IN, function(x) x[-j]) T0u.GEE.AR[j] <- min(mapply(function(x, y) (length(which(x < y))+0.01)/(n.perm+0.01), T0u.o.GEE.AR, T0u.p.GEE.AR)) T0u.GEE.EX[j] <- min(mapply(function(x, y) (length(which(x < y))+0.01)/(n.perm+0.01), T0u.o.GEE.EX, T0u.p.GEE.EX)) T0u.GEE.IN[j] <- min(mapply(function(x, y) (length(which(x < y))+0.01)/(n.perm+0.01), T0u.o.GEE.IN, T0u.p.GEE.IN)) } T0u.minp.GEE.AR <- apply(cbind(T0u.GEE.AR, simes.pv0s.GEE.AR),1,min) T0u.minp.GEE.EX <- apply(cbind(T0u.GEE.EX, simes.pv0s.GEE.EX),1,min) T0u.minp.GEE.IN <- apply(cbind(T0u.GEE.IN, simes.pv0s.GEE.IN),1,min) pv.opt.u.GEE.AR <- (length(which(T0u.minp.GEE.AR < Tu.GEE.AR))+0.01)/(n.perm+0.01) pv.opt.u.GEE.EX <- (length(which(T0u.minp.GEE.EX < Tu.GEE.EX))+0.01)/(n.perm+0.01) pv.opt.u.GEE.IN <- (length(which(T0u.minp.GEE.IN < Tu.GEE.IN))+0.01)/(n.perm+0.01) T <- min(Tu.GEE.AR, Tu.GEE.EX, Tu.GEE.IN) T0 <- apply(cbind(T0u.minp.GEE.AR, T0u.minp.GEE.EX, T0u.minp.GEE.IN),1,min) pv.opt <- (length(which(T0 < T))+0.01)/(n.perm+0.01) names(simes.pv.GEE.AR) <- "Simes.GEE(AR)" names(simes.pv.GEE.EX) <- "Simes.GEE(EX)" names(simes.pv.GEE.IN) <- "Simes.GEE(IN)" item.pvs.GEE.AR <- as.matrix(pvs.GEE.AR) item.pvs.GEE.EX <- as.matrix(pvs.GEE.EX) item.pvs.GEE.IN <- as.matrix(pvs.GEE.IN) colnames(item.pvs.GEE.AR) <- "uGEEHC(h).AR" colnames(item.pvs.GEE.EX) <- "uGEEHC(h).EX" colnames(item.pvs.GEE.IN) <- "uGEEHC(h).IN" rownames(item.pvs.GEE.AR) <- as.character(Gamma) rownames(item.pvs.GEE.EX) <- as.character(Gamma) rownames(item.pvs.GEE.IN) <- as.character(Gamma) ada.pvs <- c(pv.opt.u.GEE.AR, pv.opt.u.GEE.EX, pv.opt.u.GEE.IN, pv.opt) names(ada.pvs) <- c("GEEMiHC(AR)", "GEEMiHC(EX)", "GEEMiHC(IN)", "aGEEMiHC") otu.ids <- colnames(otu.tab) otu.ids <- sub("New.ReferenceOTU", "N", otu.ids) otu.ids <- sub("New.CleanUp.ReferenceOTU", "NC", otu.ids) pvs.GEE.AR <- ind.pvs.GEE.AR pvs.GEE.EX <- ind.pvs.GEE.EX pvs.GEE.IN <- ind.pvs.GEE.IN i0.GEE.AR <- which(pvs.GEE.AR < 0.00000001) i0.GEE.EX <- which(pvs.GEE.EX < 0.00000001) i0.GEE.IN <- which(pvs.GEE.IN < 0.00000001) i1.GEE.AR <- which(pvs.GEE.AR > 0.99999999) i1.GEE.EX <- which(pvs.GEE.EX > 0.99999999) i1.GEE.IN <- which(pvs.GEE.IN > 0.99999999) pvs.GEE.AR[i0.GEE.AR] <- 0.00000001 pvs.GEE.EX[i0.GEE.EX] <- 0.00000001 pvs.GEE.IN[i0.GEE.IN] <- 0.00000001 pvs.GEE.AR[i1.GEE.AR] <- 0.99999999 pvs.GEE.EX[i1.GEE.EX] <- 0.99999999 pvs.GEE.IN[i1.GEE.IN] <- 0.99999999 Is.GEE.AR <- order(order(pvs.GEE.AR)) Is.GEE.EX <- order(order(pvs.GEE.EX)) Is.GEE.IN <- order(order(pvs.GEE.IN)) exp.HC.GEE.AR <- (Is.GEE.AR/p)/sqrt(pvs.GEE.AR*(1-pvs.GEE.AR)/p) exp.HC.GEE.EX <- (Is.GEE.EX/p)/sqrt(pvs.GEE.EX*(1-pvs.GEE.EX)/p) exp.HC.GEE.IN <- (Is.GEE.IN/p)/sqrt(pvs.GEE.IN*(1-pvs.GEE.IN)/p) obs.HC.GEE.AR <- pvs.GEE.AR/sqrt(pvs.GEE.AR*(1-pvs.GEE.AR)/p) obs.HC.GEE.EX <- pvs.GEE.EX/sqrt(pvs.GEE.EX*(1-pvs.GEE.EX)/p) obs.HC.GEE.IN <- pvs.GEE.IN/sqrt(pvs.GEE.IN*(1-pvs.GEE.IN)/p) deviation.uGEEHC.AR <- abs(exp.HC.GEE.AR - obs.HC.GEE.AR) deviation.uGEEHC.EX <- abs(exp.HC.GEE.EX - obs.HC.GEE.EX) deviation.uGEEHC.IN <- abs(exp.HC.GEE.IN - obs.HC.GEE.IN) rank.uGEEHC.AR <- otu.ids[order(deviation.uGEEHC.AR, decreasing = T)] rank.uGEEHC.EX <- otu.ids[order(deviation.uGEEHC.EX, decreasing = T)] rank.uGEEHC.IN <- otu.ids[order(deviation.uGEEHC.IN, decreasing = T)] return(list(graph.els = list(otu.ids = otu.ids, exp.GEEHC.AR = exp.HC.GEE.AR, exp.GEEHC.EX = exp.HC.GEE.EX, exp.GEEHC.IN = exp.HC.GEE.IN, obs.GEEHC.AR = obs.HC.GEE.AR, obs.GEEHC.EX = obs.HC.GEE.EX, obs.GEEHC.IN = obs.HC.GEE.IN), rank.order = list(uGEEHC.AR = rank.uGEEHC.AR, uGEEHC.EX = rank.uGEEHC.EX, uGEEHC.IN = rank.uGEEHC.IN), simes.pv.GEE.AR = simes.pv.GEE.AR, simes.pv.GEE.EX = simes.pv.GEE.EX, simes.pv.GEE.IN = simes.pv.GEE.IN, ind.pvs.GEEMiHC.AR = item.pvs.GEE.AR, ind.pvs.GEEMiHC.EX = item.pvs.GEE.EX, ind.pvs.GEEMiHC.IN = item.pvs.GEE.IN, ada.pvs = ada.pvs)) } }
This R package, GEEMiHC, can be used for detecting sparse microbial association signals adaptively from longitudinal microbiome data. It can be applied to datasets with diverse types of outcomes to study the association between diverse types of host phenotype and microbiome, such BMI (Gaussian distribution), disease status (Binomial distribution) or number of tumors (Poisson distribution). Considering cross-sectional data as a special case of longitudinal data, it can be also applied to cross-sectional data, in which case the results will be consistent with MiHC.
Our R package GEEMiHC requires these existing R packages ("phyloseq", "cluster", "compositions", "permute", "PGEE", "vegan", "ape", "dirmult", "aSPU", "MiSPU") to be pre-imported, where "phyloseq" is available in Bioconductor and the others are available in CRAN. We need to install these package using the following code:
BiocManager::install("phyloseq") install.packages("cluster") install.packages("compositions") install.packages("permute") install.packages("PGEE") install.packages("vegan") install.packages("ape") install.packages("dirmult") install.packages("aSPU") install.packages("MiSPU")
You may install GEEMiHC
from GitHub using the following code:
install.packages("devtools") devtools::install_github("xpjiang-ccnu/GEEMiHC", force=T)
We establish the connection between the marginal mean and other factors by specifying that
\begin{equation} \begin{aligned}[b] g({\mu}{it}) = \textbf{X}{it}^T \alpha + {\textbf{o}}{it}^T \beta, \end{aligned} \end{equation} where $\textbf{o}{it} = (o_{it1}, o_{it2}, \ldots, o_{itm})^T$ represents the abundance data of $m$ OTUs, $\textbf{X}{it} = (1, x{it1}, \ldots, x_{itl})^T$ is $(l+1)$-dimension vectors of covariates and $\mu {it}$ represents the expectation of $y{it}$. And we can obtain the estimator of regression parameter $\theta = (\alpha^T,\beta^T)^T$ by solving the following:
\begin{equation} \begin{aligned}[b] \textbf{S}(\theta) & = \sum_{i=1}^n (\frac{\partial \mu_i(\theta)}{\partial \theta ^T})^T \textbf{V}^{-1}i (\textbf{Y}_i-\mu_i(\theta)) \ & = \sum{i=1}^n {\begin{pmatrix} \textbf{X}{i}^T \ \textbf{o}{i}^T \end{pmatrix}} \textbf{A}_i(\theta)\textbf{V}^{-1}_i(\textbf{Y}_i-\mu_i(\theta))=0. \end{aligned} \end{equation} where $\textbf{V}_i=\textbf{A}^{1/2}_i(\theta)\textbf{R}(\tau)\textbf{A}^{1/2}_i(\theta)$ is the working covariance matrix and $\textbf{R}(\tau)$ is the working correlation matrix.
The GEE-based marginal score statistic is as follows: \begin{equation}\label{Z} \begin{aligned}[b] Z^{\rm GEE}j = \frac{\textbf{o}^T{{.}j}(\textbf{Y}-\hat{\mu}0)}{\sqrt{\textbf{o}^T{{.}j} \textbf{P} \textbf{o}{{.}j}}}, \end{aligned} \end{equation} where $\textbf{o}{.j} = (o_{11j}, o_{12j}, \ldots, o_{n {k_n} j})^T$ and $\textbf{Y} = (\textbf{Y}{1}^T, \ldots, \textbf{Y}_n^T)^T$. The vector of the expected values $\hat{\mu}_0$ consists of the estimator of ${\mu}{it}$
We consider association signals with close phylogenetic relevance to be amplified by adding the weighted factor $w_j$ to the GEE-based HC test (i.e., GEEHC test). Following the HC test and MiHC, the test statistics for the original and weighted GEE-based HC test are as follows: \begin{equation}\label{uGEEHC} \begin{aligned}[b] {\rm uGEEHC}=\max {j \in {{1, 2, \ldots, m}}} {\frac{r_j / m - p_j}{\sqrt{p_j (1-p_j)/m}}}, \end{aligned} \end{equation} \begin{equation}\label{wGEEHC} \begin{aligned}[b] {\rm wGEEHC}=\max {j \in {{1, 2, \ldots, m}}} {\frac{w_j (r_j / m - p_j)}{\sqrt{p_j (1-p_j)/m}}}, \end{aligned} \end{equation} where $$ w_j = \frac{\sum_ {j' \in \zeta (j)/{{j}}}^{} \frac{1}{D_{j, j'}} |Z^{\rm GEE}{j'}|}{\sum {j' \in \zeta (j)/{{j}}}^{} \frac{1}{D_{j, j'}}} +1 $$ is the weight of the $j$th OTU and $D_{j, j'}$ denotes the cophenetic distance and $\zeta (j)$ is a cluster with $j$th OTU by using the partitioning-around-medoids algorithm (PAM).
To adapt to low sparsity levels, we can make appropriate adjustments: \begin{equation}\label{uGEEHC(lambda)} \begin{aligned}[b] {\rm uGEEHC}{(\lambda)} = \sum {j'=1}^\lambda {\lambda_{j'} * {\rm uGEEHC}{j'}}, \end{aligned} \end{equation} \begin{equation}\label{wGEEHC(lambda)} \begin{aligned}[b] {\rm wGEEHC}{(\lambda)} = \sum {j'=1}^\lambda {\lambda{j'} * {\rm wGEEHC}{j'}}, \end{aligned} \end{equation} where $\lambda{j'}$ is the $j'$th weight and $\sum {j'=1}^\lambda \lambda{j'} = 1$.
The form of GEEMiHC is as follows: \begin{equation}\label{GEEMiHC} \begin{aligned}[b] T_{\rm GEEMiHC} = \min (\min {\lambda \in \Gamma} (P{{\rm uGEEHC}{(\lambda)}},P{{\rm wGEEHC}{(\lambda)}}), P{\rm Simes}), \quad \; \end{aligned} \end{equation} where $P_{{\rm uGEEHC}{(\lambda)}}$ and $P{{\rm wGEEHC}{(\lambda)}}$ are the $p$-values for the test statistics ${\rm uGEEHC}{(\lambda)}$ and ${\rm wGEEHC}{(\lambda)}$, respectively; and $P{\rm Simes}$ is the $p$-value for the Simes test \cite{Simes(1986)}, whose model is as follow: \begin{equation}\label{Simes} \begin{aligned}[b] P_{\rm Simes} = T_{\rm Simes} = \min {j \in {{1, 2, \ldots, m}}} {\frac{m p_j}{r_j}}. \end{aligned} \end{equation} To accommodate these uncertain correlation structures, it is valuable to analyze various related structures to obtain more accurate conclusions. The adaptive GEEMiHC (aGEEMiHC) integrates multiple GEEMiHCs with different structures, whose model is as follow: \begin{equation}\label{aGEEMiHC} \begin{aligned}[b] T{\rm aGEEMiHC} = \min (T^{\rm AR}{\rm GEEMiHC}, T^{\rm EX}{\rm GEEMiHC}, T^{\rm IN}{\rm GEEMiHC}), \end{aligned} \end{equation} where $T^{\rm AR}{\rm GEEMiHC}$, $T^{\rm EX}{\rm GEEMiHC}$ and $T^{\rm IN}{\rm GEEMiHC}$ represent test statistics for GEEMiHC(AR), GEEMiHC(EX) and GEEMiHC(IN), respectively (Eq. \ref{GEEMiHC}). For longitudinal data with an unknown underlying correlation within clusters, GEEMiHCs cannot always have robust stability, but aGEEMiHC can perform well.
First of all, we import requisite R packages:
library(cluster) library(permute) library(phyloseq) library(PGEE) library(GEEMiHC)
Then, we import example microbiome data:
data(CD_longitudinal) CD_longitudinal #> phyloseq-class experiment-level object #> otu_table() OTU Table: [ 372 taxa and 81 samples ] #> sample_data() Sample Data: [ 81 samples by 4 sample variables ] #> phy_tree() Phylogenetic Tree: [ 372 tips and 371 internal nodes ] otu.tab <- CD_longitudinal@otu_table tree <- CD_longitudinal@phy_tree y <- sample_data(CD_longitudinal)$label covs <- data.frame(matrix(NA, length(y), 2)) covs[,1] <- as.numeric(sample_data(CD_longitudinal)$age) covs[,2] <- as.factor(sample_data(CD_longitudinal)$smoker) id <- sample_data(CD_longitudinal)$id
The microbiome data we use includes OTU count table, phylogenetic tree and sample information, where sample information contains response Variable (e.g., disease status) and covariates (e.g., age, gender). Next, we test the association between CD status and microbial composition using aGEEMiHC:
set.seed(123) out <- GEEMiHC(y, id, covs=covs, otu.tab=otu.tab, tree=tree, model="binomial", n.perm=5000)
We can find significant association between CD status and microbial composition based on aGEEMiHC (p-value $\approx$ 0.006 < 0.05), and rank order for significant factors (Lachnospiraceae, No. "3586" "7929" "9498" et al., and Ruminococcaceae, No. "1784" "4760" "594" et al.) associated with the host phenotype. Lachnospiraceae and Ruminococcaceae have ever reported differences in these bacteria between CD patients and healthy people.
out$ind.pvs.GEEMiHC.AR #> uGEEHC(h).AR wGEEHC(h).AR #> 1 0.01980196 0.02520195 #> 3 0.02400195 0.02320195 #> 5 0.02280195 0.02140196 #> 7 0.02200196 0.02120196 #> 9 0.02140196 0.02060196 out$ind.pvs.GEEMiHC.EX #> uGEEHC(h).EX wGEEHC(h).EX #> 1 0.02060196 0.02600195 #> 3 0.02480195 0.02400195 #> 5 0.02480195 0.02220196 #> 7 0.02260195 0.02260195 #> 9 0.02180196 0.02140196 out$ind.pvs.GEEMiHC.IN #> uGEEHC(h).IN wGEEHC(h).IN #> 1 0.02520195 0.02700195 #> 3 0.02180196 0.01920196 #> 5 0.01940196 0.01660197 #> 7 0.01780196 0.01540197 #> 9 0.01700197 0.01480197 out$ada.pvs #> GEEMiHC(AR) GEEMiHC(EX) GEEMiHC(IN) aGEEMiHC #> 0.004801990 0.004201992 0.012601975 0.005601989 out$rank.order$uGEEHC.AR[1:50] #> [1] "3586" "8895" "2430" "2556" "7929" "5929" "7825" "1784" "8608" "288" #> [11] "4760" "9498" "6918" "594" "4861" "8792" "587" "6618" "9427" "4400" #> [21] "4193" "817" "1298" "6357" "6737" "9648" "1726" "9634" "1658" "3561" #> [31] "4128" "4645" "5264" "518" "7005" "5961" "4088" "9854" "8086" "1515" #> [41] "8853" "3271" "3119" "9514" "5816" "5102" "2425" "9304" "1863" "9488"
This function can be used for the OTUs count table simulated according to real data. The simulation process is implemented based on the Dirichlet-multinomial model. We first calculate the parameters of the real data, and then generate the corresponding cross-sectional simulated data according to the parameters.
Parameter estimation:
library(dirmult) data("throat.otu.tab", package = "MiSPU") nOTU = 100 otu_sum <- apply(throat.otu.tab, 2, sum) throat.otu.tab.100 <- throat.otu.tab[, order(otu_sum, decreasing = T)[1:nOTU]] parameters <- dirmult(throat.otu.tab.100) parameters
Generation of microbiome data:
otu.tab <- SimulateOTU(throat.otu.tab.100, nSam = 50, parameters, mu = 1000, size = 25)
Sun H, et al. Detecting sparse microbial association signals adaptively from longitudinal microbiome data based on generalized estimating equations. (under review)
Liang K and Zeger S. Longitudinal data analysis using generalized linear models. Biometrika 1986;73:13–22.
Koh H and Zhao N. A powerful microbial group association test based on the higher criticism analysis for sparse microbial association signals. Microbiome 2020;8(1):63.
Paradis E, et al. APE: Analyses of phylogenetics and evolution in R language. Bioinformatics 2004;20(2):289-290.
Reynolds A, et al. Clustering rules: A comparison of partitioning and hierarchical clustering algorithms. J Math Model Algor 2006;5:475–504.
Schirmer M, et al. Microbial genes and pathways in inflammatory bowel disease. Nat Rev Microbiol 2019;17(8):497–511.
Simes RJ. An improved Bonferroni procedure for multiple tests of significance. Biometrika 1986;73(3):751-754.
Vázquez-Baeza Y., et al. Guiding longitudinal sampling in IBD cohorts. Gut 2018;67:1743-1745.
Wang L. GEE analysis of clustered binary data with diverging number of covariates. Ann. Statist. 2011;39:389–417.
Wang L, et al. Penalized generalized estimating equations for high-dimensional longitudinal data analysis. Biometrics 2012;68(2):353-360.
Wu C, et al. An adaptive association test for microbiome data. Genome Med 2016;8(1):56.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.