R/2-simrun.R

#' library(sfdr, lib.loc = "/home/ntyet/R/x86_64-pc-linux-gnu-library/3.3")
#' library(repfdr, lib.loc = "/home/ntyet/R/x86_64-pc-linux-gnu-library/3.3")
#' library(ggplot2)
#' library(dplyr)
#'
#' source("../fxns.R");
#' library(GPA);
#' ##devtools::install_github('barakbri/repfdr')
#' library(repfdr);
#' dyn.load("/fsdr/fsdr.so");
#' source("../fsdr/fsdr.R");
#'
#' source("/Users/Yet/Box Sync/Collaboration/DaveZhao/v2/fxns.R");
#' library(GPA);
#' ##devtools::install_github('barakbri/repfdr')
#' library(repfdr);
#' dyn.load("/Users/Yet/Box Sync/Collaboration/DaveZhao/v2/fsdr/fsdr.so");
#' source("/Users/Yet/Box Sync/Collaboration/DaveZhao/v2/fsdr/fsdr.R");
#'
#'
#'
#'
#' #' Simulation From t-4 Distribution
#' #'
#' #' This function runs a simulation study to compare the
#' #' performance of our proposed method and Zhao's 2017 method
#' #' in detecting simultaneous signals in two independent
#' #' experiments.
#' #' @param m The number of genes/features.
#' #' @param m11,m10,m01 The number of simultaneous signals,
#' #' the number of features that are true signal in experiment 1 and
#' #' are true nulls in experient 2, the number of features that are true
#' #' nulls in experiment 1 and true signals in experiment 2.
#' #' @param mu1_mean,mu1_sd,mu2_mean,mu2_sd The mean and standard deviation
#' #' of normal distribution to simulate the size of signals in each experiment.
#' #' @param t1_df,t2_df The degree of freedom in t-distribution of the first
#' #' and second experiments.
#' #' @param n_sim the number of simulations to run.
#' #' @param print.progress True or False to print the process of iteration
#' #' from 1 to n_sim.
#' #' @param ncores the number of cores for running parallel.
#' #'
#' #' @return simulation results
#' #' @import plyr
#' #' @import ssa
#' #' @import parallel
#' #' @import stats
#' #' @import repfdr
#' #' @export
#' #' @examples
#' #' m = 1000; m11 = 250; m10 = 75; m01 = 25;
#' #' mu1_mean = 6; mu1_sd = 1;
#' #' mu2_mean = 6; mu2_sd = 1;
#' #' t1_df = 4; t2_df = 4;
#' #' n_sim = 2; ncores  = 1; print.progress = TRUE
#' #' pm <- proc.time()
#' #' simout <- sim_sfdr(m, m11, m10, m01,
#' #'                    mu1_mean, mu1_sd,
#' #'                    mu2_mean, mu2_sd,
#' #'                    t1_df, t2_df,
#' #'                    n_sim, ncores,
#' #'                    print.progress)
#' #' apply(simout, 2, mean)
#' #' proc.time() - pm
#'
#'
#' sim_sfdr <- function(m, m11, m10, m01, mu1_mean, mu1_sd, mu2_mean, mu2_sd, t1_df, t2_df, n_sim, ncores = 1, print.progress = F){
#'   mu1 <- mu2 <- rep(0, m)
#'   if(m11>0){
#'     mu1[1:m11] <- rnorm(n = m11, mean = mu1_mean, sd = mu1_sd)
#'     mu2[1:m11] <- rnorm(n = m11, mean = mu2_mean, sd = mu2_sd)
#'   }
#'   if(m10>0){
#'     mu1[(m11+1):(m11+m10)] <- rnorm(n = m10, mean = mu1_mean, sd = mu1_sd)
#'   }
#'   if(m01>0){
#'     mu2[(m11+m10+1):(m11+m10+m01)] <- rnorm(n = m01, mean = mu2_mean, sd = mu2_sd)
#'   }
#'   if (print.progress){
#'     out <- parallel::mclapply(1:n_sim, function(j){
#'       set.seed(j)
#'       print(j)
#'       pp <- plyr::laply(1:m, function(i){
#'         t1 <- rt(n = 1, df = t1_df, ncp = mu1[i])
#'         t2 <- rt(n = 1, df = t2_df, ncp = mu2[i])
#'         p1 <- 2*pt(q = abs(t1), df = t1_df, ncp = 0, lower.tail = FALSE)
#'         p2 <- 2*pt(q = abs(t2), df = t2_df, ncp = 0, lower.tail = FALSE)
#'         c(p1, p2)
#'       })
#'       int_sfdr_out <- int_sfdr(pp[,1], pp[,2])
#'       # Zhao method
#'       R <- sum(int_sfdr_out <= 0.05)
#'       V <- sum(int_sfdr_out <= 0.05 & c(rep(0, m11), rep(1, m-m11)))
#'       FDR <- V/max(1, R)
#'       S <-  R - V
#'       tt <- ssa::fsdr(pp[,1], pp[,2], m1 = m/2, m2 = m/2, alpha = .05)
#'       R0 <- sum(pp[,1] <=tt[1] & pp[,2] <= tt[2])
#'       V0 <- sum(which(pp[,1] <=tt[1] & pp[,2] <= tt[2])>m11)
#'       FDR0 <- V0/max(1,R0)
#'
#'       # Heller and Yekutieli Method
#'       # zmat <- qnorm(pp)
#'       # ztobinsout <- repfdr::ztobins(zmat, n.association.status = 3, n.bins = 120, type = 0,
#'       #                       df = 7, central.prop = 0.9)
#'       # pbz_sim <- ztobinsout$pdf.binned.z
#'       # bz_sim <- ztobinsout$binned.z.mat
#'       # output.rep <- repfdr::repfdr(pbz_sim, bz_sim, "replication", control = em.control(verbose = FALSE))
#'       # BayesFdr.rep <- output.rep$mat[,"Fdr"]
#'       zmat <- abs(qnorm(pp/2))
#'       ztobinsout <- repfdr::ztobins(zmat, n.association.status = 3, n.bins = 120, type = 0,
#'                                     df = 7, central.prop = 0.9)
#'       pbz_sim <- ztobinsout$pdf.binned.z[,,2:3]
#'       bz_sim <- ztobinsout$binned.z.mat
#'       output.rep <- repfdr::repfdr(pbz_sim, bz_sim, "replication")
#'       BayesFdr.rep <- output.rep$mat[,"Fdr"]
#'
#'       true.rep   <- c(rep(1, m11), rep(0, m - m11))
#'       Rej <- (BayesFdr.rep <= 0.05)
#'       S1 <- sum(Rej * true.rep)
#'       V1 <- sum(Rej * !true.rep)
#'       R1 <- sum(Rej)
#'       FDR1 <- V1/max(1,R1)
#'
#'
#'       out1 <- c(S = S, R = R, V = V, FDR = FDR,
#'                 S0 = R0-V0, R0 = R0, V0 = V0, FDR0 = FDR0,
#'                 S1 = S1, R1 = R1, V1 = V1, FDR1 = FDR1)
#'       out1
#'     }, mc.cores = ncores
#'     )
#'   } else{
#'     out <- parallel::mclapply(1:n_sim, function(j){
#'       set.seed(j)
#'       pp <- plyr::laply(1:m, function(i){
#'         t1 <- rt(n = 1, df = t1_df, ncp = mu1[i])
#'         t2 <- rt(n = 1, df = t2_df, ncp = mu2[i])
#'         p1 <- 2*pt(q = abs(t1), df = t1_df, ncp = 0, lower.tail = FALSE)
#'         p2 <- 2*pt(q = abs(t2), df = t2_df, ncp = 0, lower.tail = FALSE)
#'         c(p1, p2)
#'       })
#'       int_sfdr_out <- int_sfdr(pp[,1], pp[,2])
#'
#'       # Zhao method
#'       R <- sum(int_sfdr_out <= 0.05)
#'       V <- sum(int_sfdr_out <= 0.05 & c(rep(0, m11), rep(1, m-m11)))
#'       FDR <- V/max(1, R)
#'       S <-  R - V
#'       tt <- ssa::fsdr(pp[,1], pp[,2], m1 = m/2, m2 = m/2, alpha = .05)
#'       R0 <- sum(pp[,1] <=tt[1] & pp[,2] <= tt[2])
#'       V0 <- sum(which(pp[,1] <=tt[1] & pp[,2] <= tt[2])>m11)
#'       FDR0 <- V0/max(1,R0)
#'
#'       # Heller and Yekutieli Method
#'       zmat <- qnorm(pp)
#'       ztobinsout <- repfdr::ztobins(zmat, n.association.status = 2, n.bins = 120, type = 0,
#'                                     df = 7, central.prop = 0.5)
#'       pbz_sim <- ztobinsout$pdf.binned.z
#'       bz_sim <- ztobinsout$binned.z.mat
#'       output.rep <- repfdr::repfdr(pbz_sim, bz_sim, "replication")
#'       BayesFdr.rep <- output.rep$mat[,"Fdr"]
#'       true.rep   <- c(rep(1, m11), rep(0, m - m11))
#'       Rej <- (BayesFdr.rep <= 0.05)
#'       S1 <- sum(Rej * true.rep)
#'       V1 <- sum(Rej * !true.rep)
#'       R1 <- sum(Rej)
#'       FDR1 <- V1/max(1,R1)
#'
#'
#'       out1 <- c(S = S, R = R, V = V, FDR = FDR,
#'                 S0 = R0-V0, R0 = R0, V0 = V0, FDR0 = FDR0,
#'                 S1 = S1, R1 = R1, V1 = V1, FDR1 = FDR1)
#'       out1
#'     }, mc.cores = ncores
#'     )
#'   }
#'
#'   out <- do.call("rbind",out)
#'   out
#' }
#'
#'
#'
#'
#' m = 10000
#' mlist <- list(list(m11 <- c(25, 50, 500, 1000),
#'                    m01 <- c(25, 75, 1000)),
#'               list(m11 <- c(0, 25, 500, 1000),
#'                    m01 <- c(475, 975, 2000)))
#'
#' # mlist <- list(list(m11 <- c(250, 150, 500, 1000),
#' #                    m01 <- c(25, 75, 1000)),
#' #               list(m11 <- c(250, 325, 500, 1000),
#' #                    m01 <- c(475, 975, 2000)))
#'
#' mean_list <- c(2, 4, 6, 10)
#' mu1_sd = 1;
#' mu2_sd = 1;
#' t1_df = 4; t2_df = 4;
#' n_sim = 96; ncores  = 16; print.progress = FALSE
#'
#'
#' # m = 1000
#' # mlist <- list(list(m11 <- c(2, 5, 7, 10),
#' #                    m01 <- c(2, 5, 10)),
#' #               list(m11 <- c(0, 2, 5, 10),
#' #                    m01 <- c(4, 9, 20)))
#' #
#' # mean_list <- c(2, 4, 6)
#' # mu1_sd = 1;
#' # mu2_sd = 1;
#' # t1_df = 4; t2_df = 4;
#' # n_sim = 2; ncores  = 1; print.progress = FALSE
#'
#'
#' for(i in 1:2){
#'   for(j in 1:4){
#'     m11 <- mlist[[i]][[1]]
#'     m01 <- m10<- mlist[[i]][[2]]
#'     mu1_mean <- mu2_mean <- mean_list[j]
#'     sim_setting <- expand.grid(m = m, m11 = m11, m10 = m10, m01 = m01)
#'     sim_setting$Scenario <- paste(sim_setting$m,sim_setting$m11, sim_setting$m10, sim_setting$m01, sep = "_")
#'     out <- plyr::llply(1:nrow(sim_setting), function(k){
#'       cat("Scenario = ", k, "\n")
#'       m11 <- sim_setting$m11[k]
#'       m10 <- sim_setting$m10[k]
#'       m01 <- sim_setting$m01[k]
#'       Scenario <- sim_setting$Scenario[k]
#'       simout <- sim_sfdr(m, m11, m10, m01,
#'                          mu1_mean, mu1_sd,
#'                          mu2_mean, mu2_sd,
#'                          t1_df, t2_df,
#'                          n_sim, ncores,
#'                          print.progress)
#'       simout <- data.frame(simout)
#'       simout0 <- data.frame(NTP = c(simout$S, simout$S0, simout$S1),
#'                             FDR = c(simout$FDR, simout$FDR0, simout$FDR1),
#'                             Method = c(rep("Orr", n_sim), rep("Zhao", n_sim), rep("HY", n_sim)),
#'                             Scenario = rep(Scenario, 3*n_sim))
#'     })
#'     out <- do.call("rbind", out)
#'     saveRDS(out, file = paste0("Ressim_m_", m, "_i_", i,
#'                                "_j_", j,  "_mu1_",
#'                                mu1_mean, "_mu2_", mu2_mean, ".rds"))
#'     out_red <- out%>%dplyr::group_by(Method, Scenario)%>%
#'       dplyr::summarise(Ave_NTP = mean(NTP),
#'                        Ave_FDR = mean(FDR),
#'                        SD_NTP = sd(NTP)/sqrt(n_sim),
#'                        SD_FDR = sd(FDR)/sqrt(n_sim))
#'
#'
#'     sp1 <- strsplit(as.character(out_red$Scenario), split = "_")
#'     out_red$m11 <- plyr::laply(sp1, function(x)paste0("m11=",x[2]))
#'     out_red$m11 <- factor(out_red$m11, levels = c(paste0("m11=",m11[1]),paste0("m11=",m11[2]),
#'                                                   paste0("m11=",m11[3]),paste0("m11=",m11[4])))
#'     out_red$m01 <- plyr::laply(sp1, function(x)paste0("m01=",x[3]))
#'     out_red$m01 <- factor(out_red$m01, levels = c(paste0("m01=",m01[1]),paste0("m01=",m01[2]),
#'                                                   paste0("m01=",m01[3])))
#'
#'     out_red$m10 <- plyr::laply(sp1, function(x)paste0("m10=",x[4]))
#'     out_red$m10 <- factor(out_red$m10, levels = c(paste0("m10=",m10[1]),paste0("m10=",m10[2]),
#'                                                   paste0("m10=",m10[3])))
#'     out_red$Method <- factor(out_red$Method, levels = c("Orr", "HY", "Zhao"))
#'
#'     p1 <- ggplot(out_red, aes(x = Method, y = Ave_NTP))+
#'       geom_col()+
#'       geom_errorbar(aes(ymin = Ave_NTP - SD_NTP, ymax = Ave_NTP + SD_NTP), width = .3) +
#'       facet_grid(m11~m01+m10, scale = "free")+
#'       theme(axis.text.x = element_text(angle = 90, hjust = 1))
#'     p2 <- ggplot(out_red, aes(x = Method, y = Ave_FDR))+
#'       geom_col()+
#'       geom_errorbar(aes(ymin = Ave_FDR - SD_FDR, ymax = Ave_FDR + SD_FDR), width = .3) +
#'       facet_grid(m11~m01+m10, scale = "free") + geom_hline(yintercept = 0.05)+
#'       theme(axis.text.x = element_text(angle = 90, hjust = 1))
#'     p <- gridExtra::grid.arrange(p2, p1, nrow = 1)
#'     saveRDS(p, file = paste0("Plotsim_m_", m, "_i_", i,
#'                              "_j_", j,  "_mu1_",
#'                              mu1_mean, "_mu2_", mu2_mean, ".rds"))
#'   }
#' }
#'
#' #
#' # for(i in 1:2){
#' #   for(j in 1:4){
#' #     m11 <- mlist[[i]][[1]]
#' #     m01 <- m10<- mlist[[i]][[2]]
#' #     mu1_mean <- mu2_mean <- mean_list[j]
#' #     p <- readRDS(file = paste0("R/Plotsim_m_", m, "_i_", i,
#' #                              "_j_", j,  "_mu1_",
#' #                              mu1_mean, "_mu2_", mu2_mean, ".rds"))
#' #     ggsave(p, filename = paste0("R/Plotsim_m_", m, "_i_", i,
#' #                                 "_j_", j,  "_mu1_",
#' #                                 mu1_mean, "_mu2_", mu2_mean, ".png"), height = 8,
#' #                    width = 15)
#' #
#' #      }
#' # }
#'
#'
ntyet/sfdr documentation built on May 7, 2019, 1:30 p.m.