R/dea.R

Defines functions dea_init

dea_init <- function(fit, data, data.design, condition) {
  out = list()

  # prepare design
  out$DT.design <- data.table::as.data.table(data.design)
  if (!is.null(out$DT.design$AssayID)) out$DT.design[, AssayID := NULL]
  if (!is.null(out$DT.design$nPeptide)) out$DT.design[, nPeptide := NULL]
  if (!is.null(out$DT.design$nFeature)) out$DT.design[, nFeature := NULL]
  out$DT.design <- merge(out$DT.design, design(fit, as.data.table = T)[, .(AssayID, Assay)], by = "Assay")

  # prepare quants
  setDT(data)
  out$DT <- merge(data, out$DT.design, by = "AssayID")
  setDF(data)

  # organise data by pairwise levels of condition
  out$contrasts = combn(levels(out$DT[, get(condition)]), 2)
  out$DT.meta <- data.table::rbindlist(lapply(1:ncol(out$contrasts), function(j) {
    DT.output <- merge(
      out$DT[, .(AssayID, ProteinID, nPeptide, nFeature)],
      out$DT[as.character(get(condition)) %in% out$contrasts[, j]],
      by = c("AssayID", "ProteinID", "nPeptide", "nFeature"), all.x = T
    )
    DT.output[, (condition) := factor(as.character(get(condition)), levels = out$contrasts[, j])]
    DT.output <- DT.output[, .(
      `1:nMaxPeptide` = max(0, nPeptide[get(condition) == levels(get(condition))[1]], na.rm = T),
      `2:nMaxPeptide` = max(0, nPeptide[get(condition) == levels(get(condition))[2]], na.rm = T),
      `1:nMaxFeature` = max(0, nFeature[get(condition) == levels(get(condition))[1]], na.rm = T),
      `2:nMaxFeature` = max(0, nFeature[get(condition) == levels(get(condition))[2]], na.rm = T),
      `1:nTestSample` = length(unique(Sample[!is.na(Sample) & get(condition) == levels(get(condition))[1]])),
      `2:nTestSample` = length(unique(Sample[!is.na(Sample) & get(condition) == levels(get(condition))[2]])),
      `1:nRealSample` = sum(0, nPeptide[get(condition) == levels(get(condition))[1]] > 0, na.rm = T),
      `2:nRealSample` = sum(0, nPeptide[get(condition) == levels(get(condition))[2]] > 0, na.rm = T)
    ), by = ProteinID]
    DT.output[, Model := factor(paste(out$contrasts[, j], collapse = "_v_"))]
    data.table::setcolorder(DT.output, "Model")

    DT.output
  }))

  return(out)
}


#' Univariate differential expression analysis with 'limma::lmFit' linear model and 'limma::eBayes' moderation of standard errors
#'
#' The model is performed pair-wise across the levels of the 'Condition' in 'data.design'. Default is a standard Student's t-test model.
#'  todo: see http://www.metafor-project.org/doku.php/tips:rma_vs_lm_lme_lmer
#'  todo: https://stats.stackexchange.com/questions/142685/equivalent-to-welchs-t-test-in-gls-framework
#'
#' @import data.table
#' @import doRNG
#' @export
dea_limma <- function(
  fit,
  data = protein_quants(fit),
  data.design = design(fit),
  condition = "Condition",
  output = "limma",
  save.intercept = FALSE,
  as.data.table = FALSE,
  ...,
  use.moderation = TRUE,
  use.precision = TRUE
) {
  message("WARNING: 'dea_limma' supports only a single residual variance, not Welch style per-condition variances")
  if (!use.precision) message("WARNING: With 'use.precision = FALSE' this function does not use BayesProt quant precision estimates and hence should only be used for comparative purposes with the other 'dea' methods.")

  arguments <- eval(substitute(alist(...)))
  if (any(names(arguments) == "")) stop("all arguments in ... to be passed to 'limma::lmFit' must be named")
  if ("design" %in% names(arguments)) stop("do not pass a 'design' argument to 'limma::lmFit'")
  if ("weights" %in% names(arguments)) stop("do not pass a 'weights' argument to 'limma::lmFit'")
  if (is.null(data.design$AssayID)) data.design <- merge(data.design, design(fit, as.data.table = T)[, .(Assay, AssayID)], by = "Assay")

  control = control(fit)
  input <- dea_init(fit, data, data.design, condition)
  cts <- input$contrasts

  # for each contrast
  DT.de <- rbindlist(lapply(1:ncol(cts), function (j) {
    output.contrast <- list()

    # input
    output.contrast$DT.input <- input$DT[as.character(get(condition)) %in% cts[, j]]
    output.contrast$DT.input[, (condition) := factor(as.character(get(condition)), levels = cts[, j])]
    output.contrast$DT.input <- droplevels(output.contrast$DT.input)
    setcolorder(output.contrast$DT.input, c("AssayID", "Assay", "Run", "Channel", "Sample", "m", "s"))

    # design
    output.contrast$DT.design <- input$DT.design[as.character(get(condition)) %in% cts[, j]]
    output.contrast$DT.design[, (condition) := factor(as.character(get(condition)), levels = cts[, j])]
    output.contrast$DT.design <- droplevels(output.contrast$DT.design)

    # run limma
    limma.design <- model.matrix(~ Condition, output.contrast$DT.design)
    limma.object <- as.matrix(data.table::dcast(output.contrast$DT.input, ProteinID ~ AssayID, value.var = "m"), rownames = "ProteinID")
    if (use.precision) {
      limma.weights <- as.matrix(data.table::dcast(output.contrast$DT.input, ProteinID ~ AssayID, value.var = "s"), rownames = "ProteinID")
      limma.weights <- 1.0 / (limma.weights^2)
    } else {
      limma.weights <- NULL
    }
    ft <- limma::lmFit(limma.object, limma.design, weights = limma.weights)
    if (use.moderation == T) ft <- limma::eBayes(ft)

    # save
    saveRDS(ft, file.path(fit, "model", "protein.de", paste0(output, ".", j, ".rds")))

    coefs <- colnames(ft$coefficients)
    if (save.intercept == FALSE) coefs <- coefs[which(coefs != "(Intercept)")]
    rbindlist(lapply(coefs, function (coef) {
      DT <- data.table(
        Effect = factor(coef),
        ProteinID = as.integer(rownames(ft$coefficients))
      )

      if (use.moderation == T) {
        DT[, v_residual := ft$s2.post]
        DT[, df_residual := ft$df.total]
        DT[, m := ft$coefficients[, coef]]
        DT[, s := ft$stdev.unscaled[, coef] * sqrt(ft$s2.post)]
        DT[, df := df_residual]
        DT[, t := ft$t[, coef]]
        DT[, pvalue := ft$p.value[, coef]]
      } else {
        DT[, v_residual := ft$sigma^2]
        DT[, df_residual := ft$df.residual]
        DT[, m := ft$coefficients[, coef]]
        DT[, s := ft$stdev.unscaled[, coef]]
        DT[, df := df_residual]
        DT[, t := m / s] # limma user manual appears to be wrong
        DT[, pvalue := 2 * pt(-abs(t), df)]
      }

      return(DT)
    }))
  }), idcol = "Model")
  DT.de[, Model := factor(Model, labels = sapply(1:ncol(cts), function (i) paste(cts[, i], collapse = "_v_")))]

  # highlights if any proteins are missing for any model
  DT.proteins <- CJ(proteins(fit, as.data.table = T)[, ProteinID], DT.de$Model, unique = T)[, .(ProteinID = V1, Model = V2)]
  DT.de <- merge(DT.proteins, DT.de, by = c("ProteinID", "Model"), all.x = T, allow.cartesian = T)

  # ensure every ProteinID::Model has a full set of covariates and n
  DT.de <- DT.de[, merge(CJ(ProteinID, Effect, unique = T), .SD, by = c("ProteinID", "Effect"), keyby = "Effect", all = T), by = Model]
  DT.de <- DT.de[!is.na(Effect)]

  # merge in input.meta
  DT.de <- merge(input$DT.meta, DT.de, by = c("Model", "ProteinID"))
  setcolorder(DT.de, c("Model", "Effect", "ProteinID"))

  if (!as.data.table) setDF(DT.de)
  else DT.de[]
  return(DT.de)
}


#' Mixed-effects univariate differential expression analysis with 'metafor'
#'
#' The model is performed pair-wise across the levels of the 'Condition' in 'data.design'. Default is a standard Student's t-test model. See 'rma.uni' manpage for limitations of this approach.
#'
#' @import data.table
#' @import metafor
#' @import doRNG
#' @export
dea_metafor <- function(
  fit,
  data = protein_quants(fit),
  data.design = design(fit),
  condition = "Condition",
  mods = as.formula(paste("~", condition)),
  random = ~ Condition | Sample,
  struct = "DIAG",
  test = "t",
  output = "metafor",
  save.intercept = FALSE,
  as.data.table = FALSE,
  ...
) {
  arguments <- eval(substitute(alist(...)))
  if (any(names(arguments) == "")) stop("all arguments in ... to be passed to 'metafor::rma.mv' must be named")
  if ("yi" %in% names(arguments)) stop("do not pass a 'yi' argument to metafor")
  if ("V" %in% names(arguments)) stop("do not pass a 'V' argument to metafor")
  if ("data" %in% names(arguments)) stop("do not pass a 'data' argument to metafor")
  if ("test" %in% names(arguments)) stop("do not pass a 'test' argument to metafor")
  if (!("control" %in% names(arguments))) control = list()

  input <- dea_init(fit, data, data.design, condition)
  cts <- input$contrasts
  DTs.chunk <- batch_split(input$DT, "ProteinID", 16)

  # start cluster and reproducible seed
  pb <- txtProgressBar(max = length(DTs.chunk), style = 3)
  DT.de <- foreach(
    DT.chunk = iterators::iter(DTs.chunk),
    .final = rbindlist,
    .inorder = F,
    .packages = "data.table",
    .options.snow = list(progress = function(n) setTxtProgressBar(pb, n))
  ) %dorng% {
    # process chunk
    output.chunk <- lapply(split(DT.chunk, by = "ProteinID", drop = T), function(DT) {
      DT[, ProteinID := NULL]

      output.protein <- lapply(1:ncol(cts), function (j) {
        output.contrast <- list()

        # input
        output.contrast$DT.input <- DT[as.character(get(condition)) %in% cts[, j]]
        output.contrast$DT.input[, (condition) := factor(as.character(get(condition)), levels = cts[, j])]
        output.contrast$DT.input <- droplevels(output.contrast$DT.input)
        setcolorder(output.contrast$DT.input, c("AssayID", "Assay", "Run", "Channel", "Sample", "m", "s"))

        if (nrow(output.contrast$DT.input) > 0) {
          # metafor will moan if SE is 0
          output.contrast$DT.input[, s := ifelse(s < 0.001, 0.001, s)]
          # metafor will moan if the ratio between largest and smallest sampling variance is >= 1e7
          output.contrast$DT.input[, s := ifelse(s^2 / min(s^2) >= 1e7, sqrt(min(s^2) * (1e7 - 1)), s)]
        }

        # fit
        if (length(unique(output.contrast$DT.input[get(condition) == cts[1, j], Sample])) >= 2 &&
            length(unique(output.contrast$DT.input[get(condition) == cts[2, j], Sample])) >= 2) {

          for (i in 0:9) {
            try( {
              output.contrast$fit <- rma.mv(
                yi = m,
                V = s^2,
                data = output.contrast$DT.input,
                control = list(sigma2.init = 0.1 + 0.1 * i),
                test = test,
                mods = mods,
                random = random,
                struct = struct,
                ...
              )
              #output.contrast$fit <- rma.uni(yi = m, sei = s, data = output.contrast$DT.input, mods = mods, test = test, ...)
              output.contrast$log <- paste0("[", Sys.time(), "] attempt ", i + 1, " succeeded\n")
              break
            })
          }
        } else {
          output.contrast$log <- paste0("[", Sys.time(), "] ignored as n < 2 for one or both conditions\n")
        }

        output.contrast
      })
      names(output.protein) <- sapply(1:length(output.protein), function(j) paste(cts[,j], collapse = "_v_"))

      output.protein
    })

    # save chunk
    saveRDS(output.chunk, file.path(fit, "model", "protein.de", paste0(output, ".", DT.chunk[1, ProteinID], ".rds")))

    # extract results
    DT.de <- rbindlist(lapply(output.chunk, function (output.protein) {
      rbindlist(lapply(output.protein, function (output.contrast) {
        if (!is.null(output.contrast$fit) && !is.null(output.contrast$fit$b) && length(output.contrast$fit$b) > 0) {
          fit_stats <- data.table(t(fitstats(output.contrast$fit)))
          names(fit_stats) <- sub(":$", "", names(fit_stats))
          v_sigma2 <- data.table(t(output.contrast$fit$sigma2))
          names(v_sigma2) <- paste0("v_s", 1:length(v_sigma2))
          v_tau2 <- data.table(t(output.contrast$fit$tau2))
          names(v_tau2) <- paste0("v_t", 1:length(v_tau2))
          DT.de <- data.table(
            Effect = rownames(output.contrast$fit$b),
            fit_stats,
            v_sigma2,
            v_tau2,
            lower = output.contrast$fit$ci.lb,
            upper = output.contrast$fit$ci.ub,
            m = output.contrast$fit$b[, 1],
            s = output.contrast$fit$se,
            df = df.residual(output.contrast$fit),
            t = output.contrast$fit$zval,
            pvalue = output.contrast$fit$pval
          )
          if (!save.intercept) DT.de <- DT.de[Effect != "intrcpt"]
          DT.de
        }
      }), idcol = "Model")
    }), idcol = "ProteinID")

    DT.de
  }
  setTxtProgressBar(pb, length(DTs.chunk))
  close(pb)

  DT.de[, ProteinID := as.integer(ProteinID)]
  DT.de[, Model := factor(Model, levels = sapply(1:ncol(cts), function (i) paste(cts[, i], collapse = "_v_")))]
  DT.de[, Effect := factor(Effect, levels = unique(Effect))]

  # highlights if any protein are missing for any model
  DT.proteins <- CJ(proteins(fit, as.data.table = T)[, ProteinID], DT.de$Model, unique = T)[, .(ProteinID = V1, Model = V2)]
  DT.de <- merge(DT.proteins, DT.de, by = c("ProteinID", "Model"), all.x = T, allow.cartesian = T)

  # ensure every ProteinID::Model has a full set of covariates and n
  DT.de <- DT.de[, merge(CJ(ProteinID, Effect, unique = T), .SD, by = c("ProteinID", "Effect"), keyby = "Effect", all = T), by = Model]
  DT.de <- DT.de[!is.na(Effect)]

  # merge in input.meta
  DT.de <- merge(input$DT.meta, DT.de, by = c("Model", "ProteinID"))
  setcolorder(DT.de, c("Model", "Effect", "ProteinID"))

  if (!as.data.table) setDF(DT.de)
  else DT.de[]
  return(DT.de)
}


#' Mixed-effects univariate differential expression analysis with 'MCMCglmm'
#'
#' The model is performed pair-wise across the levels of the 'Condition' in 'data.design'. Default is a standard Student's t-test model.
#'
#' @import data.table
#' @import doRNG
#' @export
dea_MCMCglmm <- function(
  fit,
  data = protein_quants(fit),
  data.design = design(fit),
  condition = "Condition",
  fixed = as.formula(paste("~", condition)),
  random = NULL,
  residual.welch = TRUE,
  save.intercept = FALSE,
  output = "MCMCglmm",
  as.data.table = FALSE,
  use.moderation = FALSE,
  use.precision = TRUE,
  dist.mean.func = dist_lst_mcmc,
  dist.var.func = dist_invchisq_mcmc,
  squeeze.var.func = squeeze_var,
  model.seed = control(fit)$model.seed,
  model.nchain = control(fit)$model.nchain,
  model.nwarmup  = control(fit)$model.nwarmup,
  model.thin = control(fit)$model.thin,
  model.nsample = control(fit)$model.nsample
) {
  if (use.moderation) message("WARNING: 'use.moderation = TRUE' has not been validated")
  if (!use.precision) message("WARNING: With 'use.precision = FALSE' this function does not use BayesProt quant precision estimates and hence should only be used for comparative purposes with the other 'dea' methods.")

  if (is.null(data.design$AssayID)) data.design <- merge(data.design, design(fit, as.data.table = T)[, .(Assay, AssayID)], by = "Assay")
  control = control(fit)
  fixed <- as.formula(sub("^.*~", "m ~", deparse(fixed)))
  nitt <- model.nwarmup + (model.nsample * model.thin) / model.nchain

  input <- dea_init(fit, data, data.design, condition)
  DTs <- batch_split(input$DT, "ProteinID", 64)
  cts <- input$contrasts

  # progress bar
  if (use.moderation == T) {
    nstep <- 2 * model.nchain + 3
  } else {
    nstep <- model.nchain + 2
  }
  pb <- txtProgressBar(max = nstep * length(DTs), style = 3)

  rbindlist_of_lists <- function(input) {
    for (j in names(input[[1]])) input[[1]][[j]] <- rbindlist(lapply(1:length(input), function(i) input[[i]][[j]]))
    input[[1]]
  }

  execute_model <- function(DT.priors, chain, step, ...) {
    # hack for foreach to 'see' objects in the calling environment
    for(n in ls(parent.env(environment()), all.names = T)) if (!exists(n, environment(), inherits = F)) assign(n, get(n, parent.env(environment())), environment())

    set.seed(control$model.seed + chain)
    out <- foreach(DT.chunk = iterators::iter(DTs), .final = rbindlist_of_lists, .inorder = F, .packages = "data.table", .options.snow = list(progress = function(n) setTxtProgressBar(pb, (step-1)*length(DTs) + n))) %dorng% {
      # process chunk
      output.chunk <- lapply(split(DT.chunk, by = "ProteinID", drop = T), function(DT) {
        DT[, ProteinID := NULL]

        output.protein <- lapply(1:ncol(cts), function (j) {
          output.contrast <- list()

          # input
          output.contrast$DT.input <- DT[as.character(get(condition)) %in% cts[, j]]
          output.contrast$DT.input[, (condition) := factor(as.character(get(condition)), levels = cts[, j])]
          output.contrast$DT.input <- droplevels(output.contrast$DT.input)
          setcolorder(output.contrast$DT.input, c("AssayID", "Assay", "Run", "Channel", "Sample", "m", "s"))

          # fit
          if (length(unique(output.contrast$DT.input[get(condition) == cts[1, j], Sample])) >= 2 &&
              length(unique(output.contrast$DT.input[get(condition) == cts[2, j], Sample])) >= 2) {

            if (use.precision) {
              mev = output.contrast$DT.input$s^2
            } else {
              mev = NULL
            }

            if (residual.welch) {
              for (l in levels(output.contrast$DT.input$Condition)) output.contrast$DT.input[, paste0("Condition", l) := ifelse(Condition == l, 1, 0)]
              rcov <- as.formula(paste("~", paste(paste0("idh(Condition", levels(output.contrast$DT.input$Condition), "):units"), collapse = "+")))
              if (is.null(DT.priors)) {
                prior <- list(R = lapply(1:nlevels(output.contrast$DT.input$Condition), function(i) list(V = 1, nu = 0.002)))
              } else {
                prior <- list(R = split(DT.priors[Model == paste0(cts[1, j], "_v_", cts[2, j]), .(Effect, v, df)], by = "Effect", keep.by = F))
                for (i in 1:length(prior$R)) prior$R[[i]] <- list(V = prior$R[[i]]$v, nu = prior$R[[i]]$df)
              }
            } else {
              rcov <- ~ units
              if (is.null(DT.priors)) {
                prior <- list(R = list(V = 1, nu = 0.02))
              } else {
                prior <- list(R = list(V = DT.priors$v, nu = DT.priors$df))
              }
            }

            print(prior)

            output.contrast$fit <- MCMCglmm::MCMCglmm(
              fixed = fixed,
              random = random,
              rcov = rcov,
              mev = mev,
              data = output.contrast$DT.input,
              prior = prior,
              nitt = nitt,
              burnin = model.nwarmup,
              thin = model.thin,
              verbose = F
            )

            output.contrast$log <- paste0("[", Sys.time(), "] succeeded\n")
          } else {
            output.contrast$log <- paste0("[", Sys.time(), "] ignored as n < 2 for one or both conditions\n")
          }

          output.contrast
        })
        names(output.protein) <- sapply(1:length(output.protein), function(j) paste(cts[,j], collapse = "_v_"))

        output.protein
      })

      # save chunk
      saveRDS(output.chunk, file.path(fit, "model", "protein.de", paste0(output, ifelse(is.null(DT.priors), "0", ""), ".", DT.chunk[1, ProteinID], ".rds")))

      # extract results
      out <- list()

      out$DT.fixed <- rbindlist(lapply(output.chunk, function (output.protein) {
        rbindlist(lapply(output.protein, function (output.contrast) {
          if (!is.null(output.contrast$fit)) {
            DT.sol <- as.data.table(output.contrast$fit$Sol)
            DT.sol[, mcmcID := 1:nrow(DT.sol)]
            if (!save.intercept) DT.sol[, `(Intercept)` := NULL]
            melt(DT.sol, id.vars = "mcmcID", variable.name = "Effect")
          }
        }), idcol = "Model")
      }), idcol = "ProteinID")
      out$DT.fixed[, ProteinID := as.integer(ProteinID)]
      out$DT.fixed[, Model := factor(Model, levels = unique(Model))]
      out$DT.fixed[, chainID := chain]
      setcolorder(out$DT.fixed, c("ProteinID", "Model", "Effect", "chainID", "mcmcID"))

      out$DT.rcov <- rbindlist(lapply(output.chunk, function (output.protein) {
        rbindlist(lapply(output.protein, function (output.contrast) {
          if (!is.null(output.contrast$fit)) {
            nc <- ncol(output.contrast$fit$VCV)
            if (residual.welch) {
              DT.vcv <- as.data.table(output.contrast$fit$VCV[, (nc-nlevels(output.contrast$DT.input$Condition)+1):nc, drop = F])
            } else {
              DT.vcv <- as.data.table(output.contrast$fit$VCV[, nc, drop = F])
            }
            DT.vcv[, mcmcID := 1:nrow(DT.vcv)]
            melt(DT.vcv, id.vars = "mcmcID", variable.name = "Effect")
          }
        }), idcol = "Model")
      }), idcol = "ProteinID")
      if (residual.welch) {
        levels(out$DT.rcov$Effect) <- sub(paste0(condition, "(.*)\\.units"), "\\1", levels(out$DT.rcov$Effect))
      } else {
        levels(out$DT.rcov$Effect) <- "residual"
      }
      out$DT.rcov[, ProteinID := as.integer(ProteinID)]
      out$DT.rcov[, Model := factor(Model, levels = unique(Model))]
      out$DT.rcov[, chainID := chain]
      setcolorder(out$DT.rcov, c("ProteinID", "Model", "Effect", "chainID", "mcmcID"))

      rm(output.chunk)
      gc()
      out
    }

    out2 <- list()

    setkey(out$DT.fixed, ProteinID, Model, Effect)
    filename <- paste0(output, "0.", "fixed")
    fst::write.fst(out$DT.fixed, file.path(fit, "model", "protein.de", paste0(filename, ".", chain, ".fst")))
    if (chain == 1) {
      # write index
      out2$DT.fixed.index <- out$DT.fixed[, .(
        from = .I[!duplicated(out$DT.fixed, by = "ProteinID")],
        to = .I[!duplicated(out$DT.fixed, fromLast = T, by = "ProteinID")]
      )]
      out2$DT.fixed.index <- cbind(
        out$DT.fixed[out2$DT.fixed.index$from, .(ProteinID)],
        data.table(file = file.path("protein.de", filename)),
        out2$DT.fixed.index
      )
    }
    out$DT.fixed <- NULL

    setkey(out$DT.rcov, ProteinID, Model, Effect)
    filename <- paste0(output, "0.", "rcov")
    fst::write.fst(out$DT.rcov, file.path(fit, "model", "protein.de", paste0(filename, ".", chain, ".fst")))
    if (chain == 1) {
      # write index
      out2$DT.rcov.index <- out$DT.rcov[, .(
        from = .I[!duplicated(out$DT.rcov, by = "ProteinID")],
        to = .I[!duplicated(out$DT.rcov, fromLast = T, by = "ProteinID")]
      )]
      out2$DT.rcov.index <- cbind(
        out$DT.rcov[out2$DT.rcov.index$from, .(ProteinID)],
        data.table(file = file.path("protein.de", filename)),
        out2$DT.rcov.index
      )
    }
    out$DT.rcov <- NULL

    out2
  }

  process_model <- function(DT.priors, stage, step, ...) {
    # hack for foreach to 'see' objects in the calling environment
    for(n in ls(parent.env(environment()), all.names = T)) if (!exists(n, environment(), inherits = F)) assign(n, get(n, parent.env(environment())), environment())

    # execute model across all chains
    out <- rbindlist_of_lists(lapply(1:model.nchain, function(chain) execute_model(DT.priors, chain, step + chain)))

    # summarise
    DTs.rcov.index <- batch_split(out$DT.rcov.index, "ProteinID", 64)
    out$DT.rcov <- foreach(DT.index = iterators::iter(DTs.rcov.index), .combine = rbind, .inorder = F, .packages = "data.table", .options.snow = list(progress = function(j) setTxtProgressBar(pb, (step+model.nchain)*length(DTs) + j))) %dorng% {
      rbindlist(lapply(1:nrow(DT.index), function(i) {
        DT <- rbindlist(lapply(1:model.nchain, function(chain) {
          fst::read.fst(paste0(file.path(fit, "model", DT.index[i, file]), ".", chain, ".fst"), from = DT.index[i, from], to = DT.index[i, to], as.data.table = T)
        }))
        DT[, dist.var.func(chainID, mcmcID, value), by = .(Model, Effect, ProteinID)]
      }))
    }

    if (stage > 0) {
      DTs.fixed.index <- batch_split(out$DT.fixed.index, "ProteinID", 64)
      out$DT.fixed <- foreach(DT.index = iterators::iter(DTs.fixed.index), .combine = rbind, .inorder = F, .packages = "data.table", .options.snow = list(progress = function(j) setTxtProgressBar(pb, (step+model.nchain+1)*length(DTs) + j))) %dorng% {
        rbindlist(lapply(1:nrow(DT.index), function(i) {
          DT <- rbindlist(lapply(1:model.nchain, function(chain) {
            fst::read.fst(paste0(file.path(fit, "model", DT.index[i, file]), ".", chain, ".fst"), from = DT.index[i, from], to = DT.index[i, to], as.data.table = T)
          }))
          DT[, dist.mean.func(chainID, mcmcID, value), by = .(Model, Effect, ProteinID)]
        }))
      }
    }

    out
  }

  if (use.moderation) {
    # unmoderated tests
    out <- process_model(NULL, 0, 0)

    # plot variance fit
    DT.vars <- split(out$DT.rcov, by = c("Model", "Effect"))
    for (i in 1:length(DT.vars)) {
      g <- ggplot2::ggplot(DT.vars[[i]], ggplot2::aes(x = ProteinID, y = v)) + ggplot2::geom_point(size = 0.1) + ggplot2::scale_y_continuous(trans = "log2")
      ggplot2::ggsave(file.path(fit, "model", paste0("protein_de0__", output, "__", DT.vars[[i]]$Model[1], "__", DT.vars[[i]]$Effect[1], "__v.pdf")), g, width = 8, height = 6, limitsize = F)

      g <- ggplot2::ggplot(DT.vars[[i]], ggplot2::aes(x = ProteinID, y = df)) + ggplot2::geom_point(size = 0.1) + ggplot2::scale_y_continuous(trans = "log2")
      ggplot2::ggsave(file.path(fit, "model", paste0("protein_de0__", output, "__", DT.vars[[i]]$Model[1], "__", DT.vars[[i]]$Effect[1], "__df.pdf")), g, width = 8, height = 6, limitsize = F)
    }

    # squeeze variances
    DT.priors <- out$DT.rcov[, squeeze.var.func(v, df), by = .(Model, Effect)]
    #DT.priors <- out$DT.rcov[, squeeze_var(v, df), by = .(Model, Effect)]
    #system.time(print(out$DT.rcov[, dist_sf_with_fixed_df1(v, df, optim.method = "L-BFGS-B", control=list(trace=1, REPORT=1)), by = .(Model, Effect)]))

    # plot eb priors fits
    g <- plot_fits(out$DT.rcov, DT.priors, c(0.0, 0.99), by = "Effect")
    suppressWarnings(ggplot2::ggsave(file.path(fit, "model", paste0("protein_de__", output, "__qc_vars.pdf")), g, width = 8, height = 0.5 + 1.5 * nrow(DT.priors), limitsize = F))

    # plot eb prior stdevs
    g <- plot_fits(out$DT.rcov, DT.priors, c(0.0, 0.99), by = "Effect", xlab = "log2 Standard Deviation", trans = sqrt, inv.trans = function(x) x^2)
    suppressWarnings(ggplot2::ggsave(file.path(fit, "model", paste0("protein_de__", output, "__qc_stdevs.pdf")), g, width = 8, height = 0.5 + 1.5 * nrow(DT.priors), limitsize = F))

    # moderated tests
    out <- process_model(DT.priors, 1, model.nchain + 1)
  } else {
    # unmoderated tests
    out <- process_model(NULL, 1, 0)
  }

  setTxtProgressBar(pb, nstep * length(DTs))
  close(pb)

  #fst::write.fst(out2$DT.fixed.index, file.path(fit, "model", paste0("protein.de.", filename, ".index.fst")))
  #fst::write.fst(out2$DT.rcov.index, file.path(fit, "model", paste0("protein.de.", filename, ".index.fst")))

  # prepare output
  DT.de <- dcast(out$DT.rcov, Model + ProteinID ~ Effect, drop = F, value.var = c("v", "df"))
  DT.de <- merge(DT.de, out$DT.fixed, by = c("Model", "ProteinID"))
  DT.de[, Model := factor(Model, levels = sapply(1:ncol(cts), function (i) paste(cts[, i], collapse = "_v_")))]
  DT.de[, Effect := factor(Effect, levels = unique(Effect))]

  # highlights if any proteins are missing for any model
  DT.proteins <- CJ(proteins(fit, as.data.table = T)[, ProteinID], DT.de$Model, unique = T)[, .(ProteinID = V1, Model = V2)]
  DT.de <- merge(DT.proteins, DT.de, by = c("ProteinID", "Model"), all.x = T, allow.cartesian = T)

  # ensure every ProteinID::Model has a full set of covariates and n
  DT.de <- DT.de[, merge(CJ(ProteinID, Effect, unique = T), .SD, by = c("ProteinID", "Effect"), keyby = "Effect", all = T), by = Model]
  DT.de <- DT.de[!is.na(Effect)]

  # merge in input.meta
  DT.de <- merge(input$DT.meta, DT.de, by = c("Model", "ProteinID"))
  setcolorder(DT.de, c("Model", "Effect", "ProteinID"))

  if (!as.data.table) setDF(DT.de)
  else DT.de[]
  return(DT.de)
}
biospi/bayesprot documentation built on Nov. 9, 2019, 2:40 p.m.