R/differential_expression_external.R

Defines functions error error MASTDETest PoissonDETest NegBinomDETest TobitTest DiffTTest varImportances MarkerTest DiffExpTest

Documented in DiffExpTest DiffTTest MarkerTest MASTDETest NegBinomDETest PoissonDETest TobitTest varImportances

#' Likelihood ratio test for zero-inflated data
#'
#' Identifies differentially expressed SJs between two groups of cells using
#' the LRT model proposed in McDavid et al, Bioinformatics, 2013
#'
#' @inheritParams FindMarkers
#' @param object ICASDataSet object
#' @param cells.1 Group 1 cells
#' @param cells.2 Group 2 cells
#' @return Returns a p-value ranked matrix of putative differentially expressed
#' SJs.
#'
#' @export
#'
#' @importFrom parallel mclapply
#' @importFrom pbapply pblapply
#'
DiffExpTest <- function(
  object,
  cells.1,
  cells.2,
  genes.use = NULL,
  print.bar = TRUE,
  NT = 1
) {
  data.test <- psi(object = object)
  genes.use <- SetIfNull(x = genes.use, default = rownames(data.test))
  if (print.bar) {
    p_val <- unlist(
      x = pbapply::pblapply(
        X = genes.use,
        FUN = function(x) {
          return(
            DifferentialLRT(
              x = as.numeric(x = data.test[x, cells.1])[!is.na(as.numeric(x = data.test[x, cells.1]))],
              y = as.numeric(x = data.test[x, cells.2])[!is.na(as.numeric(x = data.test[x, cells.2]))]
            )
          )
        },
        cl = NT
      )
    )
  } else {
    iterate.fxn <- lapply
    p_val <- unlist(
      x = parallel::mclapply(
        X = genes.use,
        FUN = function(x) {
          return(
            DifferentialLRT(
              x = as.numeric(x = data.test[x, cells.1])[!is.na(as.numeric(x = data.test[x, cells.1]))],
              y = as.numeric(x = data.test[x, cells.2])[!is.na(as.numeric(x = data.test[x, cells.2]))]
            )
          )
        },
        mc.cores = NT
      )
    )
  }
  to.return <- data.frame(p_val, row.names = genes.use)
  return(to.return)
}


#' ROC-based marker discovery
#'
#' Identifies 'markers' of SJ expression using ROC analysis. For each SJ,
#' evaluates (using AUC) a classifier built on that SJ alone, to classify
#' between two groups of cells.
#'
#' An AUC value of 1 means that expression values for this SJ alone can
#' perfectly classify the two groupings (i.e. Each of the cells in cells.1
#' exhibit a higher level than each of the cells in cells.2). An AUC value of 0
#' also means there is perfect classification, but in the other direction. A
#' value of 0.5 implies that the SJ has no predictive power to classify the
#' two groups.
#'
#' @inheritParams FindMarkers
#' @inheritParams DiffExpTest
#' @param object ICASDataSet object
#'
#' @return Returns a 'predictive power' (abs(AUC-0.5)) ranked matrix of
#' putative differentially expressed SJs.
#'
#' @export
#'
#'
MarkerTest <- function(
  object,
  cells.1,
  cells.2,
  genes.use = NULL,
  print.bar = TRUE
) {
  data.test <- psi(object = object)
  genes.use <- SetIfNull(x = genes.use, default = rownames(x = data.test))
  to.return <- AUCMarkerTest(
    data1 = data.test[, cells.1],
    data2 = data.test[, cells.2],
    mygenes = genes.use,
    print.bar = print.bar
  )
  to.return$power <- abs(x = to.return$myAUC - 0.5) * 2
  #print(head(to.return))
  return(to.return)
}


#' random forests based marker discovery
#'
#' Identifies 'markers' of SJ expression using OOB analysis. For each SJ,
#' evaluates (using AUC) a classifier built on that SJ alone, to classify
#' between two groups of cells.
#'
#' @inheritParams FindMarkers
#' @inheritParams DiffExpTest
#' @param object ICASDataSet object
#'
#' @return Returns a 'predictive power' (abs(AUC-0.5)) ranked matrix of
#' putative differentially expressed SJs.
#'
#' @export

varImportances <- function(
  object,
  cells.1,
  cells.2,
  genes.use = NULL,
  print.bar = TRUE,
  NT = 1
) {
  data.test <- psi(object = object)
  genes.use <- SetIfNull(x = genes.use, default = rownames(x = data.test))

  if(print.bar) {
    avg_diff <- unlist(x = pbapply::pblapply(
      X = genes.use,
      FUN = function(x) {
        data1 <- as.numeric(data.test[x, cells.1])[!is.na(as.numeric(data.test[x, cells.1]))]
        data2 <- as.numeric(data.test[x, cells.2])[!is.na(as.numeric(data.test[x, cells.2]))]
        if(length(data1) == 0 | length(data2) == 0) {
          return(1)
        } else {
          return(RandomOOB(x = data1, y = data2))
        }
      }
      , cl = NT))
  } else {
    avg_diff <- unlist(x = parallel::mclapply(
      X = genes.use,
      FUN = function(x) {
        data1 <- as.numeric(data.test[x, cells.1])[!is.na(as.numeric(data.test[x, cells.1]))]
        data2 <- as.numeric(data.test[x, cells.2])[!is.na(as.numeric(data.test[x, cells.2]))]
        if(length(data1) == 0 | length(data2) == 0) {
          return(1)
        } else {
          return(RandomOOB(x = data1, y = data2))
        }
      }
      , mc.cores = NT))
  }
  to.return <- data.frame(Importance = avg_diff, row.names = genes.use)
  return(to.return)
}


#' Differential expression testing using Student's t-test
#'
#' Identify differentially expressed SJs between two groups of cells using
#' the Student's t-test
#'
#' @inheritParams FindMarkers
#' @inheritParams DiffExpTest
#'
#' @return Returns a p-value ranked matrix of putative differentially expressed
#' SJs.
#'
#' @importFrom stats t.test
#' @importFrom pbapply pblapply
#'
#' @export
#'
DiffTTest <- function(
  object,
  cells.1,
  cells.2,
  genes.use = NULL,
  print.bar = TRUE,
  NT = 1
) {
  data.test <- psi(object = object)
  genes.use <- SetIfNull(x = genes.use, default = rownames(x = data.test))
  # data.use <- object@data
  if (print.bar) {
    p_val <- unlist(
      x = pbapply::pblapply(
        X = genes.use,
        FUN = function(x) {
          tryCatch(t.test(x = as.numeric(data.test[x, cells.1])[!is.na(as.numeric(data.test[x, cells.1]))],
                          y = as.numeric(data.test[x, cells.2])[!is.na(as.numeric(data.test[x, cells.2]))])$p.value, error = function(e) NA)
        }, cl = NT)
    )
  } else {
    p_val <- unlist(
      x = parallel::mclapply(
        X = genes.use,
        FUN = function(x) {
          tryCatch(t.test(x = as.numeric(data.test[x, cells.1])[!is.na(as.numeric(data.test[x, cells.1]))],
                          y = as.numeric(data.test[x, cells.2])[!is.na(as.numeric(data.test[x, cells.2]))])$p.value, error = function(e) NA)
        }, mc.cores = NT)
    )
  }
  to.return <- data.frame(p_val,row.names = genes.use)
  return(to.return)
}



#' Differential expression testing using Tobit models
#'
#' Identifies differentially expressed SJs between two groups of cells using
#' Tobit models, as proposed in Trapnell et al., Nature Biotechnology, 2014
#'
#' @inheritParams FindMarkers
#' @inheritParams DiffExpTest
#'
#' @return Returns a p-value ranked matrix of putative differentially expressed
#' SJs.
#'
#' @export
#'
#'
TobitTest <- function(
  object,
  cells.1,
  cells.2,
  genes.use = NULL,
  print.bar = TRUE,
  NT = 1
) {
  data.test <- psi(object = object)
  genes.use <- SetIfNull(x = genes.use, default = rownames(x = data.test))
  #print(genes.diff)
  to.return <- TobitDiffExpTest(
    data1 = data.test[, cells.1],
    data2 = data.test[, cells.2],
    mygenes = genes.use,
    print.bar = print.bar,
    NT = NT
  )
  return(to.return)
}



#' Negative binomial test for UMI-count based data
#'
#' Identifies differentially expressed SJs between two groups of cells using
#' a negative binomial generalized linear model
#'
#' @param object ICASDataSet object
#' @param cells.1 Group 1 cells
#' @param cells.2 Group 2 cells
#' @param genes.use SJs to use for test
#' @param print.bar Print progress bar
#' @param min.cells Minimum number of cells threshold
#'
#' @return Returns a p-value ranked matrix of putative differentially expressed SJs.
#'
#' @importFrom MASS glm.nb
#' @importFrom pbapply pbapply
#' @importFrom stats var as.formula
#'
#' @export
#'
#'
NegBinomDETest <- function(
  object,
  cells.1,
  cells.2,
  genes.use = NULL,
  print.bar = TRUE,
  min.cells = 3,
  NT = 1
) {
  genes.use <- SetIfNull(x = genes.use, default = rownames(x = psi(object = object)))
  # check that the gene made it through the any filtering that was done
  genes.use <- genes.use[genes.use %in% rownames(x = psi(object = object))]

  to.test.data <- psi(object = object)[genes.use, c(cells.1, cells.2)]
  to.test <- data.frame(intercept = rep(1, length(c(cells.1, cells.2))), row.names = c(cells.1, cells.2))
  to.test[cells.1, "group"] <- "A"
  to.test[cells.2, "group"] <- "B"
  to.test$group <- factor(x = to.test$group)
  latent.vars <- c("group", "intercept")
  if (print.bar) {
    p_val <- unlist(
      x = pbapply::pblapply(
        X = genes.use,
        FUN = function(x) {
          to.test[, "GENE"] <- as.numeric(x = to.test.data[x, ])
          to.test <- na.omit(to.test)
          # check that gene is expressed in specified number of cells in one group
          if (sum(to.test$group == "A") < min.cells || sum(to.test$group == "B") < min.cells) {
            warning(paste0("Skipping SJ --- ", x, ". Fewer than ", min.cells, " in at least one of the two clusters."))
            return(2)
          }
          # check that variance between groups is not 0
          if (var(x = to.test$GENE) == 0) {
            warning(paste0("Skipping SJ -- ", x, ". No variance in expression between the two clusters."))
            return(2)
          }
          fmla <- as.formula(paste0("GENE ", " ~ ", paste(latent.vars, collapse = "+")))
          p.estimate <- 2
          try(expr = p.estimate <- summary(object = glm.nb(formula = fmla, data = to.test))$coef[2, 4], silent = TRUE)
          return(p.estimate)
        }, cl = NT
      )
    )
  } else {
    p_val <- unlist(
      x = parallel::mclapply(
        X = genes.use,
        FUN = function(x) {
          to.test[, "GENE"] <- as.numeric(x = to.test.data[x, ])
          to.test <- na.omit(to.test)
          # check that gene is expressed in specified number of cells in one group
          if (sum(to.test$group == "A") < min.cells || sum(to.test$group == "B") < min.cells) {
            warning(paste0("Skipping SJ --- ", x, ". Fewer than ", min.cells, " in at least one of the two clusters."))
            return(2)
          }
          # check that variance between groups is not 0
          if (var(x = to.test$GENE) == 0) {
            warning(paste0("Skipping SJ -- ", x, ". No variance in expression between the two clusters."))
            return(2)
          }
          fmla <- as.formula(paste0("GENE ", " ~ ", paste(latent.vars, collapse = "+")))
          p.estimate <- 2
          try(expr = p.estimate <- summary(object = glm.nb(formula = fmla, data = to.test))$coef[2, 4], silent = TRUE)
          return(p.estimate)
        }, mc.cores = NT
      )
    )
  }

  if (length(x = which(x = p_val == 2)) > 0){
    genes.use <- genes.use[-which(x = p_val == 2)]
    p_val <- p_val[! p_val == 2]
  }
  to.return <- data.frame(p_val, row.names = genes.use)
  return(to.return)
}




globalVariables(names = 'min.cells', package = 'ICAS', add = TRUE)
#' Poisson test for UMI-count based data
#'
#' Identifies differentially expressed SJs between two groups of cells using
#' a poisson generalized linear model
#
#' @param object ICASDataSet object
#' @param cells.1 Group 1 cells
#' @param cells.2 Group 2 cells
#' @param min.cells Minimum number of cells expressing the SJ in at least one of the two groups
#' @param genes.use SJs to use for test
#' @param latent.vars Latent variables to test
#' @param print.bar Print progress bar
#'
#' @return Returns a p-value ranked matrix of putative differentially expressed SJs.
#'
#' @importFrom pbapply pbapply
#' @importFrom stats var as.formula glm
#'
#' @export
#'
#'
PoissonDETest <- function(
  object,
  cells.1,
  cells.2,
  min.cells = 3,
  genes.use = NULL,
  print.bar = TRUE,
  NT = 1
) {
  genes.use <- SetIfNull(x = genes.use, default = rownames(x = psi(object = object)))
  # check that the gene made it through the any filtering that was done
  genes.use <- genes.use[genes.use %in% rownames(x = psi(object = object))]

  to.test.data <- psi(object = object)[genes.use, c(cells.1, cells.2)]
  to.test <- data.frame(intercept = rep(1, length(c(cells.1, cells.2))), row.names = c(cells.1, cells.2))
  to.test[cells.1, "group"] <- "A"
  to.test[cells.2, "group"] <- "B"
  to.test$group <- factor(x = to.test$group)
  latent.vars <- c("group", "intercept")

  if (print.bar) {
    p_val <- unlist(
      x = pbapply::pblapply(
        X = genes.use,
        FUN = function(x) {
          to.test[, "GENE"] <- as.numeric(x = to.test.data[x, ])
          to.test <- na.omit(to.test)
          # check that gene is expressed in specified number of cells in one group
          if (sum(to.test$group == "A") < min.cells || sum(to.test$group == "B") < min.cells) {
            warning(paste0("Skipping SJ --- ", x, ". Fewer than", min.cells, " in at least one of the two clusters."))
            return(2)
          }
          # check that variance between groups is not 0
          if (var(to.test$GENE) == 0) {
            message("what") # what?
            warning(paste0("Skipping SJ -- ", x, ". No variance in expression between the two clusters."))
            return(2)
          }
          fmla <- as.formula(
            object = paste0("GENE ", " ~ ", paste(latent.vars, collapse = "+"))
          )
          return(summary(object = stats::glm(formula = fmla, data = to.test, family = "poisson"))$coef[2,4])
        }, cl = NT
      )
    )
  } else {
    p_val <- unlist(
      x = parallel::mclapply(
        X = genes.use,
        FUN = function(x) {
          to.test[, "GENE"] <- as.numeric(x = to.test.data[x, ])
          to.test <- na.omit(to.test)
          # check that gene is expressed in specified number of cells in one group
          if (sum(to.test$group == "A") < min.cells || sum(to.test$group == "B") < min.cells) {
            warning(paste0("Skipping SJ --- ", x, ". Fewer than ", min.cells, " in at least one of the two clusters."))
            return(2)
          }
          # check that variance between groups is not 0
          if (var(to.test$GENE) == 0) {
            message("what") # what?
            warning(paste0("Skipping SJ -- ", x, ". No variance in expression between the two clusters."))
            return(2)
          }
          fmla <- as.formula(
            object = paste0("GENE ", " ~ ", paste(latent.vars, collapse = "+"))
          )
          return(summary(object = stats::glm(formula = fmla, data = to.test, family = "poisson"))$coef[2,4])
        }, mc.cores = NT
      )
    )
  }
  if (length(x = which(x = p_val == 2)) > 0) {
    genes.use <- genes.use[-which(x = p_val == 2)]
    p_val <- p_val[! p_val == 2]
  }
  to.return <- data.frame(p_val, row.names = genes.use)
  return(to.return)
}


#' Differential expression using MAST
#'
#' Identifies differentially expressed SJs between two groups of cells using
#' a hurdle model tailored to scRNA-seq data. Utilizes the MAST package to run
#' the DE testing.
#'
#' @references Andrew McDavid, Greg Finak and Masanao Yajima (2017). MAST: Model-based
#' Analysis of Single Cell Transcriptomics. R package version 1.2.1.
#' https://github.com/RGLab/MAST/
#' @param object ICASDataSet object
#' @param cells.1 Group 1 cells
#' @param cells.2 Group 2 cells
#' @param genes.use SJs to use for test
#' @param latent.vars Confounding variables to adjust for in DE test. Default is
#' "nUMI", which adjusts for cellular depth (i.e. cellular detection rate). For
#' non-UMI based data, set to nGene instead.
#' @param \dots Additional parameters to zero-inflated regression (zlm) function
#' in MAST
#' @details
#' To use this method, please install MAST, using instructions at https://github.com/RGLab/MAST/
#'
#' @return Returns a p-value ranked matrix of putative differentially expressed
#' SJs.
#'
#' @importFrom stats relevel
#' @importFrom utils installed.packages
#' @importFrom MAST FromMatrix
#' @importFrom MAST zlm
#'
#' @export
#'
#'
MASTDETest <- function(
  object,
  cells.1,
  cells.2,
  genes.use = NULL,
  ...
) {
  # Check for MAST
  if (!'MAST' %in% rownames(x = installed.packages())) {
    stop("Please install MAST - learn more at https://github.com/RGLab/MAST")
  }
  data.test <- psi(object)
  genes.use <- SetIfNull(x = genes.use, default = rownames(x = data.test))
  # check that the gene made it through the any filtering that was done
  genes.use <- genes.use[genes.use %in% rownames(x = data.test)]

  my.latent <- rep(1, length(c(cells.1, cells.2)))
  coldata <- data.frame(intercept = my.latent, row.names = c(cells.1, cells.2))
  coldata[cells.1, "group"] <- "Group1"
  coldata[cells.2, "group"] <- "Group2"
  coldata$group <- factor(x = coldata$group)
  coldata$wellKey <- rownames(x = coldata)
  latent.vars <- c("condition", "intercept")

  countdata.test <- data.test[genes.use, rownames(x = coldata)]
  countdata.test[is.na(countdata.test)] <- 0
  fdat <- data.frame(rownames(x = countdata.test))
  colnames(x = fdat)[1] <- "primerid"
  rownames(x = fdat) <- fdat[, 1]
  sca <- MAST::FromMatrix(
    exprsArray = as.matrix(x = countdata.test),
    cData = coldata,
    fData = fdat
  )
  cond <- factor(x = SummarizedExperiment::colData(sca)$group)
  cond <- relevel(x = cond, ref = "Group1")
  SummarizedExperiment::colData(sca)$condition <- cond
  fmla <- as.formula(
    object = paste0(" ~ ", paste(latent.vars, collapse = "+"))
  )
  zlmCond <- MAST::zlm(formula = fmla, sca = sca, ...)
  summaryCond <- summary(object = zlmCond, doLRT = 'conditionGroup2')
  summaryDt <- summaryCond$datatable
  # fcHurdle <- merge(
  #   summaryDt[contrast=='conditionGroup2' & component=='H', .(primerid, `Pr(>Chisq)`)], #hurdle P values
  #   summaryDt[contrast=='conditionGroup2' & component=='logFC', .(primerid, coef, ci.hi, ci.lo)], by='primerid'
  # ) #logFC coefficients
  # fcHurdle[,fdr:=p.adjust(`Pr(>Chisq)`, 'fdr')]
  p_val <- summaryDt[summaryDt$component == "H", 4]
  genes.return <- summaryDt[summaryDt$component == "H", 1]
  # p_val <- subset(summaryDt, component == "H")[, 4]
  # genes.return <- subset(summaryDt, component == "H")[, 1]
  to.return <- data.frame(p_val = p_val$`Pr(>Chisq)`, row.names = genes.return$primerid)
  return(to.return)
}



#' Differential expression using Wilcoxon Rank Sum
#'
#' Identifies differentially expressed SJs between two groups of cells using
#' a Wilcoxon Rank Sum test
#' @param object ICASDataSet object
#' @param cells.1 Group 1 cells
#' @param cells.2 Group 2 cells
#' @param genes.use SJs to use for test
#' @param print.bar Print a progress bar
#' @param ... Extra parameters passed to wilcox.test
#'
#' @return Returns a p-value ranked matrix of putative differentially expressed
#' SJs.
#'
#' @importFrom pbapply pbsapply
#' @importFrom stats wilcox.test
#'
#' @export
#'
#'
WilcoxDETest <- function(
  object,
  cells.1,
  cells.2,
  genes.use = NULL,
  print.bar = TRUE,
  NT = 1,
  ...
) {
  data.test <- psi(object)
  genes.use <- SetIfNull(x = genes.use, default = rownames(x = data.test))
  # check that the gene made it through the any filtering that was done
  genes.use <- genes.use[genes.use %in% rownames(x = data.test)]
  coldata <- colData(object)[c(cells.1, cells.2), ]
  group <- RandomName(length = 23)
  coldata[cells.1, group] <- "Group1"
  coldata[cells.2, group] <- "Group2"
  coldata[, group] <- factor(x = coldata[, group])
  coldata$wellKey <- rownames(x = coldata)
  countdata.test <- data.test[genes.use, rownames(x = coldata), drop = FALSE]
  # mysapply <- if (print.bar) {pbsapply} else {sapply}
  if(print.bar) {
    p_val <- pbsapply(
      X = 1:nrow(x = countdata.test),
      FUN = function(x) {
        return(tryCatch(wilcox.test(countdata.test[x, ] ~ coldata[, group], ...)$p.value, error = function(e) NA))
      }, cl = NT
    )
  } else {
    p_val <- pbsapply(
      X = 1:nrow(x = countdata.test),
      FUN = function(x) {
        return(tryCatch(wilcox.test(countdata.test[x, ] ~ coldata[, group], ...)$p.value, error = function(e) NA))
      }, cl = NT
    )
  }

  genes.return <- rownames(x = countdata.test)
  to.return <- data.frame(p_val, row.names = genes.return)
  return(to.return)
}


#' @importFrom lmtest waldtest

LRDETest <- function(
  object,
  cells.1,
  cells.2,
  genes.use = NULL,
  print.bar = TRUE,
  NT = 1,
  ...
) {
  data.test <- psi(object = object)
  genes.use <- SetIfNull(x = genes.use, default = rownames(x = data.test))
  # check that the gene made it through the any filtering that was done
  genes.use <- genes.use[genes.use %in% rownames(x = data.test)]
  coldata <- colData(object)[c(cells.1, cells.2), ]
  coldata[cells.1, "group"] <- "Group1"
  coldata[cells.2, "group"] <- "Group2"
  coldata$group <- factor(x = coldata$group)
  coldata$wellKey <- rownames(x = coldata)
  countdata.test <- data.test[genes.use, rownames(x = coldata)]
  # countdata.test[is.na(countdata.test)] <- 0
  # mysapply <- if (print.bar) {pbsapply} else {sapply}
  if(print.bar) {
    p_val <- pbsapply(
      X = 1:nrow(x = countdata.test),
      FUN = function(x) {
        # model1 <- glm(coldata$group ~ countdata.test[x, ],family = "binomial")
        # return(1 - lmtest::waldtest(model1, test = "F")$`Pr(>F)`[2])
        model1 <- tryCatch(aov(countdata.test[x, ] ~ coldata$group), error = function(e) NULL)
        return(tryCatch(lmtest::waldtest(model1, test = "F")$`Pr(>F)`[2], error = function(e) NA))
      }, cl = NT
    )
  } else {
    p_val <- pbsapply(
      X = 1:nrow(x = countdata.test),
      FUN = function(x) {
        # model1 <- glm(coldata$group ~ countdata.test[x, ],family = "binomial")
        # return(1 - lmtest::waldtest(model1, test = "F")$`Pr(>F)`[2])
        model1 <- tryCatch(aov(countdata.test[x, ] ~ coldata$group), error = function(e) NULL)
        return(tryCatch(lmtest::waldtest(model1, test = "F")$`Pr(>F)`[2], error = function(e) NA))
      }, cl = NT
    )
  }

  genes.return <- rownames(x = countdata.test)
  to.return <- data.frame(p_val, row.names = genes.return)
  return(to.return)
}


phyperTest <- function(
  object,
  cells.1,
  cells.2,
  genes.use = NULL,
  print.bar = TRUE,
  NT = 1,
  ...
) {
  data.test <- psi(object = object)
  genes.use <- SetIfNull(x = genes.use, default = rownames(x = data.test))
  # check that the gene made it through the any filtering that was done
  genes.use <- genes.use[genes.use %in% rownames(x = data.test)]
  coldata <- colData(object)[c(cells.1, cells.2), ]
  coldata[cells.1, "group"] <- "Group1"
  coldata[cells.2, "group"] <- "Group2"
  coldata$group <- factor(x = coldata$group)
  coldata$wellKey <- rownames(x = coldata)
  countdata.test <- data.test[genes.use, rownames(x = coldata)]

  # mysapply <- if (print.bar) {pbsapply} else {sapply}

  if(print.bar) {
    p_val <- pbsapply(
      X = genes.use,
      FUN = function(x) {
        cutof <- mean(countdata.test[x, ], na.rm = TRUE)
        k = sum(countdata.test[x, cells.1] >= cutof, na.rm = TRUE)
        D = sum(countdata.test[x, ] >= cutof, na.rm = TRUE)
        n = sum(!is.na(countdata.test[x, ])) - D
        N = sum(!is.na(countdata.test[x, cells.1]))
        pval = tryCatch(phyper(k, D, n, N, lower.tail = FALSE), error = function(e) NA)
        return(pval)
      }, cl = NT
    )
  } else {
    p_val <- pbsapply(
      X = genes.use,
      FUN = function(x) {
        cutof <- mean(countdata.test[x, ], na.rm = TRUE)
        k = sum(countdata.test[x, cells.1] >= cutof, na.rm = TRUE)
        D = sum(countdata.test[x, ] >= cutof, na.rm = TRUE)
        n = sum(!is.na(countdata.test[x, ])) - D
        N = sum(!is.na(countdata.test[x, cells.1]))
        pval = tryCatch(phyper(k, D, n, N, lower.tail = FALSE), error = function(e) NA)
        return(pval)
      }, cl = NT
    )
  }
  # return(as.data.frame(p_val))
  to.return <- data.frame(p_val, row.names = genes.use)
  return(to.return)
}


#' Differential expression using beta-binomial models
#'
#' Identifies differentially expressed SJs between two groups of cells using a beta-binomial models
#' @param object ICASDataSet object
#' @param cells.1 Group 1 cells
#' @param cells.2 Group 2 cells
#' @param genes.use SJs to use for test
#' @param print.bar Print a progress bar
#' @param maxit maximum number of (usually Fisher-scoring) iterations allowed.
#' Decreasing maxit speeds up the function, but can weaken statistical reliability.
#' @param confounder The confounder to regress out.
#' @param NT
#'
#' @return Returns a p-value ranked matrix of putative differentially expressed
#' SJs.
#'
#' @importFrom pbapply pbsapply
#' @importFrom stats wilcox.test
#' @importFrom VGAM vglm
#'
#' @export
#'
bbTest <- function(object, cells.1, cells.2, genes.use, print.bar = TRUE, maxit = 10, confounder = NULL, NT = 1) {
  sjgr <- as(row.names(object), "GRanges")
  if (print.bar) {
    res <- pbapply::pblapply(
      X = genes.use,
      FUN = function(x) {
        bb_model_test(object = object, sjgr = sjgr, sj = x, cells.1, cells.2, maxit = maxit, confounder = confounder)
      },
      cl = NT)
    res <- do.call(rbind, res)
  } else {
    res <- parallel::mclapply(
      FUN = function(x) {
        bb_model_test(object = object, sjgr = sjgr, sj = x, cells.1, cells.2, maxit = maxit, confounder = confounder)
      },
      X = genes.use,
      mc.cores = NT
    )
    res <- do.call(rbind, res)
  }
  row.names(res) <- genes.use
  return(res)
}
tangchao7498/ICAS documentation built on Jan. 28, 2021, 3:56 p.m.