#' Test association of a per-gene attribute with each column in a GeneMatrix
#'
#' Test the association of a per-gene attribute y with each column, where a column
#' can represent a gene set, a motif, transcription factor targets, or other genes
#' that are functionally related
#'
#' @param m A GeneMatrix object
#' @param genes A vector of gene names (must match the gene names in the GeneMatrix)
#' @param y A per-gene metric of the same length as the vector of gene names
#' @param method Method used to test association between the per-gene metric and
#' each column - either c("lasso", "t.test", "wilcoxon" or "hypergeometric")
#' @param ... Additional arguments that will be given to the test in question
#'
#' @details A t-test or Wilcoxon test can take an additional argument of \code{alternative},
#' which allows a one-sided test with an alternative hypothesis of "less" or "greater".
#' A t=test can also take the \code{var.equal} argument, indicating whether to treat the
#' variance in and outside each group as identical.
#'
#' LASSO fits a penalized least squares model:
#'
#' min (y - \beta X)^2 + \lambda \sum|\beta|}
#'
#' @examples
#'
#'library(org.Sc.sgd.db)
#'
#'n_genes <- 200
#'genes <- sample(mappedkeys(org.Sc.sgdGO), n_genes)
#'
#'mm <- GOMembershipMatrix(org.Sc.sgdGO, min_size = 5,
#' max_size = 50, chosen_genes = genes)
#'# some genes are dropped because they do not have GO categories
#'
#'n_genes <- nrow(mm)
#'genes <- mm@geneData$ID
#'
#'beta = c(20:1, rep(0, times = nrow(mm@colData)-20))
#'y = c(as.matrix(mm) %*% beta + rnorm(n_genes, 0, 2))
#'
#'results <- TestAssociation(mm, genes, y)
#'View(results@colData)
#'
#' @export
TestAssociation = function(m, genes, y, method = "lasso", ...) {
if (!inherits(m, "GeneMatrix")) {
stop("TestAssociation should be given a GeneMatrix object")
}
if (!is.null(m@fit)) {
warning(paste0("Performing a test on a model that has already been ",
"tested, previous results may be overwritten"))
}
# prefiltering of membership matrix
v <- genes %in% m@geneData$ID
y <- y[v]
genes <- genes[v]
m <- m[genes, ]
m <- m[, colSums(m@matrix != 0) > 0] # filter columns *after* rows
m@geneData$y <- y
# apply desired method
method = match.arg(method, c("lasso", "wilcoxon", "hypergeometric", "t.test"))
if (method == "lasso") {
m@fit <- glmnet::cv.glmnet(m@matrix, y, ...)
beta_ind <- which(m@fit$lambda.1se == m@fit$lambda)
m@colData$beta <- apply(m@fit$glmnet.fit$beta, 1,
function(r) r[r != 0][1])
m@colData$beta1se <- as.numeric(m@fit$glmnet.fit$beta[, as.numeric(beta_ind)])
m@colData$step <- apply(m@fit$glmnet.fit$beta, 1,
function(r) which(r != 0)[1])
m@colData$ranking <- -abs(m@colData$step)
m@rankingMetric <- "ranking"
m@effectMetric <- "beta"
m@plottingMetric <- "beta1se"
}
else if (method == "t.test") {
tt <- vectorized_t_test(m@matrix > 0, y, tbl = TRUE, ...)
m@colData$estimate <- tt$estimate
m@colData$p.value <- tt$p.value
m@rankingMetric <- "p.value"
m@effectMetric <- "estimate"
m@plottingMetrix <- "estimate"
}
else if (method == "wilcoxon") {
w <- vectorized_wilcoxon_test(m@matrix > 0, y, tbl = TRUE, ...)
m@colData$auc <- w$auc
m@colData$p.value <- w$p.value
m@rankingMetric <- "p.value"
m@effectMetric <- "auc"
m@plottingMetric <- "auc"
}
else if (method == "hypergeometric") {
if (!is.logical(y) & !(is.numeric(y) & all(y == 0 | y == 1))) {
stop("For hypergeometric test, y must be either logical or binary")
}
stop("Not yet implemented")
mat <- m@matrix > 0
overlaps <- colSums(mat[as.logical(y), ])
total_genes <- nrow(m)
total_y <- sum(y)
totals <- colSums(mat)
#m@colData$phi <-
m@colData$p.value <- phyper(overlaps - 1, # number of white balls in each set
total_y, # white balls in urn
total_genes - total_genes, # black balls in urn
totals, # number drawn in each set
lower.tail = FALSE)
m@rankingMetric <- "p.value"
m@effectMetric <- "phi"
m@plottingMetric <- "phi"
}
# save the method used
m@assocMethod <- method
# finally, compute the difference within and between the sets
mat <- m@matrix
not_mat <- 1 - mat
mean_within <- colSums(y * mat) / colSums(mat)
mean_outside <- colSums(y * not_mat) / colSums(not_mat)
m@colData$MeanDifference <- mean_within - mean_outside
m
}
#' Compute mean differences between "in a set" and "not in a set"
#'
#' For each set in a GeneMatrix, compute the difference in y between
#' "in the set"
#' and "outside the set". It will be set in the MeanDifference column
#'
#' @param m GeneMatrix object
#'
#' @export
mean_difference <- function(m) {
y <- m@geneData$y
if (is.null(y)) {
stop("y not yet set, has TestAssociation been performed?")
}
sub_m <- m@matrix
sub_m_not <- 1 - sub_m
mean_within <- colSums(y * sub_m) / colSums(sub_m)
mean_outside <- colSums(y * sub_m_not) / colSums(sub_m_not)
m@colData$MeanDifference <- mean_within - mean_outside
m
}
#' Return a vector of genes that pass a threshold for includions
#'
#' Return a vector of genes that pass a threshold of "significance"
#' or other kind of inclusion. For tests with a p-value (t-test,
#' Wilcoxon, hypergeometric), this is FDR-controlled p-values.
#' For LASSO, this is all cases where beta1sd != 0.
#'
#' @param m GeneMatrix object
#' @param alpha Threshold for (corrected) p-values, not used in LASSO
#' @param method Method, passed to \code{\link{p.adjust}}, or "qvalue"
#' to use qvalue. Not used in LASSO
#' @param ... Extra arguments to pass on to correction method
#'
#' @seealso \link{p.adjust}, \link{p.adjust.methods}
ThresholdSets <- function(m, alpha = .05, method = "fdr", ...) {
if(m@assocMethod == "lasso"){
# filtering of lasso values where beta1sd != 0
m@colData$ID[m@colData$beta1se != 0]
}else{
# p-value filtering based on provided filter_method
if (method == "qvalue") {
p.adjusted <- qvalue::qvalue(m@colData$p.value, ...)$qvalues
} else {
p.adjusted <- p.adjust(m@colData$p.value, method, ...)
}
m@colData$ID[!is.na(p.adjusted) & p.adjusted < alpha]
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.