Nothing
##' @title Calculate results of a crispr screen from a contrast
##' @description This is a wrapper function that enables direct generation of target-level p-values from a crispr screen.
##' @param fit An object of class \code{MArrayLM} containing, at minimum, a \code{t} slot with t-statistics from the comparison,
##' a \code{df.residual} slot with the corresponding residuals fo the model fits, and an \code{Amean} slot with the respective mean abundances.
##' @param annotation An annotation file for the experiment. gRNAs are annotated by
##' row, and must minimally contain columns \code{geneSymbol} and \code{geneID}.
##' @param RRAalphaCutoff A cutoff to use when defining gRNAs with significantly altered abundance during the RRAa aggregation step, which may be specified
##' as a single numeric value on the unit interval or as a logical vector. When supplied as a logical vector (of length equal to \code{nrows(fit)}),
##' this parameter directly indicates the gRNAs to include during RRAa aggregation. Otherwise, if \code{scoring} is set
##' to \code{pvalue} or \code{combined}, this parameter is interpreted as the maximum nominal p-value required to consider a gRNA's abundance meaningfully
##' altered during the aggregation step. If \code{scoring} is \code{fc}, this parameter is interpreted as the proportion of the list to be considered
##' meaningfully altered in the experiment (e.g., if \code{RRAalphaCutoff} is set to 0.05, only consider the rankings of the 5% most upregulated
##' (or downregulated) gRNAs for the purposes of RRAa calculations).
##'
##' Note that this function uses directional tests to identify enriched or depleted targets, and when RRAalphaCutoff is
##' provided as a logical vector, only one of these hypotheses is implicitly specified; this means that enrichment and
##' depletion cannot be .
##' @param permutations The number of permutations to use during the RRAa aggregation step.
##' @param contrast.term If a fit object with multiple coefficients is passed in, a string indiating the coefficient of interest.
##' @param scoring The gRNA ranking method to use in RRAa aggregation. May take one of three values: \code{pvalue}, \code{fc},
##' or '\code{combined}'. \code{pvalue} indicates that the gRNA ranking statistic should be created from the (one-sided) p-values in the
##' fit object. \code{fc} indicates that the ranks of the gRNA coefficients should be used instead, and \code{combined} indicates that
##' that the coefficents should be used as the ranking statistic but gRNAs are discarded in the aggregation step based on the corresponding nominal
##' p-value in the fit object.
##' @param permutation.seed numeric seed for permutation reproducibility.
##' Default: \code{NULL} means to not set any seed. This argument is passed
##' through to \code{\link{ct.RRAaPvals}}.
##' @return A dataframe containing gRNA-level and target-level statistics. In addition to the information present in the supplied annotation object,
##' the returned object indicates P-values and Q-values for the depletion and enrichment of each gRNA and associated target, the median log2 fold
##' change estimate among all gRNAs associated with the target, and Rho statistics that are calculated internally by the RRAa algorithm that may be
##' useful in ranking targets that are considered significant at a given alpha or false discovery threshold.
##' @author Russell Bainer
##' @examples data('fit')
##' data('ann')
##' output <- ct.generateResults(fit, ann, permutations = 10)
##' head(output)
##' @return A `resultsDF` formatted dataframe containing gene-level statistics.
##' @examples
##' p = seq(0, 1, length.out=20)
##' fc = seq(-3, 3, length.out=20)
##' fc[2] = NA
##' fc[3] = -20
##' stats = data.frame(
##' Depletion.P=p,
##' Enrichment.P=rev(p),
##' fc=fc
##' )
##' ct.applyAlpha(stats,scoring="combined")
##' @export
ct.generateResults <- function(fit,
annotation,
RRAalphaCutoff = 0.1,
permutations = 1000,
contrast.term = NULL,
scoring = c("combined", "pvalue", "fc"),
permutation.seed = NULL) {
## figure out the scoring method
scoring <- match.arg(scoring)
#make sure that the Fit has P-values.
if(!('p.value' %in% names(fit))){
stop('Evidence for differential gRNA abundance (p-values) has not been estimated for the provided fit object. Please do so using eBayes(), treat(), or similar method.')
}
if(ncol(fit$coefficients) > 1){
if(is.null(contrast.term)){
stop("The fit object contains multiple coefficients. Please specify a contrast.term.")
}
fit <- ct.preprocessFit(fit, contrast.term)
}
#The code below is now handled in the prepareAnnotation function but is retained for legacy purposes.
## filter the annotation file as necessary for downstream processes, and format it for pval generation
#if(!setequal(row.names(fit), row.names(annotation))){
# if(length(setdiff(row.names(fit), row.names(annotation))) > 0){
# stop("fit contains elements not present in the annotation file.")
# }
# message("The annotation file contains elements not present in the fit object. They will be discarded for downstream analyses.")
# annotation <- annotation[row.names(fit),]
#}
## Prepare the ranking values and calculate p-values
pvals <- ct.DirectionalTests(fit)
rra.input <- ct.applyAlpha(cbind(pvals, FC=fit$coefficients[,1]), RRAalphaCutoff, scoring )
## Add pvalues and qvalues
key <- ct.prepareAnnotation(annotation, fit, throw.error = FALSE)
geneP.depletion <-
ct.RRAaPvals(
rra.input[,"scores.deplete", drop=FALSE],
g.key = key,
permute = permutations,
permutation.seed = permutation.seed
)
geneP.enrichment <-
ct.RRAaPvals(
rra.input[,"scores.enrich", drop=FALSE],
g.key = key,
permute = permutations,
permutation.seed = permutation.seed
)
## generate the Rho values and ranks:
## These are redundant with work done in ct.RRAPvals ...
rhoEnrich <- ct.RRAalpha(rra.input[,"scores.enrich", drop=FALSE],
g.key = key,
shuffle = FALSE)
rhoDeplete <- ct.RRAalpha(rra.input[,"scores.deplete", drop=FALSE],
g.key = key,
shuffle = FALSE)
annotFields <- c("ID", "target", "geneID", "geneSymbol")
if(!all(annotFields %in% names(key))){
message(paste("Some expected columns are not present in the supplied annotation file.", call. = FALSE))
annotFields <- intersect(annotFields, names(key))
message(paste("Only the following information will be included in the output:", paste(annotFields, collapse = ',')))
}
## make the DF
summaryDF <- key[,annotFields]
summaryDF$geneSymbol <- as.character(summaryDF$geneSymbol)
summaryDF["gRNA Log2 Fold Change"] <- fit$coefficients[row.names(summaryDF),1]
summaryDF["gRNA Depletion P"] <- signif(pvals[row.names(summaryDF),1], 5)
summaryDF["gRNA Depletion Q"] <- signif(p.adjust(pvals[,'Depletion.P'], "fdr")[row.names(summaryDF)], 5)
summaryDF["gRNA Enrichment P"] <- signif(pvals[row.names(summaryDF),2], 5)
summaryDF["gRNA Enrichment Q"] <- signif(p.adjust(pvals[,'Enrichment.P'], "fdr")[row.names(summaryDF)], 5)
summaryDF["Target-level Enrichment P"] <- geneP.enrichment[summaryDF$geneSymbol]
summaryDF["Target-level Enrichment Q"] <- p.adjust(geneP.enrichment,"fdr")[summaryDF$geneSymbol]
summaryDF["Target-level Depletion P"] <- geneP.depletion[summaryDF$geneSymbol]
summaryDF["Target-level Depletion Q"] <- p.adjust(geneP.depletion,"fdr")[summaryDF$geneSymbol]
## Add a column for the median FC for each target:
medianfc <- tapply(summaryDF[,"gRNA Log2 Fold Change"], summaryDF[,"geneSymbol"], median, na.rm=TRUE)
summaryDF["Median log2 Fold Change"] <- as.numeric(medianfc[ summaryDF$geneSymbol ]) # One per gene rep'd out to one per guide
summaryDF["Rho_enrich"] <-rhoEnrich[summaryDF$geneSymbol]
summaryDF["Rho_deplete"] <- rhoDeplete[summaryDF$geneSymbol]
## order them
summaryDF <- summaryDF[order(summaryDF[,"Rho_enrich"], decreasing = FALSE),]
summaryDF <- summaryDF[order(summaryDF[,"Target-level Enrichment P"], decreasing = FALSE),]
return(summaryDF)
}
##' Apply RRA 'alpha' cutoff to RRAalpha input
##'
##' The 'alpha' part of RRAalpha is used to consider only the top guide-level scores for gene-level
##' statistics. Practically, all guides failing the cutoff get a pvalue of 1. There are three ways of
##' determining which guides fail. See 'scoring' below.
##' @param stats three-column numeric matrix with pvalues for down and up one-sided test with guide-level fold
##' changes (coefficients from the relevant contrast).
##' @param RRAalphaCutoff A cutoff to use when defining gRNAs with significantly altered abundance during the RRAa aggregation step, which may be specified
##' as a single numeric value on the unit interval or as a logical vector. When supplied as a logical vector (of length equal to \code{nrows(fit)}),
##' this parameter directly indicates the gRNAs to include during RRAa aggregation. Otherwise, if \code{scoring} is set
##' to \code{pvalue} or \code{combined}, this parameter is interpreted as the maximum nominal p-value required to consider a gRNA's abundance meaningfully
##' altered during the aggregation step. If \code{scoring} is \code{fc}, this parameter is interpreted as the proportion of the list to be considered
##' meaningfully altered in the experiment (e.g., if \code{RRAalphaCutoff} is set to 0.05, only consider the rankings of the 5% most upregulated
##' (or downregulated) gRNAs for the purposes of RRAa calculations).
##' @param scoring The gRNA ranking method to use in RRAa aggregation. May take one of three values: \code{pvalue}, \code{fc},
##' or '\code{combined}'. \code{pvalue} indicates that the gRNA ranking statistic should be created from the (one-sided) p-values in the
##' fit object. \code{fc} indicates that the ranks of the gRNA coefficients should be used instead, and \code{combined} indicates that
##' that the coefficents should be used as the ranking statistic but gRNAs are discarded in the aggregation step based on the corresponding nominal
##' p-value in the fit object.
##' @return data.frame with guide-level pvals, fold change, and scores.deplete and scores.enrich which are the
##' input the RRAalpha
##' @author Russell Bainer
##' @export
ct.applyAlpha <- function(stats, RRAalphaCutoff=0.1, scoring = c("combined", "pvalue", "fc")) {
scoring <- match.arg(scoring)
pvals = stats[,1:2]
foldchange <- cbind(stats[,3], -stats[,3])
##RRAalphaCutoff format
if (length(RRAalphaCutoff) == 1) {
rra.logic <- FALSE
if(!is.numeric(RRAalphaCutoff) | (RRAalphaCutoff < 0) | (RRAalphaCutoff > 1)){
stop('When provided as a single value, RRAalphaCutoff must be a numeric value equal to 0, 1, or something in between.')
}
} else {
rra.logic <- TRUE
if((sum(RRAalphaCutoff %in% c(TRUE, FALSE)) != nrow(pvals)) | (length(RRAalphaCutoff) != nrow(pvals))){
stop('When provided as a vector, RRAalphaCutoff must be the exact length of the p-value vectors in the fit object and must only contain TRUE or FALSE values.')
}
}
if (scoring %in% "fc") {
##Normalize values to rank scores
scores.deplete <- as.matrix(rank(foldchange[,1])/nrow(foldchange))
scores.enrich <- as.matrix(rank(foldchange[,2])/nrow(foldchange))
##determine the fold-change significance cutoffs
cut.deplete <- sort(scores.deplete[,1])[round(nrow(scores.deplete) * RRAalphaCutoff)]
cut.enrich <- sort(scores.enrich[,1])[round(nrow(scores.enrich) * RRAalphaCutoff)]
if(rra.logic){
cut.deplete <- RRAalphaCutoff
cut.enrich <- RRAalphaCutoff
}
} else if (scoring %in% "pvalue") {
## Normalize values to rank scores
scores.deplete <- as.matrix(rank(pvals[,'Depletion.P'])/nrow(pvals))
scores.enrich <- as.matrix(rank(pvals[,'Enrichment.P'])/nrow(pvals))
## determine the significance cutoffs for the rank statistics on the basis of p-values
cut.deplete <- (pvals[,'Depletion.P'] <= RRAalphaCutoff)
cut.enrich <- (pvals[,'Enrichment.P'] <= RRAalphaCutoff)
if (rra.logic) {
cut.deplete <- RRAalphaCutoff
cut.enrich <- RRAalphaCutoff
}
} else {
cut.deplete <- (pvals[,'Depletion.P'] <= RRAalphaCutoff)
cut.deplete[ is.na(cut.deplete) ] <- FALSE
scores.deplete <- as.matrix(rank(foldchange[,1])/nrow(foldchange))
cut.enrich <- (pvals[,'Enrichment.P'] <= RRAalphaCutoff)
cut.enrich[ is.na(cut.enrich) ] <- FALSE
scores.enrich <- as.matrix(rank(foldchange[,2])/nrow(foldchange))
if (rra.logic) {
cut.deplete <- RRAalphaCutoff
cut.enrich <- RRAalphaCutoff
}
}
## nonsignificant gRNAs are set to 1. Implicitly, this means that the significance
## relative to alpha is treated as an inherent property of the gRNA.
if(length(cut.deplete) == 1)
pass <- scores.deplete[,1] <= cut.deplete
else
pass <- cut.deplete
scores.deplete[!pass,1] <- 1
if(length(cut.enrich) == 1)
pass <- scores.enrich[,1] <= cut.enrich
else
pass <- cut.enrich
scores.enrich[!pass,1] <- 1
out = cbind(stats, scores.deplete=scores.deplete[,1], scores.enrich=scores.enrich[,1])
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.