Nothing
#' Creates a signature representing the association between gene expression (or
#' other molecular profile) and drug dose response, for use in drug sensitivity
#' analysis.
#'
#' Given a Pharmacoset of the sensitivity experiment type, and a list of drugs,
#' the function will compute a signature for the effect gene expression on the
#' molecular profile of a cell. The function returns the estimated coefficient,
#' the t-stat, the p-value and the false discovery rate associated with that
#' coefficient, in a 3 dimensional array, with genes in the first direction,
#' drugs in the second, and the selected return values in the third.
#'
#' @examples
#' data(GDSCsmall)
#' drug.sensitivity <- drugSensitivitySig(GDSCsmall, mDataType="rna",
#' nthread=1, features = fNames(GDSCsmall, "rna")[1])
#' print(drug.sensitivity)
#'
#' @param object \code{PharmacoSet} a PharmacoSet of the perturbation experiment type
#' @param mDataType \code{character} which one of the molecular data types to use
#' in the analysis, out of dna, rna, rnaseq, snp, cnv
#' @param drugs \code{character} a vector of drug names for which to compute the
#' signatures. Should match the names used in the PharmacoSet.
#' @param features \code{character} a vector of features for which to compute the
#' signatures. Should match the names used in correspondant molecular data in PharmacoSet.
#' @param cells \code{character} allows choosing exactly which cell lines to include for the signature fitting.
#' Should be a subset of cellNames(pSet)
#' @param tissues \code{character} a vector of which tissue types to include in the signature fitting.
#' Should be a subset of cellInfo(pSet)$tissueid
#' @param nthread \code{numeric} if multiple cores are available, how many cores
#' should the computation be parallelized over?
#' @param returnValues \code{character} Which of estimate, t-stat, p-value and fdr
#' should the function return for each gene drug pair?
#' @param sensitivity.measure \code{character} which measure of the drug dose
#' sensitivity should the function use for its computations? Use the
#' sensitivityMeasures function to find out what measures are available for each PSet.
#' @param molecular.summary.stat \code{character} What summary statistic should be used to
#' summarize duplicates for cell line molecular profile measurements?
#' @param sensitivity.summary.stat \code{character} What summary statistic should be used to
#' summarize duplicates for cell line sensitivity measurements?
#' @param sensitivity.cutoff \code{numeric} Allows the user to binarize the sensitivity data using this threshold.
#' @param standardize \code{character} One of "SD", "rescale", or "none", for the form of standardization of
#' the data to use. If "SD", the the data is scaled so that SD = 1. If rescale, then the data is scaled so that the 95%
#' interquantile range lies in [0,1]. If none no rescaling is done.
#' @param molecular.cutoff Allows the user to binarize the sensitivity data using this threshold.
#' @param molecular.cutoff.direction \code{character} One of "less" or "greater", allows to set direction of binarization.
#' @param verbose \code{logical} 'TRUE' if the warnings and other informative message shoud be displayed
#' @param ... additional arguments not currently fully supported by the function
#'
#' @return \code{list} a 3D array with genes in the first dimension, drugs in the
#' second, and return values in the third.
#'
#' @importMethodsFrom CoreGx drugSensitivitySig
#' @export
setMethod("drugSensitivitySig",
signature(object="PharmacoSet"),
function(object, mDataType, drugs, features, cells, tissues, sensitivity.measure = "auc_recomputed",
molecular.summary.stat = c("mean", "median", "first", "last", "or", "and"),
sensitivity.summary.stat = c("mean", "median", "first", "last"),
returnValues = c("estimate", "pvalue", "fdr"),
sensitivity.cutoff, standardize = c("SD", "rescale", "none"), molecular.cutoff = NA,
molecular.cutoff.direction = c("less", "greater"), nthread = 1, verbose=TRUE, ...){
.drugSensitivitySigPharmacoSet(object, mDataType, drugs, features, cells, tissues, sensitivity.measure,
molecular.summary.stat, sensitivity.summary.stat, returnValues,
sensitivity.cutoff, standardize, molecular.cutoff, molecular.cutoff.direction,
nthread, verbose, ...)
})
#' @import parallel
#' @importFrom SummarizedExperiment assayNames assay
#' @keywords internal
.drugSensitivitySigPharmacoSet <- function(object,
mDataType,
drugs,
features,
cells,
tissues,
sensitivity.measure = "auc_recomputed",
molecular.summary.stat = c("mean", "median", "first", "last", "or", "and"),
sensitivity.summary.stat = c("mean", "median", "first", "last"),
returnValues = c("estimate", "pvalue", "fdr"),
sensitivity.cutoff, standardize = c("SD", "rescale", "none"),
molecular.cutoff = NA,
molecular.cutoff.direction = c("less", "greater"),
nthread = 1,
verbose=TRUE,
...)
{
### This function needs to: Get a table of AUC values per cell line / drug
### Be able to recompute those values on the fly from raw data if needed to change concentration
### Be able to choose different summary methods on fly if needed (need to add annotation to table to tell what summary method previously used)
### Be able to extract genomic data
### Run rankGeneDrugSens in parallel at the drug level
### Return matrix as we had before
#sensitivity.measure <- match.arg(sensitivity.measure)
molecular.summary.stat <- match.arg(molecular.summary.stat)
sensitivity.summary.stat <- match.arg(sensitivity.summary.stat)
standardize <- match.arg(standardize)
molecular.cutoff.direction <- match.arg(molecular.cutoff.direction)
dots <- list(...)
ndots <- length(dots)
if (!all(sensitivity.measure %in% colnames(sensitivityProfiles(object)))) {
stop (sprintf("Invalid sensitivity measure for %s, choose among: %s", object@annotation$name, paste(colnames(sensitivityProfiles(object)), collapse=", ")))
}
if (!(mDataType %in% names(object@molecularProfiles))) {
stop (sprintf("Invalid mDataType for %s, choose among: %s", object@annotation$name, paste(names(object@molecularProfiles), collapse=", ")))
}
switch (S4Vectors::metadata(object@molecularProfiles[[mDataType]])$annotation,
"mutation" = {
if (!is.element(molecular.summary.stat, c("or", "and"))) {
stop ("Molecular summary statistic for mutation must be either 'or' or 'and'")
}
},
"fusion" = {
if (!is.element(molecular.summary.stat, c("or", "and"))) {
stop ("Molecular summary statistic for fusion must be either 'or' or 'and'")
}
},
"rna" = {
if (!is.element(molecular.summary.stat, c("mean", "median", "first", "last"))) {
stop ("Molecular summary statistic for rna must be either 'mean', 'median', 'first' or 'last'")
}
},
"cnv" = {
if (!is.element(molecular.summary.stat, c("mean", "median", "first", "last"))) {
stop ("Molecular summary statistic for cnv must be either 'mean', 'median', 'first' or 'last'")
}
},
"rnaseq" = {
if (!is.element(molecular.summary.stat, c("mean", "median", "first", "last"))) {
stop ("Molecular summary statistic for rna must be either 'mean', 'median', 'first' or 'last'")
}},
"isoform" = {
if (!is.element(molecular.summary.stat, c("mean", "median", "first", "last"))) {
stop ("Molecular summary statistic for rna must be either 'mean', 'median', 'first' or 'last'")
}},
stop (sprintf("No summary statistic for %s has been implemented yet", S4Vectors::metadata(object@molecularProfiles[[mDataType]])$annotation))
)
if (!is.element(sensitivity.summary.stat, c("mean", "median", "first", "last"))) {
stop ("Sensitivity summary statistic for sensitivity must be either 'mean', 'median', 'first' or 'last'")
}
if (missing(sensitivity.cutoff)) {
sensitivity.cutoff <- NA
}
if (missing(drugs)){
drugn <- drugs <- drugNames(object)
} else {
drugn <- drugs
}
if (missing(cells)){
celln <- cells <- cellNames(object)
} else {
celln <- cells
}
availcore <- parallel::detectCores()
if ( nthread > availcore) {
nthread <- availcore
}
if (missing(features)) {
features <- rownames(featureInfo(object, mDataType))
} else {
fix <- is.element(features, rownames(featureInfo(object, mDataType)))
if (verbose && !all(fix)) {
warning (sprintf("%i/%i features can be found", sum(fix), length(features)))
}
features <- features[fix]
}
if(is.null(dots[["sProfiles"]])){
drugpheno.all <- lapply(sensitivity.measure, function(sensitivity.measure) {
return(t(summarizeSensitivityProfiles(object,
sensitivity.measure = sensitivity.measure,
summary.stat = sensitivity.summary.stat,
verbose = verbose)))
})} else {
sProfiles <- dots[["sProfiles"]]
drugpheno.all <- list(t(sProfiles))
}
dix <- is.element(drugn, do.call(colnames, drugpheno.all))
if (verbose && !all(dix)) {
warning (sprintf("%i/%i drugs can be found", sum(dix), length(drugn)))
}
if (!any(dix)) {
stop("None of the drugs were found in the dataset")
}
drugn <- drugn[dix]
cix <- is.element(celln, do.call(rownames, drugpheno.all))
if (verbose && !all(cix)) {
warning (sprintf("%i/%i cells can be found", sum(cix), length(celln)))
}
if (!any(cix)) {
stop("None of the cells were found in the dataset")
}
celln <- celln[cix]
if(!missing(tissues)){
celln <- celln[cellInfo(object)[celln,"tissueid"] %in% tissues]
} else {
tissues <- unique(cellInfo(object)$tissueid)
}
object@molecularProfiles[[mDataType]] <- summarizeMolecularProfiles(object = object,
mDataType = mDataType,
summary.stat = molecular.summary.stat,
binarize.threshold = molecular.cutoff,
binarize.direction = molecular.cutoff.direction,
verbose = verbose)[features, ]
if(!is.null(dots[["mProfiles"]])){
mProfiles <- dots[["mProfiles"]]
SummarizedExperiment::assay(object@molecularProfiles[[mDataType]]) <- mProfiles[features, colnames(object@molecularProfiles[[mDataType]]), drop = FALSE]
}
drugpheno.all <- lapply(drugpheno.all, function(x) {x[intersect(phenoInfo(object, mDataType)[ ,"cellid"], celln), , drop = FALSE]})
molcellx <- phenoInfo(object, mDataType)[ ,"cellid"] %in% celln
type <- as.factor(cellInfo(object)[phenoInfo(object, mDataType)[molcellx,"cellid"], "tissueid"])
if("batchid" %in% colnames(phenoInfo(object, mDataType))){
batch <- phenoInfo(object, mDataType)[molcellx, "batchid"]
} else {
batch <- rep(NA, times=nrow(phenoInfo(object, mDataType)))
}
batch[!is.na(batch) & batch == "NA"] <- NA
batch <- as.factor(batch)
names(batch) <- phenoInfo(object, mDataType)[molcellx, "cellid"]
batch <- batch[rownames(drugpheno.all[[1]])]
if (verbose) {
message("Computing drug sensitivity signatures...")
}
splitix <- parallel::splitIndices(nx = length(drugn), ncl = 1)
splitix <- splitix[vapply(splitix, length, FUN.VALUE=numeric(1)) > 0]
mcres <- parallel::mclapply(splitix, function(x, drugn, expr, drugpheno, type, batch, standardize, nthread) {
res <- NULL
for(i in drugn[x]) {
## using a linear model (x ~ concentration + cell + batch)
dd <- lapply(drugpheno, function(x) x[,i])
dd <- do.call(cbind, dd)
colnames(dd) <- seq_len(ncol(dd))
if(!is.na(sensitivity.cutoff)) {
dd <- factor(ifelse(dd > sensitivity.cutoff, 1, 0), levels=c(0, 1))
}
rr <- rankGeneDrugSensitivity(data=expr, drugpheno=dd, type=type, batch=batch, single.type=FALSE, standardize=standardize, nthread=nthread, verbose=verbose)
res <- c(res, list(rr$all))
}
names(res) <- drugn[x]
return(res)
}, drugn=drugn, expr=t(molecularProfiles(object, mDataType)[features, molcellx, drop=FALSE]), drugpheno=drugpheno.all, type=type, batch=batch, nthread=nthread, standardize=standardize)
res <- do.call(c, mcres)
res <- res[!vapply(res, is.null, FUN.VALUE=logical(1))]
drug.sensitivity <- array(NA,
dim = c(nrow(featureInfo(object, mDataType)[features,, drop=FALSE]),
length(res), ncol(res[[1]])),
dimnames = list(rownames(featureInfo(object, mDataType)[features,, drop=FALSE]), names(res), colnames(res[[1]])))
for(j in seq_len(ncol(res[[1]]))) {
ttt <- vapply(res, function(x, j, k) {
xx <- array(NA, dim = length(k), dimnames = list(k))
xx[rownames(x)] <- x[ , j, drop=FALSE]
return (xx)
},
j=j,
k=rownames(featureInfo(object, mDataType)[features,, drop = FALSE]),
FUN.VALUE=numeric(dim(drug.sensitivity)[1]))
drug.sensitivity[rownames(featureInfo(object, mDataType)[features,, drop = FALSE]), names(res), j] <- ttt
}
drug.sensitivity <- PharmacoSig(drug.sensitivity,
PSetName = name(object),
Call = as.character(match.call()),
SigType='Sensitivity',
Arguments = list(
"mDataType" = mDataType,
"drugs" = drugs,
"features" = features,
"cells" = cells,
"tissues" = tissues,
"sensitivity.measure" = sensitivity.measure,
"molecular.summary.stat" = molecular.summary.stat,
"sensitivity.summary.stat" = sensitivity.summary.stat,
"returnValues" = returnValues,
"sensitivity.cutoff" = sensitivity.cutoff,
"standardize" = standardize,
"molecular.cutoff" = molecular.cutoff,
"molecular.cutoff.direction" = molecular.cutoff.direction,
"nthread" = nthread,
"verbose" = verbose))
return(drug.sensitivity)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.