#'MIMOSA: Mixture Models for Single Cell Assays
#'
#'MIMOSA implements mxitures of Dirichlet-multinomial or Beta-binomial models
#'for paired count data from single--cell assays that typically arise
#'in immunological studies. It can be used for ICS (Intracellular Cytokine
#'Staining) assays to detect vaccine responders, for example, or to detect
#'changes in proportions of cells expressing a gene, such as in Fluidigm Biomark
#'Single--cell gene expression.
#'
#'@docType package
#'@useDynLib MIMOSA, .registration=TRUE
#'@rdname MIMOSA-package
#'@import Formula
#'@importFrom testthat test_dir expect_that context
#'@importFrom MASS ginv
#'@importClassesFrom methods array character data.frame factor integer matrix numeric
#'@importFrom grDevices gray.colors
#'@importFrom graphics contour image legend lines par plot points title
#'@importFrom methods callNextMethod show
#'@importFrom stats as.formula dbeta dexp ecdf fisher.test model.frame na.omit optim p.adjust pbeta qbeta quantile rbeta rbinom rmultinom rnorm runif terms var
#'@importFrom utils read.table globalVariables
#'@importFrom reshape rename melt cast
#'@importFrom plyr ddply is.formula
#'@importFrom methods new
#'@importFrom Rcpp evalCpp
#'@name MIMOSA-package
#'@seealso \code{\link{MIMOSA}}, \code{\link{ConstructMIMOSAExpressionSet}}
#'@references Greg Finak, Andrew McDavid, Pratip Chattopadhyay, Maria Dominguez, Stephen C De Rosa, Mario Roederer, Raphael Gottardo
#' Mixture Models for Single Cell Assays with Applications to Vaccine Studies
#' Biostatistics, 2013, \url{http://biostatistics.oxfordjournals.org/content/early/2013/07/24/biostatistics.kxt024.abstract}
NULL
utils::globalVariables(c( "PTID", "Proportion", "Proportion_REF", "RefTreat", "rr", ".", "mclapply"))
#' Stimulated and unstimulated T-cell counts for an ICS assay
#'
#' A data set containing T-cell counts for various stimulations and cytokines in an ICS assay.
#'
#' \itemize{
#' \item pos. The positive cell counts
#' \item neg. The negative cell counts
#' \item fname. The feature name (cytokine) measured
#' \item parent. The parent T-cell population
#' \item antigen. The antigen stimulation for this sample
#' \item ID. The subject ID
#' }
#'
#'@docType data
#'@keywords datasets
#'@format A data frame with 3960 rows
#'@name ICS
NULL
#'Fit a MIMOSA Model
#'
#'This method fits a MIMOSA model to count data stored in an ExpressionSet
#'object.
#'
#'@details The ExpressionSet should be fully annotated with featureData and
#' phenoData. For ICS data, for example, features would be positive and
#' negative counts for different cytokine producing cell subsets (i.e.
#' IFNg_pos, IFNg_neg) The formula lhs should contain features and the rhs
#' should contain phenotypic variable. See the vignette for an example.
#'
#'@param formula describing the features on the lhs and the phenodata on the
#' rhs, supporting extended formula interface with conditioning.
#'@param data an \code{ExpressionSet} object with features on rows and samples
#' (labelled with phenoData) on columns.
#'@param ... additional arguments
#'@return an object of type \code{MIMOSAResult}
#'@aliases MIMOSA,formula,ExpressionSet-method
#'@importFrom data.table key
#'@importFrom coda mcmc
#'@examples
#' data(ICS)
#' E<-ConstructMIMOSAExpressionSet(ICS,
#' reference=ANTIGEN%in%'negctrl',measure.columns=c('CYTNUM','NSUB'),
#' other.annotations=c('CYTOKINE','TCELLSUBSET','ANTIGEN','UID'),
#' default.cast.formula=component~UID+ANTIGEN+CYTOKINE+TCELLSUBSET,
#' .variables=.(TCELLSUBSET,CYTOKINE,UID),
#' featureCols=1,ref.append.replace='_REF')
#'
#' result<-MIMOSA(NSUB+CYTNUM~UID+TCELLSUBSET+CYTOKINE|ANTIGEN,
#' data=E, method='EM',
#' subset=RefTreat%in%'Treatment'&ANTIGEN%in%'ENV',
#' ref=ANTIGEN%in%'ENV'&RefTreat%in%'Reference')
#'@seealso \code{\link{MIMOSA-package}} \code{\link{ConstructMIMOSAExpressionSet}} \code{\link{MIMOSAResult}}
#'@export
setGeneric("MIMOSA", def = function(formula, data, ...) {
standardGeneric("MIMOSA")
})
setMethod("MIMOSA", c("formula", "ExpressionSet"), definition = function(formula,
data, ref = RefTreat %in% "Reference", RT = TRUE, method = "mcmc", subset = RefTreat %in%
"Treatment", getP = FALSE, p.thin = 1, run.parallel = FALSE, cleanup = TRUE,
...) {
if (!exists("ref")) {
stop("ref must contain expressions that define subsets to be compared")
}
if (!inherits(formula, "Formula")) {
formula <- Formula(formula)
}
# Does the formula contain RefTreat?
if (!any(attr(terms(formula), "term.labels") %in% "RefTreat") & RT) {
warning("Formula does not contain the RefTreat variable. It will be added automatically. Set RT=FALSE to disable this.")
formula <- Formula(formula(gsub("\\|", "+ RefTreat |", paste0(deparse(formula),
collapse = ""))))
}
method <- match.arg(method, c("mcmc", "EM"))
# if (Sys.info()["sysname"] == "Darwin") {
# run.parallel <- FALSE
# warning("Parallel is disabled on the Mac due to a bug that is yet to be tracked down.\n Your analysis will run in serial.\n Grab a coffee and wait..\n")
#}
mf.ref <- model.frame(formula, data, na.action = NULL)
mf.test <- model.frame(formula, data, na.action = NULL)
if (!missing(ref)) {
mf.ref <- mf.ref[eval(substitute(ref), pData(data)), , drop = FALSE]
}
if (!missing(subset)) {
mf.test <- mf.test[eval(substitute(subset), pData(data)), , drop = FALSE]
}
if (RT) {
if (nlevels(factor(mf.ref$RefTreat)) != 1) {
mf.ref <- mf.ref[mf.ref$RefTreat %in% "Reference", ]
}
if (nlevels(factor(mf.test$RefTreat)) != 1) {
mf.test <- mf.test[mf.test$RefTreat %in% "Treatment", ]
}
}
if (length(formula)[2] > 1) {
spl.ref <- model.part(formula, mf.ref, rhs = 2)
spl.test <- model.part(formula, mf.test, rhs = 2)
}
interact <- function(x, ...) {
if (length(x) == 1) {
return(factor(x[[1]]))
}
if (length(x) == 2) {
interaction(x[[1]], x[[2]], drop = TRUE, ...)
} else {
interaction(x[[1]], Recall(x[-1L]), ...)
}
}
if (length(formula)[2] > 1) {
ref <- split(model.part(formula, mf.ref, lhs = 1), factor(interact(spl.ref)))
test <- split(model.part(formula, mf.test, lhs = 1), factor(interact(spl.test)))
pd <- split(model.part(formula, mf.test, rhs = 1:2), factor(interact(spl.test)))
result <- vector("list", length(test))
# Here should filter stuff that has empty categories.
filter_empty <- do.call(c, lapply(ref, function(x) dim(x)[1] != 0))
ref <- ref[filter_empty]
test <- test[filter_empty]
pd <- pd[filter_empty]
} else {
ref <- list(model.part(formula, mf.ref, lhs = 1))
test <- list(model.part(formula, mf.test, lhs = 1))
pd <- list(model.part(formula, mf.test, rhs = 1))
result <- vector("list", 1)
filter_empty <- do.call(c, lapply(ref, function(x) dim(x)[1] != 0))
ref <- ref[filter_empty]
test <- test[filter_empty]
pd <- pd[filter_empty]
}
if (!run.parallel) {
for (i in 1:length(test)) {
# recycle the reference
j <- data.frame(1:length(test), 1:length(ref))[i, 2]
fitme <- list(n.stim = test[[i]], n.unstim = ref[[j]])
# remove NAs
nas <- unique(rbind(which(is.na(fitme[[1]]), TRUE), which(is.na(fitme[[2]]),
TRUE))[, 1])
# also remove zeros
if (length(nas) > 0) {
fitme[[1]] <- fitme[[1]][-nas, ]
fitme[[2]] <- fitme[[2]][-nas, ]
pd[[i]] <- pd[[i]][-nas, ]
}
# don't fit empty data! These are now filtered out earlier..
if (nrow(pd[[i]]) > 0) {
if (method %in% "mcmc") {
res <- .fitMCMC(fitme, inits = MDMix(fitme, initonly = TRUE), ...)
res$params <- res$params <- apply(res$getmcmc(), 2, function(x) quantile(x,
c(0.025, 0.5, 0.975), na.rm = TRUE))
if (ncol(fitme[[1]]) == 2 & getP) {
res$p <- lapply(res$getP(thin = p.thin), function(x) do.call(rbind,
lapply(x, function(x) quantile(x, c(0.025, 0.5, 0.975), na.rm = TRUE))))
} else {
res$p <- list()
}
if(ncol(fitme[[1]]) == 2){
res$IndMat<-res$getRespInd()
}
attr(res, "pData") <- new("AnnotatedDataFrame", pd[[i]])
colnames(res$IndMat)<-rownames(pd[[i]])
outfile <- get("outfile", environment(res$getmcmc))
if (cleanup) {
unlink(outfile)
unlink(paste(outfile, "P", sep = ""))
}
res <- MCMCResult(object = res)
} else if (method %in% "EM") {
res <- try(MDMix(fitme))
if (inherits(res, "try-error")) {
message("Failed to fit subset ", i, " with EM. Trying mcmc")
res <- .fitMCMC(fitme, inits = MDMix(fitme, initonly = TRUE),
...)
res$params <- res$params <- apply(res$getmcmc(), 2, function(x) quantile(x,
c(0.025, 0.5, 0.975), na.rm = TRUE))
if (ncol(fitme[[1]]) == 2 & getP) {
res$p <- lapply(res$getP(thin = p.thin), function(x) do.call(rbind,
lapply(x, function(x) quantile(x, c(0.025, 0.5, 0.975), na.rm = TRUE))))
} else {
res$p <- list()
}
attr(res, "pData") <- new("AnnotatedDataFrame", pd[[i]])
outfile <- get("outfile", environment(res$getmcmc))
if (cleanup) {
unlink(outfile)
unlink(paste(outfile, "P", sep = ""))
}
res <- MCMCResult(object = res)
if(!all.equal(res@IndMat,data.frame())){
colnames(res@IndMat)<-rownames(pData(res))
}
} else {
res@pd <- new("AnnotatedDataFrame", pd[[i]])
}
}
res <- new("MIMOSAResult", result = res)
result[[i]] <- res
} else {
result[[i]] <- NA
}
}
} else if (run.parallel & any(grepl("parallel", loadedNamespaces()))) {
result <- mclapply(1:length(test), function(i) {
# recycle the reference
j <- data.frame(1:length(test), 1:length(ref))[i, 2]
fitme <- list(n.stim = test[[i]], n.unstim = ref[[j]])
# remove NAs
nas <- unique(rbind(which(is.na(fitme[[1]]), TRUE), which(is.na(fitme[[2]]),
TRUE))[, 1])
if (length(nas) > 0) {
fitme[[1]] <- fitme[[1]][-nas, ]
fitme[[2]] <- fitme[[2]][-nas, ]
pd[[i]] <- pd[[i]][-nas, ]
}
if (nrow(pd[[i]]) > 0) {
if (method %in% "mcmc") {
res <- .fitMCMC(fitme, inits = MDMix(fitme, initonly = TRUE), ...)
res$params <- res$params <- apply(res$getmcmc(), 2, function(x) quantile(x,
c(0.025, 0.5, 0.975), na.rm = TRUE))
if (ncol(fitme[[1]]) == 2 & getP) {
res$p <- lapply(res$getP(thin = p.thin), function(x) do.call(rbind,
lapply(x, function(x) quantile(x, c(0.025, 0.5, 0.975), na.rm = TRUE))))
} else {
res$p <- list()
}
attr(res, "pData") <- new("AnnotatedDataFrame", pd[[i]])
outfile <- get("outfile", environment(res$getmcmc))
unlink(outfile)
unlink(paste(outfile, "P", sep = ""))
res <- MCMCResult(object = res)
} else if (method %in% "EM") {
res <- try(MDMix(fitme))
res <- try(MDMix(fitme))
if (inherits(res, "try-error")) {
message("Failed to fit subset ", i, " with EM. Trying mcmc")
res <- .fitMCMC(fitme, inits = MDMix(fitme, initonly = TRUE),
...)
res$params <- res$params <- apply(res$getmcmc(), 2, function(x) quantile(x,
c(0.025, 0.5, 0.975), na.rm = TRUE))
if (ncol(fitme[[1]]) == 2 & getP) {
res$p <- lapply(res$getP(thin = p.thin), function(x) do.call(rbind,
lapply(x, function(x) quantile(x, c(0.025, 0.5, 0.975), na.rm = TRUE))))
} else {
res$p <- list()
}
attr(res, "pData") <- new("AnnotatedDataFrame", pd[[i]])
outfile <- get("outfile", environment(res$getmcmc))
unlink(outfile)
unlink(paste(outfile, "P", sep = ""))
res <- MCMCResult(object = res)
} else {
res@pd <- new("AnnotatedDataFrame", pd[[i]])
}
}
res <- new("MIMOSAResult", result = res)
return(res)
} else {
return(NA)
}
})
} else {
stop("Can't run parallel MIMOSA. Must load multicore package.")
}
class(result) <- c("MIMOSAResultList", "list")
if (length(result) > 0) {
splitted <- strsplit(paste0(deparse(formula[[3]]), collapse = ""), "|", fixed = TRUE)[[1]]
if (length(splitted) > 1) {
depf <- strsplit(gsub(" ", "", splitted[[2]]), "+", fixed = TRUE)[[1]]
} else {
depf <- NULL
}
n_vars <- length(depf)
# NOTE this is probably wrong..
if (n_vars > 0) {
#browser()
#names(result) <- levels(factor(interaction(as.data.frame(lapply(pData(result)[,
# depf, with = FALSE], factor)))))
names(result) <- names(test)
} else {
names(result) <- "result"
}
}
return(result)
})
#'@importFrom Biobase `pData<-`
#'@title pData
#'@docType methods
#'@rdname pData-methods
#'@param value the phenoData table to be assigned to the object.
#'@export
setMethod("pData<-","MCMCResult",function(object,value){
pData(object@result@phenoData)<-value
object
})
#'@export
#'@rdname pData-methods
setMethod("pData<-","MIMOSAResult",function(object,value){
pData(object@result@phenoData)<-value
object
})
#'@export
#'@rdname pData-methods
setMethod("pData<-", c("BetaMixResult", "data.frame"), function(object, value) {
pData(object@pd) <- value
object
})
#'pData extract the phenoData table from a MIMOSA result
#'@details Extracts the phenoData data.frame from a MIMOSAResult object
#'
#'@param object is the MIMOSAResult returned from a call to MIMOSA
#'@return an object of type \code{data.frame}
#'@importMethodsFrom Biobase pData
#'@aliases pData,MIMOSAResult-method
#'@method pData MIMOSAResult
#'@rdname pData-methods
#'@export
setMethod("pData", "MIMOSAResult", function(object) {
pData(object@result)
})
#'@method pData MDMixResult
#'@aliases pData,BetaMixResult
#'@aliases pData,MDMixResult-method
#'@rdname pData-methods
#'@export
setMethod("pData", "MDMixResult", function(object) {
pData(object@pd)
})
#'@aliases pData,MCMCResult-method
#'@method pData MCMCResult
#'@rdname pData-methods
#'@export
setMethod("pData", "MCMCResult", function(object) {
pData(object@phenoData)
})
#'@method pData MCMCResult
#'@rdname pData-methods
#'@export
setMethod("pData", "BetaMixResult", function(object) {
pData(object@pd)
})
# 'roc computes an ROC curve ' '@param p is the probability of a positive result
# '@param truth is a logical with the true positive and negative results
roc <- function(p, truth) {
s <- seq(0, 1, l = 1000)
table <- t(sapply(s, function(th) {
prop.table(table(test = factor(p <= th, levels = c("FALSE", "TRUE")), truth = truth),
margin = 2)["TRUE", ]
}))
colnames(table) <- c("FPR", "TPR")
table
}
# 'fdrComparison calculates the observed vs expected false discovery rate '@param
# fdr is the expected false discovery rate '@param truth is a logical with the
# true positive and negative results
fdrComparison <- function(fdr, truth) {
truth <- truth[order(fdr)]
true.fdr <- cumsum(1 - truth)/1:length(truth)
fdr <- sort(fdr)
cbind(fdr, true.fdr)
}
#' Construct an ExpressionSet for MIMOSA
#'
#' Starting from a reshaped data frame in the correct format, construct
#' an ExpressionSet object that can be used with MIMOSA.
#'
#' @details The featureCols will be used to construct feature names, and these
#' columns will be dropped from the exprs matrix. The column names are assumed
#' to have names that contain '_' characters separating phenotypic
#' characteristics. These would be generated automatically
#' if the data frame was constrcuted with 'reshape'. They
#' are used to construct the phenoData for the expression set
#' @param df a data.frame that is in the correct form
#' @param featureCols the indices of the columns that identify features.
#' @importFrom Biobase ExpressionSet AnnotatedDataFrame
#' @importFrom ggplot2 aes geom_boxplot coord_trans facet_wrap ggtitle geom_jitter position_jitter scale_fill_brewer scale_color_brewer geom_line facet_grid
#' @importFrom MASS huber
#' @importFrom rlang enquo `!!` .data
#' @examples
#' E<-ConstructMIMOSAExpressionSet(ICS,
##' reference=ANTIGEN%in%'negctrl',measure.columns=c('CYTNUM','NSUB'),
##' other.annotations=c('CYTOKINE','TCELLSUBSET','ANTIGEN','UID'),
##' default.cast.formula=component~UID+ANTIGEN+CYTOKINE+TCELLSUBSET,
##' .variables=.(TCELLSUBSET,CYTOKINE,UID),
##' featureCols=1,ref.append.replace='_REF')
#' @export
MIMOSAExpressionSet <- function(df, featureCols) {
featuredata <- attributes(df)$rdimnames[[1]]
pdata <- attributes(df)$rdimnames[[2]]
df <- df[, -featureCols, drop = FALSE] #adata
rownames(df) <- rownames(featuredata)
E <- ExpressionSet(as.matrix(as.data.frame(df)), featureData = AnnotatedDataFrame(as.data.frame(featuredata)),
phenoData = AnnotatedDataFrame(pdata))
return(E)
}
# 'Replicate the reference observations across all treatment groups using ddply '
# '@details Should be passed to .fun argument of ddply. ' '@param dat the piece
# we will work on '@param ref an expression evaluating to a logical vector that
# identifies the reference class within the piece. '@param cols A character
# vector of the names of the columns that hold our measurements '@param
# annotations A character vector of additional annotation columns '@param
# default.formula a default formula to be used for casting the data.
setReference <- function(dat, ref = NULL, cols = NULL, annotations = NULL, default.formula = component ~
...) {
REFERENCE <- try(eval(ref, dat))
if (inherits(REFERENCE, "try-error")) {
REFERENCE <- eval(substitute(ref), dat)
}
if (inherits(REFERENCE, "try-error")) {
stop("Cannot evaluate ref argument")
}
# if((nrow(dat)!=2)|nlevels(factor(REFERENCE))==1){ return(NULL) }
if (sum(REFERENCE) != 1) {
return(NULL)
}
MEASUREMENTS <- do.call(cbind, with(dat, mget(cols, envir = as.environment(-1L))))
# MEASUREMENTS<-model.frame(as.formula(paste('~',paste(cols,collapse='+'))),dat)
NEWNAMES <- paste(cols, "REF", sep = "_")
REF.MEAS <- MEASUREMENTS[REFERENCE, , drop = FALSE]
TREAT.MEAS <- MEASUREMENTS[!REFERENCE, , drop = FALSE]
if (nrow(TREAT.MEAS) == 0) {
return(NULL)
}
REF.MEAS <- matrix(REF.MEAS, nrow = dim(TREAT.MEAS)[1], ncol = dim(TREAT.MEAS)[2],
byrow = TRUE)
colnames(REF.MEAS) <- NEWNAMES
retme <- cbind(TREAT.MEAS, REF.MEAS)
if (!is.null(annotations)) {
ANNOTATIONS <- do.call(data.frame, with(dat, mget(annotations, envir = as.environment(-1L))))[!REFERENCE,
, drop = FALSE]
# ANNOTATIONS<-model.frame(as.formula(paste('~',paste(annotations,collapse='+'))),dat)[!REFERENCE,,drop=FALSE]
retme <- cbind(ANNOTATIONS, retme)
}
retme
}
##'A wrapper for constructing an Expression Set for MIMOSA
##'
##'Calls a series of other functions that will reshape and refactor the data frame into the right format for use by MIMOSA
##'Standardized for use with internal SCHARP data sets.
##'We provide some default arguments as examples. Currently slow, and very much prototype code.
##'@param thisdata is the input data frame
##'@param reference is an \code{expression} that evaluates to a \code{logical} vector which specifices the observations in the data frame that are to be used for the negative control or reference set
##'@param measure.columns is a \code{chracter} vector that specifies which columns hold the observed counts
##'@param other.annotations is a \code{character} vector that specifies which additional columns in the data frame should be included in the returned data. By default we take everything, but you could specify only relevant phenotypic information.
##'@param default.cast.formula is a \code{formula} that tells reshape how to recast the data frame so that rows corresponde to different measured components and columns correspond to samples. By default \code{component~...} will put the components as the rows (i.e. positive and negative cell counts) and all measured phenotypic information on the columns.
##'@param .variables is a dotted list that specifies the variable names (columns of the data frame) by which to group the data when organzing stimulated and unstimulated observations. i.e. PTID x ANTIGEN x TCELLSUBSET x TESTDT, or something else for your own data.
##'@param featureCols is a \code{numeric} vector that specifies the indices of the columns to be used to name the features. If the casting formula is \code{component~...} then there is only one feature column (and it is the first one), so \code{featureCols = 1}, by default.
##'@param ref.append.replace the terminating character string in the column names of the negative controls. It will be replaces with _REF for 'reference'
##'@examples
##' data(ICS)
##' E<-ConstructMIMOSAExpressionSet(ICS,
##' reference=ANTIGEN%in%'negctrl',measure.columns=c('CYTNUM','NSUB'),
##' other.annotations=c('CYTOKINE','TCELLSUBSET','ANTIGEN','UID'),
##' default.cast.formula=component~UID+ANTIGEN+CYTOKINE+TCELLSUBSET,
##' .variables=.(TCELLSUBSET,CYTOKINE,UID),
##' featureCols=1,ref.append.replace='_REF')
##'
##'@export
ConstructMIMOSAExpressionSet <- function(thisdata, reference = quote(STAGE %in% "CTRL" &
PROTEIN %in% "Media+cells"), measure.columns = c("Neg", "Pos"), other.annotations = setdiff(colnames(thisdata),
measure.columns), default.cast.formula = component ~ ..., .variables = quote(.(PTID,
TESTDT, ASSAYID, PLATEID)), featureCols = 1, ref.append.replace = "_NEG") {
# if reference is null, then we already have the data in a form we need
if (!is.null(substitute(reference))) {
# Set the Reference Class to be the negative control Media+cells for each
# ptid/date/assayid/plate
thisdata <- ddply(thisdata, .variables = .variables, setReference, ref = substitute(reference),
cols = measure.columns, annotations = other.annotations, default.formula = default.cast.formula)
} else {
colnames(thisdata) <- gsub(ref.append.replace, "_REF", colnames(thisdata))
}
if (nrow(thisdata) == 0)
stop("All your data was filtered when reshaping due to non-unique pairs of samples. Perhaps you need to aggregate negative controls")
# transform the data so that features are on the rows and samples are on the
# columns build a function to reshape the returned data and assign it to the
# calling environment
MIMOSAReshape <- function(mydata = NULL, default.formula = NULL, cols = measure.columns) {
if (!grepl("RefTreat", paste(deparse(default.formula), collapse = ""))) {
default.formula <- as.formula(paste(paste(deparse(default.formula), collapse = ""),
"RefTreat", sep = "+"))
}
NEWNAMES <- paste(cols, "REF", sep = "_")
mydata <- reshape::melt(mydata, measure.var = c(cols, NEWNAMES))
mydata$RefTreat <- factor(grepl("_REF$", mydata$variable), labels = c("Treatment",
"Reference"))
mydata <- reshape::rename(mydata, c(variable = "component", value = "count"))
mydata$component <- factor(gsub("_REF$", "", mydata$component))
mydata <- reshape::cast(reshape::melt(mydata, measure = "count"), default.formula)
mydata
}
thisdata <- MIMOSAReshape(mydata = thisdata, default.formula = default.cast.formula,
cols = measure.columns)
MIMOSAExpressionSet(thisdata, featureCols = featureCols)
}
# 'Sumarize the replicates in an elispot epitope mapping data set from SCHARP ' '
# Thus function will summarize the replicate observations for the negative
# controls and stimulations in an epitope mapping data set from SCHARP. The user
# provides information on grouping structure, cells per well, replicate
# observation columns and so forth. The function is meant to be passed to ddply
# '@param x is the piece of data currently being worked on. It is a unique
# combination of the grouping variables passed to ddply '@param CONTROL is an
# expression that evaluates to a logical vector specifying which rows of the data
# frame are control observations. '@param WELLMULTIPLIER is a numeric argument
# that specifies the factor by which to multiply the denominator (in the
# \code{cells.per.well} argument). i.e., if there are 100K cells per well, but
# the cells.per.well column has 100, the WELLMULTIPLIER would be 1000 '@param
# replicates is a character vector identifying the column names that contain the
# replicated observations. These will be summed. '@param cells.per.well is a
# character vector that identifies the column containing the number of cells per
# well. '@param SAMPLE is an expression evaluating to a logical vector that
# identifies the rows of the data frame which are antigen or peptide stimualtions
# rather than control samples.
match.elispot.antigens <- function(x, CONTROL = quote(STAGE %in% "CTRL" & PROTEIN %in%
"Media+cells"), WELLMULTIPLIER = 1000, replicates = c("REP1", "REP2", "REP3",
"REP4", "REP5", "REP6"), cells.per.well = "CELLWELL", SAMPLE = quote(!STAGE %in%
"CTRL")) {
control.sub <- eval(eval(substitute(CONTROL)), x)
ctrl <- subset(x, control.sub)
CPOS <- sum(ctrl[, replicates], na.rm = TRUE)
l <- length(na.omit(t(ctrl[, replicates, drop = FALSE])))
CNEG <- WELLMULTIPLIER * l * ctrl[, cells.per.well, drop = TRUE]
samples <- eval(eval(substitute(SAMPLE)), x)
samps <- subset(x, samples)
PNEG <- NULL
PPOS <- NULL
for (i in 1:nrow(samps)) {
ppos <- na.omit(t(samps[i, replicates, drop = FALSE]))
l <- length(ppos)
pneg <- samps[i, cells.per.well] * l * WELLMULTIPLIER
ppos <- sum(ppos)
PNEG <- c(PNEG, pneg)
PPOS <- c(PPOS, ppos)
}
if (any(c(nrow(samps), nrow(ctrl)) %in% 0))
return(NULL) else ret <- rbind(data.frame(samps, Neg = PNEG, Pos = PPOS), data.frame(ctrl,
Neg = CNEG, Pos = CPOS))
ret
}
setOldClass("MIMOSAResultList")
#'Print a MIMOSAResultList
#'
#'Print a summary of the list of results returned by a call to \code{MIMOSA}
#'@rdname print
#'@method print MIMOSAResultList
#'@param x a \code{MIMOSAResultList}
#'@param ... additional arguments passed down
print.MIMOSAResultList <- function(x, ...) {
cat(sprintf("A MIMOSAResultList with %s models for\n", length(x)))
cat(sprintf("%s ", names(x)))
}
#'Show a MIMOSAResultList
#'
#'Show a summary of a MIMOSAResultList.
#'@rdname show
#'@title show
#'@param object \code{MIMOSAResultList}
#'@name show
#'@aliases show,MIMOSAResult-method
setMethod("show", "MIMOSAResult", function(object) {
cat("A MIMOSA Model with ")
cat(sprintf("%s observations.\n", nrow(object@z)))
cat(sprintf("Response rate (w) of %.4g percent.\n", 100 * object@w[2]))
})
#'@rdname pData-methods
#'@importFrom data.table rbindlist
#'@method pData MIMOSAResultList
pData.MIMOSAResultList <- function(object) {
do.call(rbind,lapply(object, pData))
}
#'@rdname pData-methods
#'@method pData MIMOSAResultList
#'@aliases pData,MIMOSAResultList-method
setMethod("pData", "MIMOSAResultList", pData.MIMOSAResultList)
#'Extract the posterior probabilities of response from a MIMOSA model
#'
#'@rdname MIMOSA-accessors
#'@param x output from a MIMOSA model
#'@return a \code{matrix} of posterior probabilities
#'@examples
#' data(ICS)
##' E<-ConstructMIMOSAExpressionSet(ICS,
##' reference=ANTIGEN%in%'negctrl',measure.columns=c('CYTNUM','NSUB'),
##' other.annotations=c('CYTOKINE','TCELLSUBSET','ANTIGEN','UID'),
##' default.cast.formula=component~UID+ANTIGEN+CYTOKINE+TCELLSUBSET,
##' .variables=.(TCELLSUBSET,CYTOKINE,UID),
##' featureCols=1,ref.append.replace='_REF')
##'
##' result<-MIMOSA(NSUB+CYTNUM~UID+TCELLSUBSET+CYTOKINE|ANTIGEN,
##' data=E, method='EM',
##' subset=RefTreat%in%'Treatment'&ANTIGEN%in%'ENV',
##' ref=ANTIGEN%in%'ENV'&RefTreat%in%'Reference')
##' getZ(result)
#'@export
getZ <- function(x) {
UseMethod("getZ")
}
#'@rdname MIMOSA-accessors
#'@method getZ MIMOSAResultList
#'@export
getZ.MIMOSAResultList <- function(x) {
as.matrix(do.call(rbind, lapply(x, getZ)))
}
#'@rdname MIMOSA-accessors
#'@method getZ MIMOSAResult
#'@export
getZ.MIMOSAResult <- function(x) {
z <- x@z
colnames(z) <- c("Pr.Nonresponse", "Pr.response")
z
}
#'Extract the component weights from a MIMOSA model
#'
#'@rdname MIMOSA-accessors
#'@return a \code{vector} of component weights
#'@examples
#'data(ICS)
##' E<-ConstructMIMOSAExpressionSet(ICS,
##' reference=ANTIGEN%in%'negctrl',measure.columns=c('CYTNUM','NSUB'),
##' other.annotations=c('CYTOKINE','TCELLSUBSET','ANTIGEN','UID'),
##' default.cast.formula=component~UID+ANTIGEN+CYTOKINE+TCELLSUBSET,
##' .variables=.(TCELLSUBSET,CYTOKINE,UID),
##' featureCols=1,ref.append.replace='_REF')
##'
##' result<-MIMOSA(NSUB+CYTNUM~UID+TCELLSUBSET+CYTOKINE|ANTIGEN,
##' data=E, method='EM',
##' subset=RefTreat%in%'Treatment'&ANTIGEN%in%'ENV',
##' ref=ANTIGEN%in%'ENV'&RefTreat%in%'Reference')
##' getW(result)
#'@export
getW <- function(x) {
UseMethod("getW")
}
#'@rdname MIMOSA-accessors
#'@method getW MIMOSAResultList
#'@export
getW.MIMOSAResultList <- function(x) {
w <- data.frame(lapply(x, getW))
colnames(w) <- names(x)
w
}
#'@rdname MIMOSA-accessors
#'@method getW MIMOSAResult
#'@export
getW.MIMOSAResult <- function(x) {
w <- x@w
names(w) <- c("w.nonresp", "w.resp")
w
}
countsTable <- function(object, proportion = FALSE) {
}
#'Extract the table of counts from a MIMOSA model
#'
#'@param object a \code{MIMOSAResult}
#'@param proportion \code{logical} return the counts or the proportions
#'@return a \code{data.frame} of counts to which the model was fit.
#'@rdname countsTable
#'@docType methods
#'@return a \code{data.frame} of counts for the stimulated and unstimulated samples
#'@examples
##' data(ICS)
##' E<-ConstructMIMOSAExpressionSet(ICS,
##' reference=ANTIGEN%in%'negctrl',measure.columns=c('CYTNUM','NSUB'),
##' other.annotations=c('CYTOKINE','TCELLSUBSET','ANTIGEN','UID'),
##' default.cast.formula=component~UID+ANTIGEN+CYTOKINE+TCELLSUBSET,
##' .variables=.(TCELLSUBSET,CYTOKINE,UID),
##' featureCols=1,ref.append.replace='_REF')
##'
##' result<-MIMOSA(NSUB+CYTNUM~UID+TCELLSUBSET+CYTOKINE|ANTIGEN,
##' data=E, method='EM',
##' subset=RefTreat%in%'Treatment'&ANTIGEN%in%'ENV',
##' ref=ANTIGEN%in%'ENV'&RefTreat%in%'Reference')
##' head(countsTable(result))
##' head(countsTable(result,proportion=TRUE))
#'@export
setGeneric("countsTable")
#'@rdname countsTable
#'@method countsTable MIMOSAResult
#'@aliases countsTable,MIMOSAResult-method
#'@export
setMethod("countsTable", "MIMOSAResult", definition = function(object, proportion = FALSE) {
countsTable(object@result, proportion = proportion)
})
#'@rdname countsTable
#'@method countsTable MCMCResult
#'@aliases countsTable,MCMCResult-method
#'@export
setMethod("countsTable", "MCMCResult", definition = function(object, proportion = FALSE) {
if (proportion) {
m <- (cbind(prop.table(as.matrix(object@n.stim), 1), prop.table(as.matrix(object@n.unstim),
1)))
} else {
m <- (as.matrix(cbind(object@n.stim, object@n.unstim)))
}
nc <- ncol(m)
colnames(m) <- paste0(colnames(m), rep(c("", "_REF"), each = nc/2))
return(m)
})
#'@rdname countsTable
#'@method countsTable MDMixResult
#'@aliases countsTable,MDMixResult-method
#'@export
setMethod("countsTable", "MDMixResult", definition = function(object, proportion = FALSE) {
if (proportion == TRUE) {
m <- (do.call(cbind, lapply(object@data, function(x) prop.table(as.matrix(x),
1))))
} else {
m <- (as.matrix(do.call(cbind, object@data)))
}
nc <- ncol(m)
colnames(m) <- paste0(colnames(m), rep(c("", "_REF"), each = nc/2))
return(m)
})
#'@rdname countsTable
#'@method countsTable MIMOSAResultList
#'@export
countsTable.MIMOSAResultList <- function(object, proportion = FALSE) {
as.matrix(do.call(rbind, lapply(object, function(x) countsTable(x, proportion = proportion))))
}
#'@rdname countsTable
#'@method countsTable MIMOSAResultList
#'@aliases countsTable,MIMOSAResultList-method
#'@export
setMethod("countsTable", "MIMOSAResultList", countsTable.MIMOSAResultList)
#'Volcano plot for a MIMOSA model
#'
#'Plots effect size vs posterior probablilty of response from a MIMOSAResultList, faceting by the conditioning variables.
#'
#'@rdname volcanoPlot
#'@param x A \code{MIMOSAResultList}
#'@param effect_expression an \code{expression} that defines the effect size. Usually a function of the stimulated and unstimulated proportions from \code{countsTable(x,proportion=TRUE)}
#'@param facet_var an \code{expression} defining the faceting in ggplot parlance. i.e. \code{~ faceting + variables}
#'@param threshold a \code{numeric} value between [0,1] for coloring significant observations (based on the q-value)
#'@examples
#'data(ICS)
##' E<-ConstructMIMOSAExpressionSet(ICS,
##' reference=ANTIGEN%in%'negctrl',measure.columns=c('CYTNUM','NSUB'),
##' other.annotations=c('CYTOKINE','TCELLSUBSET','ANTIGEN','UID'),
##' default.cast.formula=component~UID+ANTIGEN+CYTOKINE+TCELLSUBSET,
##' .variables=.(TCELLSUBSET,CYTOKINE,UID),
##' featureCols=1,ref.append.replace='_REF')
##'
##' result<-MIMOSA(NSUB+CYTNUM~UID+TCELLSUBSET+CYTOKINE|ANTIGEN,
##' data=E, method='EM',
##' subset=RefTreat%in%'Treatment'&ANTIGEN%in%'ENV',
##' ref=ANTIGEN%in%'ENV'&RefTreat%in%'Reference')
##' volcanoPlot(result,CYTNUM-CYTNUM_REF)
#'@importFrom ggplot2 ggplot geom_point theme_bw aes_string scale_y_continuous
#'@seealso \code{\link{countsTable}}
#'@export
volcanoPlot <- function(x, effect_expression = NA, facet_var = NA, threshold = 0.01) {
UseMethod("volcanoPlot")
}
#'@method volcanoPlot MIMOSAResultList
#'@importFrom data.table data.table
#'@export
volcanoPlot.MIMOSAResultList <- function(x, effect_expression = NA, facet_var = NA,
threshold = 0.01) {
err <- FALSE
effect_expression <- deparse(substitute(effect_expression))
if (effect_expression %in% "NA")
err <- TRUE
if (err) {
stop("Must provide an expression for the effect size (i.e. CYTNUM-CYTNUM_REF)")
}
q <- -log10(unlist(fdr(x), use.names = FALSE))
pspu <- countsTable(x, proportion = TRUE)
p.stim <- getZ(x)
pd <- pData(x)
df <- data.table(q, pspu, p.stim, pd, signif = q > -log10(threshold))
if (!is.na(effect_expression)) {
p <- ggplot(df) + aes_string(x = effect_expression, y = "Pr.response", col = "signif.fdr") +
geom_point() + theme_bw() + scale_y_continuous("Probability of Response")
}
if (is.formula(facet_var)) {
p <- p + facet_grid(facet_var)
}
p
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.