#' Methods to fit msqrob models with ridge regression and/or random effects using lme4
#'
#' @description Parameter estimation of msqrob models for `QFeatures`
#' and `SummarizedExperiment` instance.
#'
#' @aliases msqrob msqrob,SummarizedExperiment-method msqrob,QFeatures-method
#'
#' @author Lieven Clement
#'
#'
#' @examples
#'
#' # Load example data
#' # The data are a Feature object with containing
#' # a SummarizedExperiment named "peptide" with MaxQuant peptide intensities
#' # The data are a subset of spike-in the human-ecoli study
#' # The variable condition in the colData of the Feature object
#' # contains information on the spike in condition a-e (from low to high)
#' data(pe)
#'
#' # Aggregate peptide intensities in protein expression values
#' pe <- aggregateFeatures(pe, i = "peptide", fcol = "Proteins", name = "protein")
#'
#' # Fit MSqrob model using robust linear regression upon summarization of
#' # peptide intensities into protein expression values.
#' # For summarized SummarizedExperiment
#' se <- pe[["protein"]]
#' se
#' colData(se) <- colData(pe)
#' se <- msqrob(se, formula = ~condition, modelColumnName = "rlm")
#' getCoef(rowData(se)$rlm[[1]])
#'
#' # For features object
#' pe <- msqrob(pe, i = "protein", formula = ~condition, modelColumnName = "rlm")
#' # with ridge regression (slower)
#' pe <- msqrob(pe, i = "protein", formula = ~condition, ridge = TRUE, modelColumnName = "ridge")
#'
#' # compare for human protein (no DE)==> large shrinkage to zero
#' cbind(getCoef(rowData(pe[["protein"]])$rlm[[1]]), getCoef(rowData(pe[["protein"]])$ridge[[1]]))
#'
#' # compare for ecoli protein (DE)==> almost no shrinkage to zero
#' cbind(
#' getCoef(rowData(pe[["protein"]])$rlm[["P00956"]]),
#' getCoef(rowData(pe[["protein"]])$ridge[["P00956"]])
#' )
#' @param object `SummarizedExperiment` or `QFeatures` instance
#'
#' @param formula Model formula. The model is built based on the
#' covariates in the data object.
#'
#' @param modelColumnName `character` to indicate the variable name that is used
#' to store the msqrob models in the rowData of the SummarizedExperiment
#' instance or of the assay of the QFeatures instance. Default is "msqrobModels".
#'
#' @param overwrite `boolean(1)` to indicate if the column in the rowData has to
#' be overwritten if the modelColumnName already exists. Default is FALSE.
#'
#' @param robust `boolean(1)` to indicate if robust regression is
#' performed to account for outliers. Default is `TRUE`. If
#' `FALSE` an OLS fit is performed.
#'
#' @param ridge `boolean(1)` to indicate if ridge regression is
#' performed. Default is `FALSE`. If `TRUE` the fixed effects are
#' estimated via penalized regression and shrunken to zero.
#'
#' @param maxitRob `numeric(1)` indicating the maximum iterations in
#' the IRWLS algorithm used in the M-estimation step of the robust
#' regression.
#'
#' @param tol `numeric(1)` indicating the tolerance for declaring convergence
#' of the M-estimation loop.
#'
#' @param doQR `boolean(1)` to indicate if QR decomposition is used when adopting
#' ridge regression. Default is `TRUE`. If `FALSE` the predictors of the fixed
#' effects are not transformed, and the degree of shrinkage can depend on the encoding.
#'
#' @param lmerArgs a list (of correct class, resulting from ‘lmerControl()’
#' containing control parameters, including the nonlinear optimizer to be used
#' and parameters to be passed through to the nonlinear optimizer, see the
#' ‘lmerControl’ documentation of the lme4 package for more details.
#' Default is `list(control = lmerControl(calc.derivs = FALSE))`
#' @rdname msqrob
#'
#' @import SummarizedExperiment
#' @export
setMethod(
"msqrob", "SummarizedExperiment",
function(object,
formula,
modelColumnName = "msqrobModels",
overwrite = FALSE,
robust = TRUE,
ridge = FALSE,
maxitRob = 1,
tol = 1e-6,
doQR = TRUE,
lmerArgs = list(control = lmerControl(calc.derivs = FALSE))) {
if (ncol(colData(object)) == 0) stop("colData is empty")
if ((modelColumnName %in% colnames(rowData(object))) & !overwrite) {
stop(
"There is already a column named \'",
modelColumnName,
"\' in the rowData of the SummarizedExperiment object, set the argument overwrite=TRUE to replace the column with the new results or use another name for the argument modelColumnName to store the results as a novel column in the rowData of SummarizedExperiment object"
)
}
if (length(formula) == 3) {
formula <- formula[-2]
}
if (any(all.vars(formula) %in% colnames(rowData(object)))){
stop(
"Use the msqrobAggregate function to use rowData variables"
)
}
#Get the variables from the formula and check if they are in the coldata or rowdata
check_vars <- all.vars(formula) %in% colnames(colData(object))
if (!all(check_vars)){
if(sum(!check_vars) >1) {
vars_not_found <- paste0(all.vars(formula)[!check_vars], collapse=", ")
stop(paste("Variables", vars_not_found, "are not found in coldata"), sep = "")
} else{
vars_not_found <- all.vars(formula)[!check_vars]
stop(paste0("Variable ", vars_not_found, " is not found in coldata"), sep = "")
}
}
if (!ridge & is.null(findbars(formula))) {
rowData(object)[[modelColumnName]] <- msqrobLm(
y = assay(object),
formula = formula,
data = droplevels(colData(object)),
robust = robust,
maxitRob = maxitRob
)
} else {
rowData(object)[[modelColumnName]] <- msqrobLmer(
y = assay(object),
formula = formula,
data = droplevels(colData(object)),
rowdata = NULL,
robust = robust,
maxitRob = maxitRob,
tol = tol,
doQR = doQR,
ridge = ridge,
lmerArgs = lmerArgs
)
}
rowData(object)[[modelColumnName]] <- rowData(object)[[modelColumnName]][rownames(rowData(object))]
return(object)
}
)
#' @param i `character` or `integer` to specify the element of the `QFeatures` that
#' contains the log expression intensities that will be modelled.
#'
#' @return A SummarizedExperiment or a `QFeatures` instance with the models.
#' @export
#' @rdname msqrob
setMethod(
"msqrob", "QFeatures",
function(object,
i,
formula,
modelColumnName = "msqrobModels",
overwrite = FALSE,
robust = TRUE,
ridge = FALSE,
maxitRob = 1,
tol = 1e-6,
doQR = TRUE,
lmerArgs = list(control = lmerControl(calc.derivs = FALSE))) {
if (is.null(object[[i]])) stop("QFeatures object does not contain an assay with the name ", i)
if ((modelColumnName %in% colnames(rowData(object[[i]]))) & !overwrite) {
stop(
"There is already a column named \'",
modelColumnName,
"\' in the rowData of the assay",
i,
"of the QFeatures object, set the argument overwrite=TRUE to replace the column with the new results or use another name for the argument modelColumnName to store the results as a novel column in the rowData of assay of the QFeatures object"
)
}
if (length(formula) == 3) {
formula <- formula[-2]
}
if (any(all.vars(formula) %in% colnames(rowData(object[[i]])))){
stop(
"Use the msqrobAggregate function to use rowData variables"
)
}
#Get the variables from the formula and check if they are in the coldata or rowdata
check_vars <- all.vars(formula) %in% colnames(colData(object))
if (!all(check_vars)){
if(sum(!check_vars) >1) {
vars_not_found <- paste0(all.vars(formula)[!check_vars], collapse=", ")
stop(paste("Variables", vars_not_found, "are not found in coldata"), sep = "")
} else{
vars_not_found <- all.vars(formula)[!check_vars]
stop(paste0("Variable ", vars_not_found, " is not found in coldata"), sep = "")
}
}
if (!ridge & is.null(findbars(formula))) {
rowData(object[[i]])[[modelColumnName]] <- msqrobLm(
y = assay(object[[i]]),
formula = formula,
data = droplevels(colData(object)),
robust = robust,
maxitRob = maxitRob
)
} else {
rowData(object[[i]])[[modelColumnName]] <- msqrobLmer(
y = assay(object[[i]]),
formula = formula,
data = droplevels(colData(object)),
rowdata = NULL,
robust = robust,
ridge = ridge,
maxitRob = maxitRob,
tol = tol,
doQR = doQR,
lmerArgs = lmerArgs
)
}
rowData(object[[i]])[[modelColumnName]] <- rowData(object[[i]])[[modelColumnName]][rownames(rowData(object[[i]]))]
return(object)
}
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.