#==============================================================================
#
# fit_lmx: lm / lme / lmer
#
#==============================================================================
.cbindstats <- function(
fitres, fname = 'F value', f.pname = 'Pr(>F)', pname = 'Pr(>|t|)',
tname = 't value', effectname = 'Estimate', sename = 'Std. Error'
){
. <- NULL
fval <- stats::anova(fitres)['subgroup', fname]
f.p <- stats::anova(fitres)['subgroup', f.pname]
fitres %<>% summary()
fitres %<>% stats::coefficients()
fitres
pvalues <- as.data.table(as.list(fitres[, pname, drop = FALSE]))
tvalues <- as.data.table(as.list(fitres[, tname, drop = FALSE]))
effects <- as.data.table(as.list(fitres[, effectname, drop = FALSE]))
stderrs <- as.data.table(as.list(fitres[, sename, drop = FALSE]))
names(pvalues) %<>% paste0('p.', .)
names(tvalues) %<>% paste0('t.', .)
names(effects) %<>% paste0('effect.', .)
names(stderrs) %<>% paste0('se.', .)
cbind(pvalues, tvalues, effects, stderrs, F = fval, F.p = f.p )
}
.lm <- function(sd, formula, block, weights, statvars, sep, optim = NULL){
# Initialize
value <- NULL
formula <- as.formula(formula)
environment(formula) <- environment()
# Run mock lm on zero-imputed data to get all potential coefnames
# before any get dropped because of all values in a block being NA (for this feature)
sd0 <- copy(sd)
sd0[is.na(value), value := 0]
fitres0 <- lm( formula = formula, data = sd0, weights = weights, na.action = stats::na.omit )
fitres0 %<>% summary()
fitres0 %<>% stats::coefficients()
fitres0[] <- NA_real_ # Set coefvalues to NA, this is mock data only
# Run actual lm on actual data
# Rbind missing coefficients from mock lm
fitres <- lm( formula = formula, data = sd, weights = weights, na.action = stats::na.omit )
Fp <- suppressWarnings(stats::anova(fitres)) # ANOVA F-tests on an essentially perfect fit are unreliable
Fp <- Fp %>% extract(-nrow(.), , drop = FALSE)
Fp <- Fp[, 'Pr(>F)'] %>% set_names(rownames(Fp))
names(Fp) %<>% paste0('p~', .)
fitres %<>% summary() # weights: stackoverflow.com/questions/51142338
fitres %<>% stats::coefficients()
rows <- setdiff(rownames(fitres0), rownames(fitres))
if (!is.null(rows)){ fitres %<>% rbind( fitres0[ rows , , drop = FALSE] )
fitres %<>% extract(rownames(fitres0), ) }
# Reformat and Return
colnames(fitres) %<>% stri_replace_first_fixed('Estimate', 'effect')
colnames(fitres) %<>% stri_replace_first_fixed('Std. Error', 'se')
colnames(fitres) %<>% stri_replace_first_fixed('t value', 't')
colnames(fitres) %<>% stri_replace_first_fixed('Pr(>|t|)', 'p')
fitres %<>% extract(, statvars, drop = FALSE)
fitmat <- matrix(fitres, nrow = 1)
colnames(fitmat) <- paste(rep(colnames(fitres), each = nrow(fitres)),
rep(rownames(fitres), times = ncol(fitres)), sep = sep)
data.table(cbind(fitmat , t(Fp)))
}
.lme <- function(sd, formula, block, weights, statvars, sep, opt = 'optim'){
ctrl <- nlme::lmeControl(opt = opt) # https://stats.stackexchange.com/a/40664
fitres <- nlme::lme( fixed = formula,
random = block,
data = sd,
na.action = stats::na.omit,
control = ctrl )
Fp <- suppressWarnings(stats::anova(fitres)[-1, , drop = FALSE])
Fp <- Fp[, 'p-value'] %>% set_names(rownames(Fp))
names(Fp) %<>% paste0('p~', .)
suppressWarnings(fitres %<>% summary()) # only 2 replicates in a group -> df = 0 -> p = NaN -> warning
fitres %<>% stats::coefficients()
colnames(fitres) %<>% stri_replace_first_fixed('Value', 'effect')
colnames(fitres) %<>% stri_replace_first_fixed('Std.Error', 'se')
colnames(fitres) %<>% stri_replace_first_fixed('t-value', 't')
colnames(fitres) %<>% stri_replace_first_fixed('p-value', 'p')
fitres %<>% extract(, statvars, drop = FALSE)
fitmat <- matrix(fitres, nrow = 1)
colnames(fitmat) <- paste(rep(colnames(fitres), each = nrow(fitres)),
rep(rownames(fitres), times = ncol(fitres)), sep = sep )
data.table(cbind(fitmat, t(Fp)))
}
.lmer <- function(sd, formula, block = NULL, weights, statvars, sep, optim = NULL){
fitres <- lme4::lmer( formula = formula,
data = sd,
weights = weights,
na.action = stats::na.omit, # https://stackoverflow.com/a/55367171
control = lme4::lmerControl( check.conv.grad = lme4::.makeCC(action = 'ignore', tol=2e-3 ),
check.conv.singular = lme4::.makeCC(action = "ignore", tol=1e-4 ),
check.conv.hess = lme4::.makeCC(action = 'ignore', tol=1e-6 )))
fitres %<>% lmerTest::as_lmerModLmerTest()
Fp <- suppressWarnings(stats::anova(fitres))
Fp <- Fp[, 'Pr(>F)'] %>% set_names(rownames(Fp))
names(Fp) %<>% paste0('p~', .)
fitres %<>% summary() %>% stats::coefficients()
colnames(fitres) %<>% stri_replace_first_fixed('Estimate', 'effect')
colnames(fitres) %<>% stri_replace_first_fixed('Std. Error', 'se')
colnames(fitres) %<>% stri_replace_first_fixed('t value', 't')
colnames(fitres) %<>% stri_replace_first_fixed('Pr(>|t|)', 'p')
fitres %<>% extract(, statvars, drop = FALSE)
fitmat <- matrix(fitres, nrow=1)
colnames(fitmat) <- paste(rep(colnames(fitres), each = nrow(fitres)),
rep(rownames(fitres), times = ncol(fitres)), sep = sep )
data.table(cbind(fitmat, t(Fp)))
}
.extractstat <- function(fitres, quantity){
idx <- stri_startswith_fixed(names(fitres), paste0(quantity, FITSEP))
mat <- as.matrix(fitres[, idx, with=FALSE])
rownames(mat) <- fitres$feature_id
colnames(mat) %<>% stri_replace_first_fixed(paste0(quantity, FITSEP), '')
mat
}
#' Statistical models supported in autonomics
#' @examples
#' TESTS
#' @export
TESTS <- c('limma','lm','lme','lmer', 'wilcoxon')
addlhs <- function(formula) as.formula(paste0('value ', formula2str(formula)))
droplhs <- function(formula) as.formula(stri_replace_first_regex(
formula2str(formula), '^value[ ]*', ''))
#' Put block in lme-compatible format
#' @param formula formula
#' @param block block: charactervector or formula
#' @param verbose TRUE or FALSE
#' @param ... required for s3 dispatch
#' @examples
#' # lme: ensure lme-compatiblae block specification
#' block2lme( block = list(subject = ~1, batch = ~1))
#' block2lme( block = ~1|subject)
#' block2lme( block = c('subject', 'batch'))
#'
#' # lm: integrate block into formula as random effect
#' formula2lm( formula = ~ subgroup, block = c('subject', 'batch') )
#'
#' # lmer: integrate block into formula as fixed effect
#' formula2lmer( formula = ~ subgroup, block = c('subject', 'batch') )
#' formula2lmer( formula = ~ subgroup + (1|subject) + (1|batch ) )
#' @export
block2lme <- function(block, ...) UseMethod('block2lme')
#' @rdname block2lme
#' @export
block2lme.list <- function(block, verbose = TRUE, ...) return(block)
#' @rdname block2lme
#' @export
block2lme.formula <- function(block, verbose = TRUE, ...){
block0 <- block
block <- formula2str(block0)
block %<>% substr(2,nchar(.)) %>% trimws() # rm ~
block %<>% stri_split_fixed('+') %>% unlist() %>% trimws()
blocknames <- trimws(split_extract_fixed( block, '|', 2 ))
block <- trimws(split_extract_fixed( block, '|', 1 ))
block %<>% paste0('~', .)
block %<>% lapply(as.formula)
block %<>% set_names(blocknames)
return(block)
}
#' @rdname block2lme
#' @export
block2lme.character <- function(block, verbose = TRUE, ...){
block0 <- block
block <- rep('~1', length(block0))
names(block) <- block0
block %<>% lapply(as.formula)
block
}
#' @rdname block2lme
#' @export
formula2lmer <- function(formula, block){
if (stri_detect_fixed(formula2str(formula), '|')) return(formula)
formula %<>% formula2str()
block %<>% paste0('(1|', ., ')')
block %<>% paste0(collapse = ' + ')
formula %<>% paste0(' + ', block)
formula %<>% as.formula()
formula
}
#' @rdname block2lme
#' @export
formula2lm <- function(formula, block){
if (is.null(block)) return(formula)
formula %<>% formula2str()
formula %<>% substr(2,nchar(.))
formula %<>% trimws()
formula %<>% c(block, .) %>% paste0(collapse = '+')
formula %<>% paste0('~', .)
formula %<>% as.formula()
formula
}
#' @rdname block2lme
#' @export
block_vars <- function(formula){
formula %<>% formula2str()
formula %<>% stri_replace_first_regex('^[~][ ]*', '')
formula %<>% stri_split_regex('[ ]*[+][ ]*')
formula %<>% extract2(1)
formula %<>% extract(stri_detect_fixed(., '|'))
formula %<>% stri_replace_first_regex('[ ]*[(][ ]*', '') # lmer
formula %<>% stri_replace_first_regex('[ ]*[)][ ]*', '') # lmer
formula %<>% split_extract_regex('[ ]*[|][ ]*', 2)
formula
}
#' Fit lm, lme, or lmer
#' @param object SummarizedExpriment
#' @param fit 'lm', 'lme', or 'lmer'
#' @param formula formula
#' @param drop TRUE or FALSE
#' @param codingfun factor coding function
#' \itemize{
#' \item contr.treatment: intercept = y0, coefi = yi - y0
#' \item contr.treatment.explicit: intercept = y0, coefi = yi - y0
#' \item code_control: intercept = ymean, coefi = yi - y0
#' \item contr.diff: intercept = y0, coefi = yi - y(i-1)
#' \item code_diff: intercept = ymean, coefi = yi - y(i-1)
#' \item code_diff_forward: intercept = ymean, coefi = yi - y(i+)
#' \item code_deviation: intercept = ymean, coefi = yi - ymean (drop last)
#' \item code_deviation_first: intercept = ymean, coefi = yi - ymean (drop first)
#' \item code_helmert: intercept = ymean, coefi = yi - mean(y0:(yi-1))
#' \item code_helmert_forward: intercept = ymean, coefi = yi - mean(y(i+1):yp)
#' }
#' @param codingfun coding function
#' @param design NULL
#' @param coefs NULL or stringvector
#' @param contrasts unused. only to allow generic get(fitfun)(contrasts)
#' @param block NULL or svar
#' @param opt optimizer used in fit_lme: 'optim' (more robust) or 'nlminb'
#' @param weightvar NULL or svar
#' @param statvars character vector: subset of c('effect', 'p', 'fdr', 't')
#' @param ftest TRUE or FALSE
#' @param sep string
#' @param suffix string: pvar suffix ("lm" in "p~t2~lm")
#' @param verbose TRUE or FALSE
#' @param plot TRUE or FALSE
#' @return SummarizedExperiment
#' @examples
#' file <- system.file('extdata/atkin.metabolon.xlsx', package = 'autonomics')
#' object <- read_metabolon(file)
#' fit_lm( object, formula = ~subgroup)
#' fit_limma( object, formula = ~subgroup)
#' fit_limma( object, formula = ~subgroup, block = 'Subject' )
#' fit_lme( object, formula = ~subgroup, block = 'Subject' )
#' fit_lmer( object, formula = ~subgroup, block = 'Subject' )
#' # fit_lme( object, formula = ~subgroup, block = ~1|Subject) # needs fine-tuning
#' # fit_lmer( object, formula = ~subgroup + (1|Subject)) # needs fine-tuning
#' @export
fit_lmx <- function(
object,
fit,
groupvar = 'subgroup',
formula = as.formula(sprintf('~ %s', groupvar)),
drop = varlevels_dont_clash(object, all.vars(formula)),
codingfun = contr.treatment.explicit,
coefs = model_coefs(object, formula = formula, drop = drop, codingfun = codingfun),
block = NULL,
opt = 'optim',
weightvar = if ('weights' %in% assayNames(object)) 'weights' else NULL,
statvars = c('effect', 'p', 'se', 't')[1:2],
ftest = if (is.null(coefs)) TRUE else FALSE,
sep = FITSEP,
suffix = paste0(sep, fit),
verbose = TRUE,
plot = FALSE
){
# Assert
assert_is_valid_sumexp(object)
assert_scalar_subset(fit, c('lm', 'lme', 'lmer'))
assert_valid_formula(formula, object)
assert_is_a_bool(drop)
if (!is.null(weightvar)){ assert_is_character(weightvar)
assert_is_subset(weightvar, assayNames(object))
message('\t\t\tweights = assays(object)$', weightvar) }
assert_is_subset(statvars, c('effect', 'se', 't', 'p'))
N <- value <- V1 <- NULL
# Filter / Customize
obj <- object
if (verbose) cmessage('%sFilter', spaces(14))
obj %<>% keep_replicated_features( formula, verbose = verbose)
obj %<>% keep_connected_blocks( block, verbose = verbose) # keep samples from fully connected blocks (in sdt, feature-specific NA values not considered)
obj %<>% keep_connected_features( block, verbose = verbose) # keep features with 2+ connected blocks
# Prepare
object %<>% reset_fit(fit)
if ( fit == 'lme' ){ block %<>% block2lme(); mdlvars <- unique(c(all.vars(formula), names(block))) }
if ( fit == 'lmer' ){ formula %<>% formula2lmer(block); mdlvars <- all.vars(formula) }
if ( fit == 'lm' ){ formula %<>% formula2lm( block); mdlvars <- all.vars(formula) }
fstr <- formula2str(formula)
if (verbose) cmessage('%s%s', spaces(14), R.utils::capitalize(fit))
if (verbose & fit == 'lme' ) cmessage('%s%s, random = %s', spaces(22), fstr, capture.output(dput(block)))
if (verbose & fit == 'lmer') cmessage('%s%s', spaces(21), fstr)
if (verbose & fit == 'lm' ) cmessage('%s%s', spaces(21), fstr)
fitmethod <- get(paste0('.', fit))
if (is.null(weightvar)){ weightvar <- 'weights'; weights <- NULL }
assays <- assayNames(object) %>% intersect(c(.[1], 'weights'))
dt <- sumexp_to_longdt(obj, svars = mdlvars, assay = assays)
lhsformula <- addlhs(formula)
# Fit
fitres <- dt[, fitmethod( .SD, formula = lhsformula,
block = block,
weights = get(weightvar),
statvars = statvars,
sep = sep,
opt = opt ), by = 'feature_id' ]
names(fitres) %<>% stri_replace_first_fixed('(Intercept)', 'Intercept')
vars <- all.vars(formula)
if (drop) for (var in vars){ # t.p: p~subgroupt1 -> p~t1
pat <- sprintf('%s(.+)', var) # f.p: p~subgroup
names(fitres) %<>% stri_replace_first_regex(pat, '$1') }
# Extract
pattern <- 'feature_id'
if (!is.null( coefs)) pattern %<>% paste0('|', paste0(coefs, collapse = '|'))
if (ftest) pattern %<>% paste0('|', paste0( vars, collapse = '|'))
pattern %<>% paste0('(', ., ')$')
fitres <- fitres[, .SD, .SDcols = patterns(pattern) ]
names(fitres)[-1] %<>% paste0(suffix)
if (verbose) message_df(' %s', summarize_fit(fitres, fit = fit, coefs = coefs))
# Merge back
object %<>% merge_fit(fitres)
formula %<>% droplhs() %<>% formula2str()
if (!is.null(weights)) formula %<>% paste0(', weights = assays(object)$', weightvar)
if (length(coefs) > 1) coefs %<>% setdiff('Intercept')
if (plot) print(plot_volcano(object, fit = fit, coefs = coefs))
object
}
#' @rdname fit_lmx
#' @export
fit_lm <- function(
object,
groupvar = 'subgroup',
formula = group2formula(groupvar, object),
drop = varlevels_dont_clash(object, all.vars(formula)),
codingfun = contr.treatment.explicit,
design = NULL, # only to make fit(.) work!
block = NULL,
weightvar = if ('weights' %in% assayNames(object)) 'weights' else NULL,
statvars = c('effect', 'p', 'se', 't')[1:2],
sep = FITSEP,
suffix = paste0(sep, 'lm'),
coefs = model_coefs(object, formula = formula, drop = drop, codingfun = codingfun),
contrasts = NULL,
ftest = if (is.null(coefs)) TRUE else FALSE,
verbose = TRUE,
plot = FALSE
){
sdt(object) %<>% code(codingfun = codingfun, vars = all.vars(formula), verbose = verbose)
fit_lmx( object,
fit = 'lm',
formula = formula,
drop = drop,
codingfun = codingfun,
block = block,
weightvar = weightvar,
statvars = statvars,
ftest = ftest,
sep = sep,
suffix = suffix,
coefs = coefs,
verbose = verbose,
plot = plot )
}
#' @rdname fit_lmx
#' @export
fit_lme <- function(
object,
groupvar = 'subgroup',
formula = as.formula(sprintf('~ %s', groupvar)),
drop = varlevels_dont_clash(object, all.vars(formula)),
codingfun = contr.treatment.explicit,
design = NULL, # only to make fit(.) work!
block = NULL,
weightvar = if ('weights' %in% assayNames(object)) 'weights' else NULL,
opt = 'optim',
statvars = c('effect', 'p', 'se', 't')[1:2],
sep = FITSEP,
suffix = paste0(sep, 'lme'),
coefs = model_coefs(object, formula = formula, drop = drop, codingfun = codingfun),
contrasts = NULL,
ftest = if (is.null(coefs)) TRUE else FALSE,
verbose = TRUE,
plot = FALSE
){
# Assert
. <- NULL
if (!requireNamespace('nlme', quietly = TRUE)){
message("BiocManager::install('nlme'). Then re-run.")
return(object) }
# Fit
sdt(object) %<>% code(codingfun = codingfun, vars = all.vars(formula), verbose = verbose)
fit_lmx( object,
fit = 'lme',
formula = formula,
drop = drop,
codingfun = codingfun,
block = block,
weightvar = weightvar,
statvars = statvars,
ftest = ftest,
sep = sep,
suffix = suffix,
opt = opt,
coefs = coefs,
verbose = verbose,
plot = plot )
}
#' @rdname fit_lmx
#' @export
fit_lmer <- function(
object,
groupvar = 'subgroup',
formula = as.formula(sprintf('~ %s', groupvar)),
drop = varlevels_dont_clash(object, all.vars(formula)),
codingfun = contr.treatment.explicit,
design = NULL, # only to make fit(.) work!
block = NULL,
weightvar = if ('weights' %in% assayNames(object)) 'weights' else NULL,
statvars = c('effect', 'p', 'se', 't')[1:2],
sep = FITSEP,
suffix = paste0(sep, 'lmer'),
coefs = model_coefs(object, formula = formula, drop = drop, codingfun = codingfun),
contrasts = NULL,
ftest = if (is.null(coefs)) TRUE else FALSE,
verbose = TRUE,
plot = FALSE
){
# Assert
. <- NULL
if (!requireNamespace('lme4', quietly = TRUE)){
message("`BiocManager::install('lme4')`. Then re-run.")
return(object) }
if (!requireNamespace('lmerTest', quietly = TRUE)){
message("`BiocManager::install('lmerTest')`. Then re-run.")
return(object) }
# Fit
sdt(object) %<>% code(codingfun = codingfun, vars = all.vars(formula), verbose = verbose)
fit_lmx( object,
fit = 'lmer',
formula = formula,
drop = drop,
codingfun = codingfun,
block = block,
weightvar = weightvar,
statvars = statvars,
ftest = ftest,
sep = sep,
suffix = suffix,
coefs = coefs,
verbose = verbose,
plot = plot )
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.