#' MetaDeconfound
#'
#' MetaDeconfound checks all feature <-> covariate combinations for
#' counfounding effects of covariates on feature <-> effect correlation
#'
#' @param featureMat a data frame with row(sample ID)
#' and column(feature such as metabolite or microbial OTU )
#' names, listing features for all samples
#' @param metaMat a data frame with row(sample ID) and
#' column(meta data such as age,BMI and all possible confounders)
#' names listing metadata for all samples. first column should be case status
#' with case=1 and control=0. All binary variables need to be in 0/1 syntax!
#' @param nnodes number of nodes/cores to be used for parallel processing
#' @param adjustMethod multiple testing p-value correction using one of the
#' methods of \link[stats]{p.adjust.methods}
#' @param robustCutoff minimal number of sample size for each covariate
#' in order to have sufficient power for association testing
#' @param QCutoff significance cutoff for q-value, DEFAULT = 0.1
#' @param DCutoff effect size cutoff
#' (either cliff's delta or spearman correlation test estimate), DEFAULT = 0
#' @param PHS_cutoff PostHoc Significance cutoff
#' @param logfile name of optional logging file.
#' @param logLevel logging verbosity, possible levels:
#' TRACE, DEBUG, INFO, WARN, ERROR, FATAL, DEFAULT = INFO
#' @param startStop vector of optional strings controlling which
#' parts of the pipeline should be executed.
#' ("naiveStop": only naive associations will be computed, no confounder analysis is done)
#' @param QValues optional data.frame containing pre-computed multiple-testing corrected p-values for naive associations
#' @param DValues optional data.frame containing pre-computed effect sizes for naive associations
#' @param minQValues pessimistic qvalues, can be generated by
#' \link[metadeconfoundR]{ImportLongPrior}.
#' This dataframe of QValues is used to incorporate prior knowledge of
#' potential associations between individual features and metadata by supplying
#' QValues < QCutoff for these associations. All significant associations thus
#' reported will be treated as potentially confounding influences.
#' @param deconfT vector of metavariable names *always* to be included as potential confounder
#' @param deconfF vector of metavariable names *never* to be included as potential confounder
#' @param doConfs optional parameter for additional computation of confidence
#' interval of linear models in the deconfounding step
#' (0 = no , 1 = logging, 2 = strict)
#' @param doRanks optional vector of metavariable names, that should be rank
#' transformed when building linear models in the doconfounding step
#' @param randomVar optional vector of metavariable names to be treated as
#' random effect variables. These variables will not be tested for naive
#' associations and will not be included as potential confounders,
#' but will be added as random effects "+ (1|variable)" into any models being built.
#' Any associations reducible to the supplied random effect(s) will be labeled
#' as "NS". Note: Ps, Qs, Ds are computed independently and thereby not changed
#' through inclusion of random effects.
#' @param fixedVar optional vector of metavariable names to be treated as
#' fixed effect variables. These variabels will not be tested for naive
#' associations and will not be included as potential confounders,
#' but will be added as fixed effects "+ variable" into any models being built.
#' Any associations reducible to the supplied fixed effect(s) will be labeled
#' as "NS". Note: Ps, Qs, Ds are computed independently and thereby not changed
#' through inclusion of fixed effects.
#' @param robustCutoffRho optional robustness cutoff for continuous variables
#' @param typeCategorical optional character vector of metavariable names to
#' always be treated as categorical
#' @param typeContinuous optional character vector of metavariable names to
#' always be treated as continuous
#' @param logistic optional logical parameter; DEFAULT = FALSE;
#' Set TRUE to treat supplied features as binary instead of continuous
#' @param rawCounts optional logical parameter; DEFAULT = FALSE;
#' Set TRUE to treat supplied features as not normalized/rarefied counts;
#' metadeconfoundR will compute total read count per sample and include this
#' information in the modelling steps. WARNING: naive associations in
#' first part of metadeconfoundR are computed on TSS-transformed version of input data.
#' @param returnLong DEFAULT = FALSE; Set TRUE to get output in one long
#' format data.frame instead of list of four wide format data.frames
#' @param collectMods DEFAULT = FALSE; Set TRUE to collect all model objects
#' generated by Metadeconfound and return them in a nested list alongside the
#' standard Ps/Qs/Ds/status output.
#' @param ... for additional arguments used internally (development/debugging)
#' @return list with elements (or data.frame with columns, when returnLong = TRUE) Ds = effectsize,
#' Ps = uncorrected p-value for naive association,
#' Qs = multiple testing corrected p-value/fdr,
#' and status = confounding status for all
#' feature <=> covariate combinations with following categories:
#' (NS = not significant, OK_sd = strictly deconfounded, OK_nc = no covariates,
#' OK_d = doubtful, AD = ambiguously deconfounded, C: followed by comma
#' separated covariate names = confounded by listed covariates)\cr
#'
#' Can be plotted using \link[metadeconfoundR]{BuildHeatmap}.
#' @details for more details and explanations please see the vignette.
#' @examples
#'data(reduced_feature)
#'data(metaMatMetformin)
#'\donttest{
#'example_output <- MetaDeconfound(featureMat = reduced_feature,
#' metaMat = metaMatMetformin,
#' logLevel = "ERROR")
#'}
#'
#' @import futile.logger
#' @importFrom reshape2 melt
#' @importFrom methods is
#' @export
#'
MetaDeconfound <- function(featureMat,
metaMat,
nnodes = 1,
adjustMethod = "fdr",
robustCutoff = 5,
QCutoff = 0.1,
DCutoff = 0,
PHS_cutoff = 0.05,
logfile = NULL,
logLevel = "INFO", # new TB20240602
startStop = NA,
QValues = NA,
DValues = NA,
minQValues= NULL,
deconfT = NULL,
deconfF = NULL,
doConfs = 0,
doRanks = NA,
randomVar = NA,
fixedVar = NA, # new TB20230727
robustCutoffRho = NULL, # new SKF20200221
typeCategorical = NULL, # new SKF20200221
typeContinuous = NULL, # new SKF20200221
logistic = FALSE, # new SKF20201017
rawCounts = FALSE, # new TB20220202
returnLong = FALSE, # new TB20210409
collectMods = FALSE, # new TB20220208
...) {
futile.logger::flog.logger("my.logger", logLevel)
futile.logger::flog.threshold(logLevel, name='my.logger')
if (is.null(logfile)) {
futile.logger::flog.appender(appender.console(), name = 'my.logger')
} else {
futile.logger::flog.appender(appender.file(logfile), name='my.logger')
}
###
###
### logging start and initial sanity checks on input paramters
###
###
if (is.null(logfile) && (doConfs > 0)) {
stop('Error - "doConfs" parameeter is set to > 0 but "logfile" is not specified. Can not log warnings for confidence intervalls spanning 0.')
}
futile.logger::flog.info(msg = '###',
name = "my.logger"
)
futile.logger::flog.info(msg = '###',
name = "my.logger"
)
futile.logger::flog.info(msg = 'Deconfounding run started',
name = "my.logger"
)
if ("naiveStop" %in% startStop) {
futile.logger::flog.warn(msg = 'Detected "naiveStop" in "startStop" paramter. Will only return naive associations.',
name = "my.logger")
}
if (missing(metaMat)) {
futile.logger::flog.error(msg = 'Necessary argument "metaMat" missing.',
name = "my.logger")
stop('Error - Necessary argument "metaMat" missing.')
}
if (missing(featureMat)) {
futile.logger::flog.error(msg = 'Necessary argument "featureMat" missing.',
name = "my.logger")
stop('Error - Necessary argument "featureMat" missing.')
}
if (is(featureMat, "tbl") | is(metaMat, "tbl")) {
futile.logger::flog.warn(msg = "Tibbles detected in input data frames. This might lead to unexpected behaviors. Please convert to standard data.frame class!",
name = "my.logger")
}
if (nrow(metaMat) != nrow(featureMat)) {
futile.logger::flog.error(msg = "featureMat and metaMat don't have same number of rows.",
name = "my.logger")
stop("featureMat and metaMat don't have same number of rows.")
}
if (any(order(rownames(metaMat)) != order(rownames(featureMat)))) {
futile.logger::flog.error(msg = "rownames of featureMat and metaMat don't have same order.",
name = "my.logger")
stop("Rownames of featureMat and metaMat don't have same order.
(order(rownames(metaMat)) != order(rownames(featureMat)))")
}
# check proper naming of rows and columns
faultyColnamesMeta <- colnames(metaMat)[which(colnames(metaMat) != make.names(colnames(metaMat)))]
faultyColnamesFeat <- colnames(featureMat)[which(colnames(featureMat) != make.names(colnames(featureMat)))]
faultyRownamesMeta <-rownames(metaMat)[which(rownames(metaMat) != make.names(rownames(metaMat)))]
if (length(c(faultyColnamesMeta, faultyColnamesFeat, faultyRownamesMeta)) > 0) {
futile.logger::flog.warn(msg = "Unallowed characters detected in rownames and/or colnames of featureMat and/or metaMat!\n
metadeconfoundR will try to remove these characters using the make.names() function.",
name = "my.logger")
colnames(metaMat) <- make.names(colnames(metaMat), unique = T)
colnames(featureMat) <- make.names(colnames(featureMat), unique = T)
rownames(metaMat) <- make.names(rownames(metaMat), unique = T)
rownames(featureMat) <- make.names(rownames(featureMat), unique = T)
}
if (!is.null(deconfT) | !is.null(deconfF)) {
if ((sum(deconfT %in% colnames(metaMat)) < length(deconfT)) |
(sum(deconfF %in% colnames(metaMat)) < length(deconfF))) {
futile.logger::flog.error(msg = "Elements of deconfT/deconfF are not present in colnames of metaMat.",
name = "my.logger")
futile.logger::flog.info(msg = "Check identical spelling of variable
names in deconfT/deconfF and metaMat",
name = "my.logger")
stop("Elements of deconfT/deconfF are not present in colnames of metaMat.")
} else if (sum(deconfT %in% deconfF) > 0) {
futile.logger::flog.error(msg = "Some elements of deconfT and deconfF seem to be identical.",
name = "my.logger")
stop("Some elements of deconfT and deconfF seem to be identical.")
}
}
if (is.null(QValues) || is.null(DValues)) {
futile.logger::flog.error(msg = "QValues and/or DValues argument is supplied but seems to be empty (NULL).",
name = "my.logger")
stop("QValues and/or DValues argument is supplied but seems to be empty (NULL).")
}
if (rawCounts == TRUE) {
if (logistic == TRUE) {
futile.logger::flog.error(msg = "rawCounts and logistic can not be both set to TRUE!",
name = "my.logger")
stop("rawCounts and logistic can not be both set to TRUE!")
}
futile.logger::flog.warn(msg = 'Raw count mode is anabled!For computation of naive associations only, raw counts are being normalized by dividing each sample by its total count! ',
name = "my.logger")
}
if (is(randomVar, "list")) {
futile.logger::flog.error(msg = "randomVar does not need to be supplied as list anymore, please change to new syntax.",
name = "my.logger")
stop("randomVar does not need to be supplied as list anymore, please change to new syntax.")
}
.MetaDeconfound(featureMat = featureMat,
metaMat = metaMat,
nnodes = nnodes,
adjustMethod = adjustMethod,
robustCutoff = robustCutoff,
QCutoff = QCutoff,
DCutoff = DCutoff,
PHS_cutoff = PHS_cutoff,
logfile = logfile,
logLevel = logfile, # new TB20240602
startStop = startStop,
QValues = QValues,
DValues = DValues,
minQValues=minQValues,
deconfT = deconfT,
deconfF = deconfF,
doConfs = doConfs,
doRanks = doRanks,
randomVar = randomVar,
fixedVar = fixedVar, # new TB20230727
robustCutoffRho = robustCutoffRho, # new SKF20200221
typeCategorical = typeCategorical, # new SKF20200221
typeContinuous = typeContinuous, # new SKF20200221
logistic = logistic, # new SKF20201017
rawCounts = rawCounts, # new TB20220202
returnLong = returnLong, # new TB20210409
collectMods = collectMods, # new TB20220208
...
#verbosity = verbosity
)
}
.MetaDeconfound <- function(featureMat,
metaMat,
nnodes = 1,
robustCutoff = 5,
adjustMethod = "fdr",
QCutoff = 0.1,
DCutoff = 0,
PHS_cutoff = 0.05,
logfile = NULL,
logLevel = "INFO", # new TB20240602
startStop = NA,
QValues = NA,
DValues = NA,
minQValues=NULL,
deconfT = NULL,
deconfF = NULL,
doConfs = 0,
doRanks = NA,
randomVar = NA,
fixedVar = NA, # new TB20230727
robustCutoffRho = NULL, # new SKF20200221
typeCategorical = NULL, # new SKF20200221
typeContinuous = NULL, # new SKF20200221
logistic = FALSE, # new SKF20201017
rawCounts = FALSE, # new TB20220202
returnLong = FALSE, # new TB20210409
collectMods = FALSE, # new TB20220208
verbosity = "silent",
nAGQ = 1 # new TB20221201
) {
if (nnodes < 2) {
futile.logger::flog.info(msg = 'Computing in serial mode. Set nnodes > 1 do to switch to faster parallel processing.',
name = "my.logger")
nnodes <- 1
}
if (length(randomVar) > 1) {
if (nAGQ > 1) {
nAGQ <- 1
futile.logger::flog.warn(msg = "nAGQ was set to 1. Can not be > 1 if more than one random variable is added to a glmer.",
name = "my.logger")
}
}
if (collectMods == TRUE) {
futile.logger::flog.warn(msg = "collectMods was set to TRUE, model building step is run with nnodes = 1.",
name = "my.logger")
}
samples <- row.names (featureMat)
features <- colnames (featureMat)
noFeatures <- length (features)
if (is.null (robustCutoffRho)) { robustCutoffRho <- robustCutoff } # new SKF20200221
RVnames <- NA
covariates <- colnames (metaMat) # each covariate + the status category
if (!is.na(randomVar[[1]])) { # list input parameter is split for further use within pipeline
RVnames <- randomVar
futile.logger::flog.info(msg = paste0("The following parameters will be added to all linear models as random effects: '", randomVar, "'"),
name = "my.logger")
futile.logger::flog.info(msg = paste(randomVar),
name = "my.logger")
futile.logger::flog.warn(msg = paste0("the following random effect covariates will be excluded as potential donfounders: ", paste0(RVnames, collapse = ", ")),
name = "my.logger")
futile.logger::flog.warn(msg = "naive associations reducible to these efects will get the status label 'NS', while output elements Ps, Qs, and Ds will be unchanged.",
name = "my.logger")
}
if (!is.na(fixedVar[[1]])) {
RVnames <- na.omit(c(RVnames, fixedVar))
futile.logger::flog.info(msg = paste0("The following parameters will be added to all linear models as fixed effects: '", fixedVar, "'"),
name = "my.logger")
futile.logger::flog.info(msg = paste(fixedVar),
name = "my.logger")
futile.logger::flog.warn(msg = paste0("the following fixed effect covariates will be excluded as potential donfounders: ", paste0(RVnames, collapse = ", ")),
name = "my.logger")
futile.logger::flog.warn(msg = "naive associations reducible to these efects will get the status label 'NS', while output elements Ps, Qs, and Ds will be unchanged.",
name = "my.logger")
}
noCovariates <- length (covariates)
futile.logger::flog.info(msg = paste0("Checking robustness of data for covariates"),
name = "my.logger")
isRobust <- CheckSufficientPower(metaMat = metaMat,
covariates = covariates,
noCovariates = noCovariates,
nnodes = nnodes,
robustCutoff = robustCutoff,
robustCutoffRho = robustCutoffRho, # new SKF20200221
typeCategorical = typeCategorical, # new SKF20200221
typeContinuous = typeContinuous, # new SKF20200221
verbosity = verbosity,
RVnames = RVnames,
startStop = startStop,
deconfF = deconfF) # new TB20220704
futile.logger::flog.debug(msg = paste(
"CheckSufficientPower -- (dim(isRobust[[2]]):",
paste(dim(isRobust[[2]]), collapse = ", "),
name = "my.logger"
))
if (!all(isRobust[[1]])) {
futile.logger::flog.info(msg = paste0((length(isRobust[[1]]) - sum(isRobust[[1]])), " covariates where marked as too sparse and won't be considered in further analysis due to lack of sufficient data: ",
sub(pattern = ", ", replacement = "", x = paste0(", ", names(isRobust[[1]][isRobust[[1]] == 0]), collapse = ""))),
name = "my.logger")
}
if (is.na(QValues[[1]]) | is.na(DValues[[1]])) {
# if no external Qs and Ds are supplied, compute them!
futile.logger::flog.info(msg = "Computation of naive associations started.",
name = "my.logger")
naiveAssociation <- NaiveAssociation(
featureMat = featureMat,
samples = samples,
features = features,
noFeatures = noFeatures,
metaMat = metaMat,
covariates = covariates,
noCovariates = noCovariates,
isRobust = isRobust,
typeCategorical = typeCategorical,# new SKF20200221
typeContinuous = typeContinuous,# new SKF20200221
logistic = logistic, # new SKF20201017
adjustMethod = adjustMethod,
nnodes = nnodes,
rawCounts = rawCounts,# new TB20221129
verbosity = verbosity
)
if ("naiveStop" %in% startStop) {
futile.logger::flog.warn(msg = paste('Process stopped before computing confounding status because "startStop" parameter contained "naiveStop". '),
name = "my.logger")
if (!returnLong) {
return(list(Ps = naiveAssociation$Ps,
Qs = naiveAssociation$Qs,
Ds = naiveAssociation$Ds
))
}
long_out <- reshape2::melt(naiveAssociation$Ps, varnames = c("feature", "metaVariable"), value.name = "Ps")
long_out$Qs <- reshape2::melt(naiveAssociation$Qs)[, 3]
long_out$Ds <- reshape2::melt(naiveAssociation$Ds)[, 3]
return(long_out)
}
} else { # if precomputed Qs and Ds are supplied as arguments
naiveAssociation <- list(Ps = QValues, Qs = QValues, Ds = DValues)
futile.logger::flog.warn(msg = paste("Naive p-values are NOT computed, but simply are a copy of the supplied adjusted p-values (QValues)."),
name = "my.logger")
futile.logger::flog.warn(msg = paste('Confonding status is computed based on Q-values and effect sizes supplied via "QValue" and "DValue" parameters. '),
name = "my.logger")
}
if (collectMods == TRUE) {
nnodes <- 1
futile.logger::flog.warn(msg = paste('collectMods == TRUE --> setting nnodes = 1 for model building phase.'),
name = "my.logger")
}
reducibilityStatus <- CheckReducibility(featureMat = featureMat,
metaMat = metaMat,
noFeatures = noFeatures,
noCovariates = noCovariates,
features = features,
covariates = covariates,
Qs = naiveAssociation$Qs,
Ds = naiveAssociation$Ds,
minQValues= minQValues,
nnodes = nnodes,
QCutoff = QCutoff,
DCutoff = DCutoff,
PHS_cutoff = PHS_cutoff,
deconfT = deconfT,
deconfF = deconfF,
doConfs = doConfs,
doRanks = doRanks,
randomVar = randomVar,
fixedVar = fixedVar, # new TB20230727
RVnames = RVnames,
isRobust = isRobust,
logistic = logistic, # new SKF20201017,
rawCounts = rawCounts, # new TB20220202
verbosity = verbosity,
nAGQ = nAGQ, # new TB 20221201
collectMods = collectMods, # new TB20220208
)
#}
if (collectMods) {
collectedMods <- reducibilityStatus[[2]]
reducibilityStatus <-reducibilityStatus[[1]]
}
futile.logger::flog.info(msg = "MetadecondoundR run completed successfully!",
name = "my.logger")
if (!returnLong) {
returnList <- list(Ps = naiveAssociation$Ps,
Qs = naiveAssociation$Qs,
Ds = naiveAssociation$Ds,
status=reducibilityStatus)
if (collectMods) {
returnList$collectedMods <- collectedMods
}
return(returnList)
}
long_out <- reshape2::melt(naiveAssociation$Ps, varnames = c("feature", "metaVariable"), value.name = "Ps")
long_out$Qs <- reshape2::melt(naiveAssociation$Qs)[, 3]
long_out$Ds <- reshape2::melt(naiveAssociation$Ds)[, 3]
long_out$status <- reshape2::melt(reducibilityStatus)[, 3]
#long_out_signif <- subset(x = long_out, (status != "NS") & !is.na(status))
if (collectMods) {
long_out <- list(stdOutput = long_out,
collectedMods = collectedMods)
futile.logger::flog.warn(msg = 'Collected models are part ouf output. Only use output[["stdOutput"]] as BuildHeatmap input!',
name = "my.logger")
}
return(long_out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.