R/ImportLongPrior.R

Defines functions ImportLongPrior

Documented in ImportLongPrior

#' ImportLongPrior
#'
#' ImportLongPrior imports prior knowledge of associations between individual
#' features and metadata in form of a long-format dataframe.
#'
#' @param longPrior long-format dataframe as generated by
#' MetaDeconfound(returnLong = TRUE). Must contain at least one column
#' containing feature names and one column containing associated metadata names,
#' called "feature" and "metaVariable", respectively. Only associations between
#' features and metadata present in featureMat and metaMat will be returned.
#' Additionally, "Qs" and "status" (as produced by
#' \link[metadeconfoundR]{MetaDeconfound}) columns can be supplied and will
#' be parsed as well.
#' If only "feature" and "metaVariable" columns are supplied, all listed
#' associations are assumed to be significant.
#' If "status" is supplied, only non-"NS" labeled associations will be kept.
#' @param featureMat omics features to be analyzed by \link[metadeconfoundR]{MetaDeconfound}
#' @param metaMat metadata to be analyzed by \link[metadeconfoundR]{MetaDeconfound}
#' @return wide-format dataframe that can be used as minQValues parameter in
#' \link[metadeconfoundR]{MetaDeconfound}
#' @details This function is meant to facilitate incorporation of prior
#' knowledge about associations between measured omics features and available
#' metadata both from earlier metadeconfoundR runs by supplying the long-format
#' MetaDeconfound(returnLong = TRUE) output directly or by supplying a simple
#' list of known associations from other studies.
#' @examples
#'data(reduced_feature)
#'data(metaMatMetformin)
#'\donttest{
#'
#'# note that this example is only to demonstrate the process of integrating
#'  # prior knowledge into a MetaDeconfound() analysis. Using the output of a
#'  # MetaDeconfound() run as minQValues input for a second run with the exact
#'  # same features and metadata will not lead to any new insights since the set
#'  # of QValues calculated by MetaDeconfound() and the set supplied using the
#'  # minQValues parameter are identical in this case.
#'
#'example_output <- MetaDeconfound(featureMat = reduced_feature,
#'                                   metaMat = metaMatMetformin,
#'                                   returnLong = TRUE,
#'                                   logLevel = "ERROR")
#'
#' minQValues <- ImportLongPrior(longPrior = example_output,
#'                                 featureMat = reduced_feature,
#'                                 metaMat = metaMatMetformin)
#'
#'example_output2 <- MetaDeconfound(featureMat = reduced_feature,
#'                                   metaMat = metaMatMetformin,
#'                                   minQValues = minQValues,
#'                                   logLevel = "ERROR")
#'}

#' @import futile.logger
#' @importFrom reshape2 dcast
#' @export


# Convert a long-format table of known feature/metadata associations into the
# wide features x metadata layout, restricted to the features and metadata of
# the dataset that is about to be analysed.
#
# longPrior  : data frame with columns "feature" and "metaVariable"; optional
#              columns "Qs" and "status" are honoured when present, any other
#              columns are ignored.
# featureMat : object whose column names are the feature names.
# metaMat    : object whose column names are the metadata variable names.
#
# Returns a data frame with one row per column of featureMat and one column
# per column of metaMat. Cells hold the prior Q-value of that association
# (-1 when no "Qs" column was supplied) and NA where nothing is known.
ImportLongPrior <- function(longPrior,
                            featureMat,
                            metaMat) {

  # accept tibbles and similar classes and get plain data.frame semantics
  longPrior <- as.data.frame(longPrior, stringsAsFactors = FALSE)

  # detect the input format by column *name* rather than by column count, so
  # that tables carrying extra annotation columns are handled as well
  prior_cols <- colnames(longPrior)
  missing_cols <- setdiff(c("feature", "metaVariable"), prior_cols)
  if (length(missing_cols) > 0) {
    stop("longPrior is missing required column(s): ",
         paste(missing_cols, collapse = ", "),
         call. = FALSE)
  }

  # if status column exists: filter out NS and NA
  # ([[ ]] is used instead of $ to avoid partial matching of column names)
  if ("status" %in% prior_cols) {
    status <- longPrior[["status"]]
    keep_row <- !is.na(status) & status != "NS"
    longPrior <- longPrior[keep_row, , drop = FALSE]
  }

  # without a Qs column all listed associations are assumed to be significant
  # and receive the artificial Q-value -1
  if ("Qs" %in% prior_cols) {
    q_values <- longPrior[["Qs"]]
  } else {
    q_values <- rep(-1, nrow(longPrior))
  }
  features <- as.character(longPrior[["feature"]])
  meta_vars <- as.character(longPrior[["metaVariable"]])

  # empty (all NA) table covering all possible associations of the new dataset
  minQValues <- as.data.frame(matrix(nrow = ncol(featureMat),
                                     ncol = ncol(metaMat)))
  rownames(minQValues) <- colnames(featureMat)
  colnames(minQValues) <- colnames(metaMat)

  # only associations whose feature and metadata variable are both present in
  # the yet-to-analyse dataset, and which carry a Q-value, can be transferred
  usable <- features %in% colnames(featureMat) &
    meta_vars %in% colnames(metaMat) &
    !is.na(q_values)

  # fill the table one metadata column at a time; this never collapses to a
  # vector, so a prior matching only a single metadata variable is kept
  for (meta_name in unique(meta_vars[usable])) {
    in_column <- usable & meta_vars == meta_name
    # a feature/metaVariable pair listed more than once keeps its smallest
    # (most significant) Q-value instead of being turned into a count
    best_q <- tapply(q_values[in_column], features[in_column], min)
    minQValues[names(best_q), meta_name] <- as.vector(best_q)
  }

  minQValues
}
TillBirkner/metadeconfoundR documentation built on July 1, 2024, 7:59 p.m.