R/Dif_expression_helpers.R

Defines functions DTEG_add_regulation_categories na_safe DTEG_pair_results DTEG_model_results DTEG_input_validation DEG_model_simple DEG_model_results DEG_model DEG_DESeq DEG_design DEG_input_validation

Documented in DEG_model DEG_model_results DEG_model_simple

DEG_input_validation <- function(df, counts, design, target.contrast, p.value,
                                 input_check_name = "") {
  message("----------------------")
  message("Input check: ", input_check_name)
  if (!is(df, "experiment"))
    stop("df must be ORFik experiments!")
  if (!is(design, "character") | length(design) == 0) {
    stop("Design must be character of length > 0, don't use formula as input here")
  }
  if (!is(target.contrast, "character") | (length(target.contrast) != 1)){
    stop("target.contrast must be character of length 1, don't use formula as input here")
  }
  if (!(target.contrast %in% design))
    stop("target.contrast must be a column in the 'design' argument")
  if ("rep" %in% design) stop("Do not use rep in design, use batch.effect argument")
  if (length(unique(unlist(df[, target.contrast]))) == 1)
    stop("target.contrast column needs at least 2 unique values, to be able to compare anything!")
  if (nrow(df) < 4)
    stop("Experiment needs at least 4 rows, with minimum 2 per design group!")
  if (p.value > 1 | p.value <= 0)
    stop("p.value must be in interval (0,1]")
  if (!is(counts, "SummarizedExperiment")) stop("counts must be of type SummarizedExperiment")
  return(invisible(NULL))
}

DEG_design <- function(design, target.contrast, batch.effect) {
  be <- ifelse(batch.effect, "replicate + ", "")
  main.design <- as.formula(paste("~", be, paste(design, collapse = " + ")))
  message("----------------------")
  message("Full exp design: ", main.design)
  message("Target contrast: ", target.contrast)
  message("----------------------")
  return(main.design)
}

DEG_DESeq <- function(counts, main.design, message = "Creating DESeq model:") {
  message(message)
  message("----------------------")
  ddsMat <- DESeqDataSet(se = counts, design = main.design)
  ddsMat <- DESeq(ddsMat)
  message("----------------------")
  return(ddsMat)
}

#' Get DESeq2 model without running results
#'
#' This is the preparation step of DESeq2 analysis using ORFik::DEG.analysis.
#' It is exported so that you can do this step in standalone, usually you
#' want to use DEG.analysis directly.
#' @inheritParams DEG.analysis
#' @return a DESeqDataSet object with results stored as metadata columns.
#' @family DifferentialExpression
#' @export
#' @examples
#' ## Simple example (use ORFik template, then use only RNA-seq)
#' df <- ORFik.template.experiment()
#' df.rna <- df[df$libtype == "RNA",]
#' design(df.rna) # The full experimental design
#' target.contrast <- design(df.rna)[1] # Default target contrast
#' #ddsMat_rna <- DEG_model(df.rna, target.contrast)
DEG_model <- function(df,
                      target.contrast = design[1],
                      design = ORFik::design(df),
                      p.value = 0.05,
                      counts = countTable(df, "mrna", type = "summarized"),
                      batch.effect = TRUE) {
  # Input validation
  DEG_input_validation(df, counts, design, target.contrast, p.value)

  # Design
  main.design <- DEG_design(design, target.contrast, batch.effect)

  # DESeq2 model
  ddsMat_rna <- DEG_DESeq(counts, main.design)
}

#' Get DESeq2 model results from DESeqDataSet
#'
#'
#' @param ddsMat_rna a DESeqDataSet object with results stored as metadata columns.
#' @inheritParams DEG.analysis
#' @return a data.table
#' @export
#' @examples
#' ## Simple example (use ORFik template, then use only RNA-seq)
#' df <- ORFik.template.experiment()
#' df.rna <- df[df$libtype == "RNA",]
#' design(df.rna) # The full experimental design
#' target.contrast <- design(df.rna)[1] # Default target contrast
#' #ddsMat_rna <- DEG_model(df.rna, target.contrast)
#' #pairs <- combn.pairs(unlist(df[, target.contrast]))
#' #dt <- DEG_model_results(ddsMat_rna, target.contrast, pairs)
DEG_model_results <- function(ddsMat_rna, target.contrast, pairs,
                              p.value = 0.05) {
  # Do result analysis: per contrast selected
  dt.between <- data.table()
  for(i in pairs) {
    name <- paste("Comparison:", i[1], "vs", i[2])
    message(name)
    # Results
    current.contrast <- c(target.contrast, i[1], i[2])
    res_rna <- results(ddsMat_rna, contrast = current.contrast)
    suppressMessages(res_rna <- lfcShrink(ddsMat_rna, contrast = current.contrast,
                                          res = res_rna, type = "normal"))

    # The differential regulation groupings (padj is padjusted)
    expressed <- which(res_rna$padj < p.value)
    n <- rownames(res_rna)
    Regulation <- rep("No change", nrow(res_rna))
    Regulation[expressed] <- "Significant" # Old Buffering
    print(table(Regulation))

    dt.between <-
      rbindlist(list(dt.between,
                     data.table(contrast = name,
                                Regulation = Regulation,
                                id = rownames(ddsMat_rna),
                                meanCounts = res_rna$baseMean,
                                LFC = res_rna$log2FoldChange,
                                padj = res_rna$padj
                     )))
  }
  dt.between[, Regulation :=
               factor(Regulation,
                      levels = c("No change", "Significant"),
                      ordered = TRUE)]
  message("----------------------")
  return(dt.between)
}

#' Simple Fpkm ratio test DEG
#'
#' If you do not have a valid DESEQ2 experimental setup (contrast), you
#' can use this simplified test
#' @inheritParams DEG.analysis
#' @param design_ids character vector of contrast group ids, e.g.
#'  c("WT", "WT", "Mutant", "Mutant")
#' @return a data.table of fpkm ratios
#' @export
#' @examples
#' ## Simple example (use ORFik template, then use only RNA-seq)
#' df <- ORFik.template.experiment()
#' df <- df[df$libtype == "RNA",]
#' #dt <- DEG_model_simple(df)
DEG_model_simple <- function(df,
                             target.contrast = design[1],
                             design = ORFik::design(df),
                             p.value = 0.05,
                             counts = countTable(df, "mrna", type = "summarized"),
                             design_ids = as.character(df[, target.contrast]),
                             batch.effect = FALSE) {
  stopifnot(ncol(counts) == length(design_ids))
  stopifnot(length(unique(design_ids)) >= 2)
  # Design
  main.design <- DEG_design(design[1], target.contrast, batch.effect)

  ids <- rownames(counts)
  counts <- as.data.table(DESeq2::fpkm(DESeq2::DESeqDataSet(counts, ~ 1)))
  # Get LFC matrix
  counts_group <- lapply(unique(design_ids),
                         function(x) rowMeans(counts[,design_ids == x, with = FALSE]))
  setDT(counts_group)
  colnames(counts_group) <- unique(design_ids)
  dt <- data.table(LFC = log2((counts_group[,1][[1]] + 0.0001) / (counts_group[,2][[1]] + 0.0001)),
                   meanCounts = c(rowMeans(counts_group[, c(1,2), with = FALSE])))
  dt[, id := ids]
  dt[, contrast := paste(unique(design_ids)[1:2], collapse = "_vs_")]
  dt[]
  return(dt)
}

DTEG_input_validation <- function(df.rfp, df.rna, RFP_counts, RNA_counts,
                                  design, target.contrast, p.value) {
  message("----------------------")
  message("Input check:")
  if (!is(df.rfp, "experiment") | !is(df.rna, "experiment"))
    stop("df.rfp and df.rna must be ORFik experiments!")

  DEG_input_validation(df.rfp, RFP_counts, design, target.contrast, p.value,
                       "df.rfp")
  DEG_input_validation(df.rna, RNA_counts, design, target.contrast, p.value,
                       "df.rna")

  if (nrow(RFP_counts) != nrow(RNA_counts))
    stop("Count tables must have equall number of rows!")
  message("----------------------")
  return(invisible(NULL))
}

DTEG_model_results <- function(ddsMat_rna, ddsMat_ribo, ddsMat_te,
                               target.contrast, pairs, p.value = 0.05,
                               complex.categories, lfcShrinkType = "normal") {
  # Do result analysis: per contrast selected
  dt_all_pairs <- data.table()
  for(pair in pairs) {
    contrast_pair <- c(target.contrast, pair[1], pair[2])
    dt <- DTEG_pair_results(ddsMat_te, ddsMat_ribo, ddsMat_rna, contrast_pair, lfcShrinkType, p.value)
    dt <- DTEG_add_regulation_categories(dt, complex.categories)

    print(table(dt$Regulation))
    dt_all_pairs <- rbindlist(list(dt_all_pairs, dt))
  }

  attr(dt_all_pairs, "p.value") <- p.value
  return(dt_all_pairs)
}

DTEG_pair_results <- function(ddsMat_te, ddsMat_ribo, ddsMat_rna, contrast_vec,
                              lfcShrinkType, p.value, return_type = "data.table") {
  stopifnot(length(contrast_vec) == 3)
  name <- paste("Comparison:", contrast_vec[2], "vs", contrast_vec[3])
  message(name)

  res_te <- results(ddsMat_te, contrast = contrast_vec)

  res_ribo <- results(ddsMat_ribo, contrast = contrast_vec)
  suppressMessages(res_ribo <- lfcShrink(ddsMat_ribo, contrast=contrast_vec,
                                         res=res_ribo, type = lfcShrinkType))

  res_rna <- results(ddsMat_rna, contrast = contrast_vec)
  suppressMessages(res_rna <- lfcShrink(ddsMat_rna, contrast = contrast_vec,
                                        res = res_rna, type = lfcShrinkType))

  if (return_type != "data.table") {
    return(list(res_te, res_ribo, res_rna))
  }
  dt <- data.table(contrast = name,
                   Regulation = rep("No change", nrow(res_te)),
                   id = rownames(ddsMat_rna),
                   rna.lfc = res_rna$log2FoldChange,
                   rfp.lfc = res_ribo$log2FoldChange,
                   te.lfc = res_te$log2FoldChange,
                   rna.padj = res_rna$padj,
                   rfp.padj = res_ribo$padj,
                   te.padj = res_te$padj,
                   rna.meanCounts = res_rna$baseMean,
                   rfp.meanCounts = res_ribo$baseMean
  )

  dt[, rna.sign := ifelse(na_safe(rna.padj, "<=", p.value), TRUE, FALSE)]
  dt[, rfp.sign := ifelse(na_safe(rfp.padj, "<=", p.value), TRUE, FALSE)]
  dt[, te.sign := ifelse(na_safe(te.padj, "<=", p.value), TRUE, FALSE)]
  dt[, all_models_sign := FALSE]
  dt[te.sign & rfp.sign & rna.sign, all_models_sign := TRUE]
  return(dt)
}

na_safe <- function(x, op, threshold) {
  op_func <- match.fun(op)  # Convert string to function

  if (op %in% c("<", "<=")) {
    # For "<" and "<=", NA should be FALSE (assume p.adj = 1)
    !is.na(x) & op_func(x, threshold)
  } else {
    # For ">", ">=", "==", NA should be TRUE (assume non-significance)
    is.na(x) | op_func(x, threshold)
  }
}

#' Add regulation categories
#' @noRd
DTEG_add_regulation_categories <- function(dt, complex.categories) {
  # Forwarded are non significant in TE, diagonal line
  dt[!te.sign & rfp.sign & rna.sign, Regulation := "Forwarded"]
  # These two are the X and Y axis
  dt[te.sign & rfp.sign & !rna.sign, Regulation := "Translation"]
  dt[!te.sign & !rfp.sign & rna.sign, Regulation := "Expression"]
  ## These are the remaining groups

  dt[all_models_sign & na_safe(te.lfc * rna.lfc, ">", 0), Regulation := "Intensified"]
  dt[Regulation == "Intensified", Regulation := "mRNA abundance"] #  Rename
  # Also called inverse mRNA abundance
  dt[all_models_sign & na_safe(te.lfc * rna.lfc, "<", 0), Regulation := "Inverse"]
  # Stable protein output
  dt[te.sign & !rfp.sign & rna.sign, Regulation := "Buffering"]


  if (!complex.categories) {
    dt[Regulation == "Expression", Regulation := "Buffering"]
    dt[Regulation == "Inverse", Regulation := "Buffering"]
    dt[Regulation == "Forwarded", Regulation := "Buffering"]
  }
  dt[, all_models_sign := NULL]
  regulation_levels <-  c("No change", "Translation", "Buffering", "mRNA abundance",
                          if(complex.categories) c("Expression", "Forwarded","Inverse") else NULL)

  stopifnot(all(dt$Regulation %in% regulation_levels))
  dt[, Regulation :=
               factor(Regulation, levels = regulation_levels, ordered = TRUE)]
  return(dt)
}
Roleren/ORFik documentation built on April 12, 2025, 5:31 a.m.