R/dmp.R

# SPDX-License-Identifier: AGPL-3.0-or-later
# Copyright (C) 2021  Kevin Lu
# Parts modified from EpigenCentral

#' Find differentially methylated CpGs of interest between designated cases and
#' controls. We use minfi's dmpFinder, which performs an F-test, and then we
#' further filter for significance thresholds p < 0.05 and delta beta > 0.1.
#'
#' @param grset minfi GenomicRatioSet containing beta values
#'
#' @examples
#' \dontrun{
#' grset <- read_idat("extdata/GSE55491/samplesheet.rss-GSE55491.csv")
#' pca_plot(grset)
#' }
#' @references
#' Aryee MJ, Jaffe AE, Corrada-Bravo H, Ladd-Acosta C, Feinberg AP, Hansen KD, Irizarry RA
#' (2014). “Minfi: A flexible and comprehensive Bioconductor package for the analysis of
#' Infinium DNA Methylation microarrays.” _Bioinformatics_, *30*(10), 1363-1369. doi:
#' 10.1093/bioinformatics/btu049 (URL: https://doi.org/10.1093/bioinformatics/btu049).
#'
#' @return data frame containing differentially methylated CpGs of interest,
#' annotated with their genomic location, sorted by fdr adjusted p-value and
#' delta beta, with mean values at this position for reference
#' @export
f_test <- function (grset) {
  beta <- minfi::getBeta(grset)
  # having 0 or 1 in beta results in Inf when converting to M values and this will cause problems later on
  epsilon <- 1e-10
  beta[beta < epsilon] <- epsilon
  beta[beta > 1 - epsilon] <- 1 - epsilon
  M <- lumi::beta2m(beta)
  # Remove Sentrix_ID and Sentrix_Position from labels
  colnames(beta) <- Biobase::pData(grset)$Sample_Name
  colnames(M) <- Biobase::pData(grset)$Sample_Name

  pheno <- Biobase::pData(grset)$Sample_Group
  dmp <- minfi::dmpFinder(M, pheno = pheno, type = "categorical")

  # Filter for CpGs of significance, p < 0.05, fdr adjustment
  dmp$adj.pval <- stats::p.adjust(dmp[,"pval"], method = "fdr")
  dmp <- dmp[dmp["adj.pval"] < 0.05,][c("pval","adj.pval")]

  controls <- Biobase::pData(grset)[pheno == "control",]$Sample_Name
  cases <- Biobase::pData(grset)[pheno != "control",]$Sample_Name
  meanBetaControl <- rowMeans(beta[, controls])
  meanBetaCase <- rowMeans(beta[, cases])
  meanDeltaBeta <- abs(meanBetaCase - meanBetaControl)

  # TODO: useful for visualization on chromosomes
  chrs <- minfi::getAnnotation(grset)[,"chr"]
  genes <- minfi::getAnnotation(grset)[,"UCSC_RefGene_Name"]

  # Format result data frame
  result <- data.frame(meanBetaCase, meanBetaControl, meanDeltaBeta, chrs, genes)
  result$genes <- strsplit(as.character(result$genes), ";")
  result$genes <- as.character(lapply(result$genes, function(x) {
    paste(sort(unique(as.character(x))), collapse="; ")
  }))
  result <- merge(result, dmp, by = 0)
  names(result) <- c(
    "CpG.Site",
    "Mean.Case.Beta",
    "Mean.Control.Beta",
    "Mean.Delta.Beta",
    "Chromosome",
    "Gene",
    "P.value",
    "Adjusted.P.value")

  # Filter for CpGs of significance, delta beta > 0.1
  result <- result[result$Mean.Delta.Beta > 0.1, ]
  result <- result[order(result$Adjusted.P.value, result$Mean.Delta.Beta), ]
}

# [END]
kevinlul/EpigeneLite documentation built on Dec. 21, 2021, 6:35 a.m.