# SPDX-License-Identifier: AGPL-3.0-or-later
# Copyright (C) 2021 Kevin Lu
# Parts modified from EpigenCentral
#' Read Illumina 450k methylation array IDATs specified in a sample sheet
#'
#' A sample sheet is a CSV with at least the following columns:
#' Sample_Name, Sample_Group, Sentrix_ID, Sentrix_Position
#' Further columns may annotate with other metadata about the samples for
#' other types of analysis not performed by this package.
#'
#' Sample_Name should be unique. Sample_Group should be either "control" or the
#' name of a case type, which will be used to differentiate the classes when
#' finding differentially methylated CpGs.
#'
#' IDATs are looked up in the directory containing the sample sheet:
#' paste(Sentrix_ID, Sentrix_Position, sep = "_") should result in a prefix of
#' the filenames of the corresponding red and green channel IDATs. Only
#' Sentrix_ID and Sentrix_Position are read by this function itself; an error
#' is raised if either column is absent or if the sheet has no rows.
#'
#' An example for the GSE55491 dataset of Silver-Russell syndrome patients is
#' in extdata/
#'
#' @param csv_sample_sheet_file Path to a CSV sample sheet
#'
#' @return normalized minfi GenomicRatioSet
#'
#' @examples
#' \dontrun{
#' grset <- read_idat("extdata/GSE55491/samplesheet.rss-GSE55491.csv")
#' }
#' @references
#' Prickett AR, Ishida M, Böhm S, Frost JM et al. Genome-wide methylation analysis
#' in Silver-Russell syndrome patients. Hum Genet 2015 Mar;134(3):317-332. PMID: 25563730
#'
#' @export
read_idat <- function(csv_sample_sheet_file) {
  targets <- utils::read.csv(csv_sample_sheet_file, strip.white = TRUE)

  # Fail early with a readable message: without these columns the Basename
  # below would silently degrade to "<dir>/" and only fail inside minfi.
  required_cols <- c("Sentrix_ID", "Sentrix_Position")
  missing_cols <- setdiff(required_cols, names(targets))
  if (length(missing_cols) > 0) {
    stop(
      "Sample sheet '", csv_sample_sheet_file,
      "' is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }
  if (nrow(targets) == 0) {
    stop(
      "Sample sheet '", csv_sample_sheet_file, "' contains no samples",
      call. = FALSE
    )
  }

  # minfi locates each IDAT pair through the Basename column; the files are
  # expected to sit next to the sample sheet.
  targets$Basename <- file.path(
    dirname(csv_sample_sheet_file),
    paste(targets$Sentrix_ID, targets$Sentrix_Position, sep = "_")
  )

  # Same chain as before, one step per line:
  # read -> preprocessIllumina -> mapToGenome -> ratioConvert
  rgset <- minfi::read.metharray.exp(targets = targets)
  mset <- minfi::preprocessIllumina(rgset)
  gmset <- minfi::mapToGenome(mset)
  minfi::ratioConvert(gmset)
}
#' Parse tab-delimited methylation matrices often found in GEO datasets.
#'
#' This is meant to parse the uncompressed Series Matrix files when IDATs
#' are not available. A CSV sample sheet of the same format required by read_idat
#' should still be provided to annotate Sample_Group for analysis, otherwise
#' you will only be able to generate PCA plots.
#'
#' Lines starting with "!" are skipped as comments. The first column is taken
#' as the probe ID and every remaining column as one sample; only rows whose
#' probe ID starts with "cg" are kept. The result is annotated as
#' IlluminaHumanMethylation450k / ilmn12.hg19.
#'
#' When a sample sheet is given, its rows are applied to the matrix columns by
#' position (the columns are renamed to Sample_Name in sheet order), so the
#' sheet must list the samples in the same order as the matrix and must have
#' exactly one row per sample column. Without a sample sheet, a placeholder
#' one is built from the matrix column names.
#'
#' @param tsv_beta_matrix_file Path to the GEO text file
#' @param csv_sample_sheet_file Path to the sample sheet for annotations. Optional
#'
#' @examples
#' \dontrun{
#' grset <- read_geo_tsv("extdata/GSE55491/GSE55491_series_matrix.txt")
#' }
#' @references
#' Prickett AR, Ishida M, Böhm S, Frost JM et al. Genome-wide methylation analysis
#' in Silver-Russell syndrome patients. Hum Genet 2015 Mar;134(3):317-332. PMID: 25563730
#'
#' @return parsed minfi GenomicRatioSet
#' @export
read_geo_tsv <- function(tsv_beta_matrix_file, csv_sample_sheet_file = NULL) {
  betas <- utils::read.delim(
    tsv_beta_matrix_file,
    strip.white = TRUE,
    comment.char = "!"
  )
  if (ncol(betas) < 2L) {
    stop(
      "Beta matrix '", tsv_beta_matrix_file,
      "' must have a probe ID column followed by at least one sample column",
      call. = FALSE
    )
  }

  # Transform to structure expected by minfi.
  # drop = FALSE keeps a one-sample file as a named one-column matrix.
  methyls <- data.matrix(betas[, -1, drop = FALSE])
  rownames(methyls) <- betas[[1]]
  is_cpg <- grepl("^cg", rownames(methyls))
  if (!any(is_cpg)) {
    stop(
      "No CpG probe rows (IDs starting with \"cg\") found in '",
      tsv_beta_matrix_file, "'",
      call. = FALSE
    )
  }
  # drop = FALSE keeps a single matching probe as a one-row matrix.
  methyls <- methyls[is_cpg, , drop = FALSE]

  # Infer sample sheet as needed. This is resolved before the minfi calls so
  # that a malformed sheet is reported before the genome mapping is attempted.
  if (is.null(csv_sample_sheet_file)) {
    pheno <- data.frame(
      Sample_Name = colnames(methyls),
      Sentrix_ID = 0,
      Sentrix_Position = 0
    )
  } else {
    pheno <- utils::read.csv(csv_sample_sheet_file, strip.white = TRUE)
    if (!"Sample_Name" %in% names(pheno)) {
      stop(
        "Sample sheet '", csv_sample_sheet_file,
        "' is missing the required Sample_Name column",
        call. = FALSE
      )
    }
    if (nrow(pheno) != ncol(methyls)) {
      stop(
        "Sample sheet '", csv_sample_sheet_file, "' lists ", nrow(pheno),
        " sample(s) but the beta matrix '", tsv_beta_matrix_file,
        "' has ", ncol(methyls),
        call. = FALSE
      )
    }
  }

  # From https://support.bioconductor.org/p/73941/
  rset <- minfi::RatioSet(Beta = methyls)
  BiocGenerics::annotation(rset) <- c(
    array = "IlluminaHumanMethylation450k",
    annotation = "ilmn12.hg19"
  )
  grset <- minfi::mapToGenome(rset)

  # Annotate GenomicRatioSet with sample sheet (positional match, see docs)
  Biobase::pData(grset) <- methods::as(pheno, "DataFrame")
  colnames(grset) <- pheno$Sample_Name
  rownames(Biobase::pData(grset)) <- pheno$Sample_Name
  grset
}
# [END]
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.