R/loadData.R

Defines functions poolTwoMethylationDatasets poolMethylationDatasets joinMethylationData saveBismark readBismarkPool readBismark

Documented in poolMethylationDatasets poolTwoMethylationDatasets readBismark readBismarkPool saveBismark

#' This function takes as input a CX report file produced by Bismark 
#' and returns a \code{\link{GRanges}} object with four metadata columns 
#' The file represents the bisulfite sequencing methylation data.
#'
#' @title Read Bismark
#' @param file The filename (including path) of the methylation 
#' (CX report generated by Bismark) to be read.
#' @return the methylation data stored as a \code{\link{GRanges}} 
#' object with four metadata columns (see \code{\link{methylationDataList}}).
#' @examples
#' 
#' # load methylation data object
#' data(methylationDataList)
#' 
#' # save the one datasets into a file
#' saveBismark(methylationDataList[["WT"]], "chr3test_a_thaliana_wt.CX_report")
#' 
#' # load the data
#' methylationDataWT <- readBismark("chr3test_a_thaliana_wt.CX_report")
#' 
#' #check that the loading worked
#' all(methylationDataWT == methylationDataList[["WT"]])
#' 
#' @author Nicolae Radu Zabet and Jonathan Michael Foonlan Tsang
#' 
#' @export
readBismark <- function(file) {
  .stopIfNotAll(c(!is.null(file),
                  is.character(file), 
                  file.exists(file)),
                " file does not exist")
  
  cat("Reading file: ",file,"\n",sep="")
  cx <- scan(
    file = file,
    what = list(character(), integer(), character(), integer(), integer(), character(), character()),
    sep  = "\t",
    skip = 0);
  chrs <- unique(cx[[1]])
  chrs <- chrs[order(chrs)]
  dat <- data.frame(
    chr = factor(cx[[1]], levels = chrs), 
    pos = as.integer(cx[[2]]),
    strand = factor(cx[[3]], levels =c("+","-")),
    context = factor(cx[[6]], levels = c("CG","CHG","CHH")),
    trinucleotide_context = cx[[7]],
    readsM = as.integer(cx[[4]]),
    readsN = as.integer((cx[[4]]+cx[[5]])));
  #dat <- dat[with(dat, order(chr, pos, strand)), ];
  dat <- GRanges(seqnames = dat$chr, 
                 ranges   = IRanges(start = dat$pos, end = dat$pos), 
                 strand   = dat$strand, 
                 context = dat$context, 
                 readsM = dat$readsM, 
                 readsN = dat$readsN,
                 trinucleotide_context = dat$trinucleotide_context);
  cat("Finished reading file: ",file,"\n",sep="")
  return(dat)
}

#' This function takes as input a vector of CX report file produced by Bismark 
#' and returns a \code{\link{GRanges}} object with four metadata columns 
#' (see \code{\link{methylationDataList}}). The file represents the pooled 
#' bisulfite sequencing data.
#'
#' @title Read Bismark pool
#' @param files The filenames (including path) of the methylation 
#' (CX report generated with Bismark) to be read
#' @return the methylation data stored as a \code{\link{GRanges}} 
#' object with four metadata columns (see \code{\link{methylationDataList}}).
#' @examples
#' 
#' # load methylation data object
#' data(methylationDataList)
#' 
#' # save the two datasets
#' saveBismark(methylationDataList[["WT"]], 
#'            "chr3test_a_thaliana_wt.CX_report")
#' saveBismark(methylationDataList[["met1-3"]], 
#'            "chr3test_a_thaliana_met13.CX_report")
#' 
#' # reload the two datasets and pool them
#' filenames <- c("chr3test_a_thaliana_wt.CX_report", 
#'                "chr3test_a_thaliana_met13.CX_report")
#' methylationDataPool <- readBismarkPool(filenames)
#' 
#' @author Nicolae Radu Zabet and Jonathan Michael Foonlan Tsang
#' 
#' @export
readBismarkPool <- function(files) {
  .stopIfNotAll(c(!is.null(files),
                  length(files) > 0),
                " files is a vector containing filenames")
  
  #read the data
  data <- GRangesList()
  for(i in 1:length(files)){
    data <- c(data, GRangesList(readBismark(files[i])))
  }
  
  #pool the data
  pooledData <- poolMethylationDatasets(data)
  
  return(pooledData)
}

#' This function takes as input a \code{\link{GRanges}} object generated with 
#' \code{\link{readBismark}} and saves the output to a file using 
#' Bismark CX report format.
#'
#' @title Save Bismark
#' @param methylationData the methylation data stored as a \code{\link{GRanges}} 
#' object with four metadata columns (see \code{\link{methylationDataList}}).
#' @param filename the filename where the data will be saved.
#' @return Invisibly returns \code{NULL}
#' @examples
#' 
#' # load methylation data object
#' data(methylationDataList)
#' 
#' # save one dataset to a file
#' saveBismark(methylationDataList[["WT"]], "chr3test_a_thaliana_wt.CX_report")
#' 
#' @author Nicolae Radu Zabet
#' 
#' @export
saveBismark <- function(methylationData, filename){
  .stopIfNotAll(c(!is.null(filename),
                  is.character(filename), 
                  length(filename)==1),
                " filename should specify the filename, where the methylation data will be saved")
  
  .validateMethylationData(methylationData)
  
  
  
  chrs <- as.character(seqnames(methylationData))
  start <- start(methylationData)
  strand <- as.character(strand(methylationData))
  
  readsM <- methylationData$readsM
  readsU <- methylationData$readsN - methylationData$readsM
  
  context <- methylationData$context
  trinucleotide_context <-  methylationData$trinucleotide_context
  
  dataFile <- data.frame("chr"=chrs, "position"=start, "strand"=strand, "count methylated"=readsM, "count unmethylated"=readsU, "context"=context, "trinucleotide context"=trinucleotide_context)
  write.table(dataFile, file=filename, quote=FALSE, row.names = FALSE, col.names = FALSE, sep  = "\t")
  invisible(NULL)
}






#' This function takes as input two \code{\link{GRanges}} objects of CX reports 
#' (Bismark) for two conditions and  returns a \code{\link{GRanges}} object with 
#' five metadata columns
#'
#' @title Join methylation data
#' @param cx1 a \code{\link{GRanges}} object of CX reports (Bismark) with three 
#' metadata columns the cytosine context (CG, CHG or CHH), the number of 
#' methylated reads and the total number of reads at that position in condition 
#' 1
#' @param cx2 a \code{\link{GRanges}} object of CX reports (Bismark) with three 
#' metadata columns the cytosine context (CG, CHG or CHH), the number of 
#' methylated reads and the total number of reads at that position in condition 
#' 2
#' @return a the methylation data stored as a \code{\link{GRanges}} 
#' object with six metadata columns (see \code{\link{methylationDataList}}).
.joinMethylationData <- function(cx1, cx2){
  
  overlaps <- findOverlaps(cx1, cx2)
  indexes <- which(!duplicated(queryHits(overlaps)))
  methylData <- GRanges(seqnames = seqnames(cx1[queryHits(overlaps)[indexes]]), 
                        ranges   = ranges(cx1[queryHits(overlaps)[indexes]]), 
                        strand   = strand(cx1[queryHits(overlaps)[indexes]]), 
                        context  = cx1$context[queryHits(overlaps)[indexes]],
                        trinucleotide_context = cx1$trinucleotide_context[queryHits(overlaps)[indexes]], 
                        readsM1  = cx1$readsM[queryHits(overlaps)[indexes]], 
                        readsN1  = cx1$readsN[queryHits(overlaps)[indexes]],                                   
                        readsM2  = cx2$readsM[subjectHits(overlaps)[indexes]], 
                        readsN2  = cx2$readsN[subjectHits(overlaps)[indexes]])
  return(methylData)
}



#' This function pools together multiple methylation datasets. 
#'
#' @title Pool methylation data
#' @param methylationDataList a \code{\link{GRangesList}} object where each 
#' element of the list is a \code{\link{GRanges}} object with the methylation 
#' data in the corresponding condition (see \code{\link{methylationDataList}}).
#' @return the methylation data stored as a \code{\link{GRanges}} 
#' object with four metadata columns (see \code{\link{methylationDataList}}).
#' 
#' @examples
#' # load methylation data object
#' data(methylationDataList)
#' 
#' # pools the two datasets together
#' pooledMethylationData <- poolMethylationDatasets(methylationDataList)
#' 
#' @author Nicolae Radu Zabet
#' 
#' @export
poolMethylationDatasets <- function(methylationDataList){
  .validateMethylationDataList(methylationDataList)
  
  pooledMethylationData <- methylationDataList[[1]]
  if(length(methylationDataList) > 1){
    for(i in 2:length(methylationDataList)){
      cat("joining two datasets ...\n")
      buffer <- .joinMethylationData(pooledMethylationData, methylationDataList[[i]])
      
      pooledMethylationData <- GRanges(seqnames = seqnames(buffer), 
                                       ranges   = ranges(buffer), 
                                       strand   = strand(buffer), 
                                       context  = buffer$context,
                                       readsM  = (buffer$readsM1 + buffer$readsM2), 
                                       readsN  = (buffer$readsN1 + buffer$readsN2),                                       
                                       trinucleotide_context = buffer$trinucleotide_context)                                   
      cat("the sum was performed ...\n")
      
      
    }
  }
  
  return(pooledMethylationData)      
}



#' This function pools together two methylation datasets. 
#'
#' @title Pool two methylation datasets
#' @param methylationData1 a \code{\link{GRanges}} object with the methylation 
#' data (see \code{\link{methylationDataList}}).
#' @param methylationData2 a \code{\link{GRanges}} object with the methylation 
#' data (see \code{\link{methylationDataList}}).
#' @return the methylation data stored as a \code{\link{GRanges}} 
#' object with four metadata columns (see \code{\link{methylationDataList}}).
#' 
#' @examples
#' # load methylation data object
#' data(methylationDataList)
#' 
#' # save the two datasets together
#' pooledMethylationData <- poolTwoMethylationDatasets(methylationDataList[[1]], 
#'                          methylationDataList[[2]])
#' 
#' @author Nicolae Radu Zabet
#' 
#' @export
poolTwoMethylationDatasets <- function(methylationData1, methylationData2){
  .validateMethylationData(methylationData1)
  .validateMethylationData(methylationData2)
  
  
  cat("joining two datasets ...\n")
  buffer <- .joinMethylationData(methylationData1, methylationData2)
  
  pooledMethylationData <- GRanges(seqnames = seqnames(buffer), 
                                   ranges   = ranges(buffer), 
                                   strand   = strand(buffer), 
                                   context  = buffer$context,
                                   readsM  = (buffer$readsM1 + buffer$readsM2), 
                                   readsN  = (buffer$readsN1 + buffer$readsN2),                                       
                                   trinucleotide_context = buffer$trinucleotide_context)      
  
  cat("the sum was performed ...\n")
  
  
  
  return(pooledMethylationData)      
}

Try the DMRcaller package in your browser

Any scripts or data that you put into this service are public.

DMRcaller documentation built on Nov. 17, 2017, 9:37 a.m.