#' @param filepath
#' @param colm
#' @param include.intermediate
#' @param mincov
#' @param nCytosines
#' @importFrom dplyr inner_join
#' @export
#'
# This function will merge (column 6) state calls and (column 7) rc.meth.lvl from all samples into one dataframe
# makes list of 2 dataframes
merge.cols <- function(filepath, colm, include.intermediate) {
mylist <- list()
for (l in 1:length(colm)){
extract <- lapply(filepath, function(k){
f <- fread(k, header=FALSE, skip=1, select=c(1, 2, 3, colm[l]))
if (colm[l]==6) {
if (include.intermediate==TRUE) {
f[,4] <- ifelse(f[,4] == "U", yes = 0, (ifelse(f[,4] == "I", yes = 0.5, no = 1)))
} else {
f[,4] <- ifelse(f[,4] == "U", yes = 0, no = 1)
}
}
colnames(f)[4] <- basename(k)
return(f)
})
df <- Reduce(function(x, y) {
dplyr::inner_join(x, y, by=c("V1","V2","V3"))
}, extract)
mylist[[l]] <- df
}
return(mylist)
}
#' Builds a DMR matrix for all samples
#'
#' This function generates a binary matrix, a matrix of recalibrated methylation levels and posterior probabilities for all samples.
#'
#' @param context cytosine context
#' @param samplefiles file containing full path of base level methylation calls, sample names and replicates(optional)
#' @param include.intermediate A logical specifying whether or not the intermediate component should be included in the HMM.
#' @param input.dir input directory containing all region level methylome calls
#' @param out.dir output directory
#' @importFrom data.table fread
#' @importFrom data.table fwrite
#' @importFrom data.table rbindlist
#' @export
#'
makeDMRmatrix <- function(context, samplefiles, input.dir, out.dir, include.intermediate=FALSE) {
# Read the sample file with filenames
samplelist <- fread(samplefiles, header=T)
for (j in 1:length(context)){
# list all files in the input directory
extractflist <- list.files(input.dir, pattern=paste0(context[j],".txt"), full.names=TRUE)
#extract file subsets for construction of DMRmatrix
if (length(extractflist) != 0){
mynames <- gsub(paste0("_", context[j], ".txt$"), "", basename(extractflist))
selectlist <- list()
message("\nExtracting filenames and matching them....")
for (a1 in seq_along(mynames)){
as <- samplelist[grepl(paste0("_",mynames[a1]), samplelist$file),]
if (NROW(as)==1){
as$full.path.MethReg <- grep(paste0("/", mynames[a1], "_", context[j], ".txt", sep=""), extractflist, value=TRUE)
message("\n", basename(as$full.path.MethReg)," found !")
selectlist[[a1]] <- as
} else {
message("\nMultiple files with string match ", mynames[a1]," found !")
}
}
flist <- data.table::rbindlist(selectlist)
#print(flist)
# Assign unique names for samples with or without replicate data
if (!is.null(flist$replicate)) {
message(paste0("\nRunning context ", context[j], ". Input data with replicates, creating unique sample names...\n"), sep = "")
flist$name <- paste0(flist$sample,"_", flist$replicate)
} else {
flist$name <- flist$sample
}
message(paste0("\nNow, constructing DMR matrix for ", context[j]), sep = "")
# merge samples by Chr coordinates
#(column 6) state-calls and (column 7) rc.meth.lvl
mydf <- merge.cols(filepath=flist$full.path.MethReg, include.intermediate=include.intermediate, colm=c(5, 6, 7))
# list containing state calls
status.collect <- mydf[[2]]
# renaming file names with sample names
for (a in 4:length(colnames(status.collect))) {
for (n in 1:length(flist$name)) {
if (colnames(status.collect)[a] == basename(flist$full.path.MethReg)[n]) {
colnames(status.collect)[a] = flist$name[n]
}
}
}
# list containing rcmethlvls
rc.methlevel.collect <- mydf[[3]]
# renaming file names with sample names
for (a in 4:length(colnames(rc.methlevel.collect))) {
for (n in 1:length(flist$name)) {
if (colnames(rc.methlevel.collect)[a] == basename(flist$full.path.MethReg)[n]) {
colnames(rc.methlevel.collect)[a] = flist$name[n]
}
}
}
# list containing postmax
postMax.collect <- mydf[[1]]
# renaming file names with sample names
for (a in 4:length(colnames(postMax.collect))) {
for (n in 1:length(flist$name)) {
if (colnames(postMax.collect)[a] == basename(flist$full.path.MethReg)[n]) {
colnames(postMax.collect)[a] = flist$name[n]
}
}
}
names(status.collect)[1] <- "seqnames"
names(status.collect)[2] <- "start"
names(status.collect)[3] <- "end"
names(rc.methlevel.collect)[1] <- "seqnames"
names(rc.methlevel.collect)[2] <- "start"
names(rc.methlevel.collect)[3] <- "end"
names(postMax.collect)[1] <- "seqnames"
names(postMax.collect)[2] <- "start"
names(postMax.collect)[3] <- "end"
fwrite(x=status.collect, file=paste0(out.dir,"/", context[j],"_StateCalls.txt"),
quote=FALSE, row.names=FALSE, col.names=TRUE, sep="\t")
fwrite(x=rc.methlevel.collect, file=paste0(out.dir,"/", context[j],"_rcMethlvl.txt"),
quote=FALSE, row.names=FALSE, col.names=TRUE, sep="\t")
fwrite(x=postMax.collect, file=paste0(out.dir,"/", context[j],"_postMax.txt"),
quote=FALSE, row.names=FALSE, col.names=TRUE, sep="\t")
message(paste0("\nDone! \n"), sep = "")
} else{
message(paste0("Files for context ",context[j]," do not exist\n"), sep="")
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.