#' makebmDataFromData method for \code{CompressedGRangesList} objects
#'
#' @name makebmDataFromData
#' @docType methods
#' @rdname makebmDataFromData-methods
#' @aliases makebmDataFromData,CompressedGRangesList-method
#' @param data lists object
#' @param sampleNames the name of each samples
#' @importFrom methods setMethod
#' @importFrom GenomicRanges seqnames
#' @importFrom GenomicRanges start
#' @exportMethod makebmDataFromData
setMethod("makebmDataFromData", signature(data="CompressedGRangesList"),
function(data,
sampleNames=NULL){
data_list <- lapply(data, function(x){
chr <- as.character(seqnames(x))
pos <- start(x)
valueNames <- names(mcols(x))
df <- data.frame(chr=chr,pos=pos)
mcol <- as.data.frame(mcols(x))
df <- data.frame(df,mcol)
colnames(df) <- c("chr","pos",valueNames)
return(df)
})
makebmDataFromData.internal(data = data_list, sampleNames=sampleNames)
})
#' makebmDataFromData method for \code{GRanges} objects
#'
#' @name makebmDataFromData
#' @docType methods
#' @rdname makebmDataFromData-methods
#' @aliases makebmDataFromData,GRanges-method
#' @param data lists object
#' @param sampleNames the name of each samples
#' @importFrom methods setMethod
#' @importFrom GenomicRanges seqnames
#' @importFrom GenomicRanges start
#' @exportMethod makebmDataFromData
setMethod("makebmDataFromData", signature(data="GRanges"),
function(data,
sampleNames=NULL){
chr <- as.character(seqnames(data))
pos <- start(data)
valueNames <- names(mcols(data))
df <- data.frame(chr=chr,pos=pos)
mcol <- as.data.frame(mcols(data))
df <- data.frame(df,mcol)
colnames(df) <- c("chr","pos",valueNames)
data_list <- list(df)
makebmDataFromData.internal(data = data_list, sampleNames=sampleNames)
})
#' makebmDataFromData method for \code{list} objects
#'
#' @name makebmDataFromData
#' @docType methods
#' @rdname makebmDataFromData-methods
#' @aliases makebmDataFromData,list-method
#' @param data lists object
#' @param sampleNames the name of each samples
#' @details The objects in \code{data} must have specific forms. Colunms should be
#' features, which should be organized in the order of "chr", "pos", "value1",
#' "value2(optional)". chr stands for chromosome. pos stands for position on
#' chromosome, also known as coordinates. value1/2 stands for the value on each base.
#' The colnames can be any character but must be in the order. Rows stands for each
#' observation.
#' @importFrom methods setMethod
#' @exportMethod makebmDataFromData
setMethod("makebmDataFromData", signature(data="list"),
function(data,
sampleNames=NULL){
if(any(unlist(lapply(data, function(x) is.null(colnames(x)))))){
stop("pls input colnames to each objects...")
}
makebmDataFromData.internal(data = data, sampleNames=sampleNames)
})
#' makebmDataFromData method for \code{data.frame} objects
#'
#' @name makebmDataFromData
#' @docType methods
#' @rdname makebmDataFromData-methods
#' @aliases makebmDataFromData,data.frame-method
#' @param data lists object
#' @param sampleNames the name of each samples
#' @details The objects in \code{data} must have specific forms. Colunms should be
#' features, which should be organized in the order of "chr", "pos", "value1",
#' "value2(optional)". chr stands for chromosome. pos stands for position on
#' chromosome, also known as coordinates. value1/2 stands for the value on each base.
#' The colnames can be any character but must be in the order. Rows stands for each
#' observation.
#' @importFrom methods setMethod
#' @exportMethod makebmDataFromData
setMethod("makebmDataFromData", signature(data="data.frame"),
function(data,
sampleNames=NULL){
if(is.null(colnames(data))){
n0 <- ncol(data)-2
colnames(data) <- paste0("value",1:n0)
}
data_list <- list(data)
makebmDataFromData.internal(data = data_list,
sampleNames = sampleNames)
})
#' make dmData object from data
#'
#' @title makebmDataFromData.internal
#' @rdname makebmDataFromData
#' @param data lists object
#' @param sampleNames the name of each samples
#' @details This internal function was inspired by DSS::makeBSseqData.
#'
#' The objects in \code{data} must have specific forms. Colunms should be
#' features, which should be organized in the order of "chr", "pos", "value1",
#' "value2(optional)". chr stands for chromosome. pos stands for position on
#' chromosome, also known as coordinates. value1/2 stands for the value on each base.
#' The colnames can be any character but must be in the order. Rows stands for each
#' observation.
#' @return dmData object
makebmDataFromData.internal <- function(data,
sampleNames=NULL){
n0 <- length(colnames(data[[1]]))-2
## check the sampleNames
sampleNames <- .check_and_make_sampleNames(data = data, sampleNames = sampleNames)
## check variables names in data
if(!.check_variables_names(data)){
stop("pls input the same column names in each object in data...")
}
if(n0 == 1){
bmData <- make_bmData_from_value1(data = data, sampleNames = sampleNames)
}else{
bmData <- make_bmData_from_value1_and_value2(data = data, sampleNames = sampleNames)
}
return(bmData)
}
#' @importFrom dplyr arrange
make_bmData_from_value1 <- function(data, sampleNames){
n0 <- length(data)
allDat <- data.frame(data[[1]][,c(1:2)])
valueNames <- colnames(data[[1]])[3]
## merge data
for(i in 1:n0){
allDat <- data.frame(allDat, data[[i]][,3])
}
colnames(allDat) <- c("chr","pos",sampleNames)
## order the allDat
chr <- pos <- sampleNames <- NULL
allDat.ordered <- arrange(allDat,chr,pos)
value1 <- as.matrix(allDat.ordered[,sampleNames])
bmData <- bmData(chr = allDat.ordered$chr,
pos = allDat.ordered$pos,
value1 = value1,
sampleNames = sampleNames,
valueNames = valueNames)
return(bmData)
}
#' @importFrom dplyr arrange
make_bmData_from_value1_and_value2 <- function(data, sampleNames){
n0 <- length(data)
allDat_value1 <- data.frame(data[[1]][,c(1:2)])
allDat_value2 <- data.frame(data[[1]][,c(1:2)])
value1_name <- colnames(data[[1]])[3]
value2_name <- colnames(data[[1]])[4]
colnames(data[[1]])[3] <- paste0(value1_name, ".1")
colnames(data[[1]])[4] <- paste0(value2_name, ".1")
## merge data
for(i in 1:n0){
colnames(data[[i]])[3] <- paste0(value1_name, ".",i)
colnames(data[[i]])[4] <- paste0(value2_name, ".",i)
allDat_value1 <- merge(allDat_value1,data[[i]][,c(1:2,3)], all = TRUE)
allDat_value2 <- merge(allDat_value2,data[[i]][,c(1:2,4)], all = TRUE)
}
colnames(allDat_value1) <- c("chr","pos",sampleNames)
colnames(allDat_value2) <- c("chr","pos",sampleNames)
allDat_value1[is.na(allDat_value1)] <- 0
allDat_value2[is.na(allDat_value2)] <- 0
## order the allDat
chr <- pos <- NULL
allDat_value1.ordered <- arrange(allDat_value1,chr,pos)
allDat_value2.ordered <- arrange(allDat_value2,chr,pos)
value1 <- as.matrix(allDat_value1.ordered[,sampleNames])
value2 <- as.matrix(allDat_value2.ordered[,sampleNames])
bmData <- bmData(chr = allDat_value1.ordered$chr,
pos = allDat_value1.ordered$pos,
value1 = value1,
value2 = value2,
sampleNames = sampleNames,
valueNames = c(value1_name,value2_name))
return(bmData)
}
##‘ make bmData from files
#'
#' This function makes bmData object from files. Users can input
#' the name of a file or a file folder.
#'
#' @name makebmDataFromFiles
#' @param name the name of files or file folder
#' @param sampleNames the name for each file
#' @param variablesNames the names of the first two columns will be assigned c("chr","pos"),
#' the names of the following columns will be assigned by variablesNames
#' @importFrom utils file_test
#' @details bed files and txt files are supported. Bed files can
#' only contain no more than two metadata, as it stands for value1/2. Txt files
#' should organize the columns as chr, pos, value1, value2(optional).
#' @export
makebmDataFromFiles <- function(name,
sampleNames = NULL,
variablesNames = NULL){
## check the name of file
if(!is.character(name)) stop("pls input character as file name...")
if(file_test("-d", name)){
## deal with file folder
data <- makebmDataFromFiles.folder(name = name, variablesNames = variablesNames)
}else{
## deal with file
data <- makebmDataFromFiles.file(name = name, variablesNames = variablesNames)
}
result <- makebmDataFromData(data = data,
sampleNames = sampleNames)
}
#' @importFrom ChIPseeker readPeakFile
#' @importFrom GenomicRanges mcols
#' @importFrom GenomicRanges GRangesList
#' @importFrom data.table fread
#' @importFrom utils getFromNamespace
makebmDataFromFiles.folder <- function(name, variablesNames){
## check the file type
file_type <- gsub(".*\\.","",list.files(name))
file_type <- file_type[!duplicated(file_type)]
if(length(file_type) != 1){
stop("pls input files with the same type in the folder...")
}
if(is.null(variablesNames)){
cat(">> no variable name is assigned,default names will be assigned",
format(Sys.time(), "%Y-%m-%d %X"), "\n")
}
isBedFile <- getFromNamespace("isBedFile","ChIPseeker")
if(isBedFile(list.files(name))){
data <- lapply(list.files(name),function(x){
cat(">> reading",x, format(Sys.time(), "%Y-%m-%d %X"), "\n")
tmp=readPeakFile(file.path(name,x))
if(is.null(variablesNames)){
n0 <- length(names(mcols(tmp)))
variablesNames <- paste0("value",1:n0)
}
names(mcols(tmp)) <- variablesNames
return(tmp)
})
data_list <- GRangesList(data)
return(data_list)
}
data_list <- lapply(list.files(name),function(x){
cat(">> reading",x, format(Sys.time(), "%Y-%m-%d %X"), "\n")
tmp=fread(file.path(name,x))
if(is.null(variablesNames)){
n0 <- ncol(tmp)-2
variablesNames <- paste0("value",1:n0)
}
colnames(tmp)=c('chr', 'pos' ,variablesNames)
return(tmp)
})
return(data_list)
}
#' @importFrom ChIPseeker readPeakFile
#' @importFrom GenomicRanges mcols
#' @importFrom data.table fread
makebmDataFromFiles.file <- function(name, variablesNames){
if(is.null(variablesNames)){
cat(">> no variable name is assigned,default names will be assigned",
format(Sys.time(), "%Y-%m-%d %X"), "\n")
}
isBedFile <- getFromNamespace("isBedFile","ChIPseeker")
if(isBedFile(name)){
cat(">> reading",name, format(Sys.time(), "%Y-%m-%d %X"), "\n")
data=readPeakFile(name)
if(is.null(variablesNames)){
n0 <- length(names(mcols(data)))
variablesNames <- paste0("value",1:n0)
}
names(mcols(data)) <- variablesNames
return(data)
}
cat(">> reading",name, format(Sys.time(), "%Y-%m-%d %X"), "\n")
data=fread(name)
if(is.null(variablesNames)){
n0 <- ncol(data)-2
variablesNames <- paste0("value",1:n0)
}
colnames(data)=c('chr', 'pos' ,variablesNames)
return(data)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.