#' Extract matrix of signal values from given bigWig files around region of interset - bed file.
#' @details This function is a wrapper around bwtool matrix command and extracts signals from bigWig files from an input BED file.
#' Once extraction is complete matrix can be summarized by mean or median signals.
#'
#' @param coldata Input coldata generated from \code{\link{read_coldata}}
#' @param bed Input bed file
#' @param genome Can be hg19 or mm10. Default NULL. If \code{bed} and \code{genome} arguments are given, \code{bed} will be used.
#' @param binSize Default 25bp
#' @param startFrom region of interest. Default from peak 'center'. Can be 'tss' or 'tes'.
#' @param up upsteram in bp. Default 2500.
#' @param down downstream in bp. Default 2500.
#' @param k cluster regions with k-means where k is the number of clusters. Defualt NULL.
#' @param op_dir Directory to store results. Defult "./"
#' @param rmAfter remove matrix files generated by bwtool once data is read into R. Default TRUE.
#' @param bw_path path to bwtool. Default looks under system path.
#' @param nthreads threads to use. Default 4.
#'
#' @import data.table
#'
#' @export
#'
extract_matrix = function(coldata = NULL, bed = NULL, genome = NULL, binSize = 25, startFrom = "center",
up = 2500, down = 2500, k = NULL,
op_dir = "./", rmAfter = TRUE, bwt_path = NULL, nthreads = 4){
check_bwtools(path = bwt_path, warn = FALSE)
if(is.null(coldata)){
stop("Missing coldata. Use read_coldata to generate one.")
}
if(!is.null(k)){
k = as.numeric(as.character(k))
if(k <2 | k >10){
stop("k should be between 2 and 10")
}
}
bigWigs = as.character(coldata$files)
if(is.null(bed)){
if(is.null(genome)){
stop("Please provide either a BED file or genome")
}else{
if(length(genome) > 1){
warning("Multiple genomes provided. Using first one..")
genome = genome[1]
}
size = check_size(size = c(up, down))
bed = make_genome_bed2(genome = genome, op_dir = op_dir, startFrom = startFrom, up = up, down = down)
}
}else{
size = check_size(size = c(up, down))
bed = make_bed(bed = bed, op_dir = op_dir)
}
if(is.null(k)){
mats = parallel::mclapply(bigWigs, function(x){
bwt_mats(bw = x, binSize = binSize, bed = bed, size = size, startFrom = startFrom, op_dir = op_dir)
}, mc.cores = nthreads)
}else{
bws = paste0(bigWigs, collapse = ",")
mats = bwt_mats_k(bw = bws, binSize = binSize, bed = bed, size = size, startFrom = startFrom, op_dir = op_dir, num_k = k)
}
mats = as.character(unlist(x = mats))
sig_list = lapply(mats, data.table::fread)
#names(sig_list) = gsub(pattern = "*\\.matrix$", replacement = "", x = basename(path = mats))
if(is.null(k)){
names(sig_list) = coldata$sample_names
}else{
names(sig_list) = "clustered_matrix"
}
if(rmAfter){
lapply(mats, function(x) system(command = paste0("rm ", x), intern = TRUE))
}
sig_list$param = c(size = size, binSize = binSize, startFrom = startFrom, k_num = k)
sig_list$cdata = coldata
system(command = paste0("rm ", bed))
sig_list
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.