#' Load several bam files
#'
#' This function allows you to load several bam files, the result is a list of data tables
#' @param organism organism of the sample group, default is "human"
#' @param blacklist_folder folder where the blacklist files are, default is 'utils'
#' @keywords import
#' @import data.table tidyverse
#' @export
#' @examples
#' load_bams(organism)
load_bams <- function(organism = "human" , blacklist_folder = "utils") {
if(organism %in% c("mouse", "mm", "mus musculus")){
genome <- "mm10"
# List of blacklisted regions (obtained from 'https://github.com/Boyle-Lab/Blacklist/tree/master/lists')
blacklist <- file.path(blacklist_folder, "mm10-blacklist.v2.bed.gz")
ensembl_species <- "mmusculus_gene_ensembl"
chromosomes = paste('chr', c(1:19, "X"), sep = "")
}else if (organism %in% c("human", "hg", "homo sapiens")) {
ensembl_species = "hsapiens_gene_ensembl"
genome <- "hg38"
# List of blacklisted regions (obtained from 'https://github.com/Boyle-Lab/Blacklist/tree/master/lists')
blacklist <<- file.path(blacklist_folder, "hg38-blacklist.bed.gz")
chromosomes = paste('chr', c(1:22, "X"), sep = "")
} else {
print("the only organisms supported are 'mouse' and 'human'")
stop()
}
###data collection based on species
sample_csv = "sample.csv"
sample <- read.csv(file.path(sample_csv))
path <- file.path("data", "input", "beds")
sample$path <- file.path(path, paste(sample$SAMPLE.NAME, "_trimmed.mapped.filtered.sorted.deduplicated.bed", sep = ""))
sampleTable = data.table(name = as.character(sample$short_name),
Batch = as.character(sample$run),
Treatment = as.character(sample$treatment),
Replicate = as.character(sample$run),
path= as.character(sample$path),
species = as.character(sample$species))
sampleTable <- sampleTable %>% filter(species == organism)
# Load the blacklist file
os <- Sys.info()['sysname'][[1]]
if (os=="Linux") {
blacklist <<- fread(cmd=paste("gunzip -c", blacklist),
select=1:3, col.names=c("Chrom", "Start", "End"))
} else if (os=="Windows") {
blacklist <- fread(cmd=paste("tar -xvzf", blacklist),
select=1:3, col.names=c("Chrom", "Start", "End"))
}
# Transform chromosome names, and coordinates to 1-based
#blacklist[, `:=`(Chrom = gsub("chr", "", Chrom), Start = Start+1, End = End+1)]
blacklist[, `:=`(Chrom = Chrom, Start = Start+1, End = End+1)]
setkeyv(blacklist, colnames(blacklist))
#loading the bed files cmd=paste("gunzip -c", x),
beds_dt <- lapply(with(sampleTable, setNames(path, name)),
function(x){
tmp = fread(file = x, showProgress=FALSE,
col.names=c("Chrom", "Start", "End", "score", "strand"),
colClasses=c("character", "numeric", "numeric", "numeric", "character"))
setkeyv(tmp, c("Chrom", "Start", "End"))
tmp = tmp[Chrom%in%chromosomes,]
# Filter out DSBs falling within blacklisted regions
tmp2 = fsetdiff(tmp[, 1:3, with=FALSE], blacklist)
tmp2[tmp, score := score]
return(tmp2)
})
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.