#' @importFrom dplyr is.tbl
#' @importFrom stringr str_c
#'
spectorFile <- function(f_bam, id_bam = NULL, s_prep = NULL, out_F = NULL,
save_out, chr_cores, regions = "giab",
region_size = NULL, f_bed = NULL,
header = FALSE, smr = "rms", genome_v = "hg19",
region_o = 0) {
message(paste("Running on file:", f_bam, "\n=>\n"))
#
# Load region data frame from bed file or filter giab regions
# --------------------------------------------------------------------------
if(!is.tbl(f_bed)) {
region_df <- getRegions(regions = regions, f_bed = f_bed,
region_size = region_size, header = header,
genome = genome_v, reg_overlap = region_o)
} else {
region_df <- f_bed
}
# extract bam file stats using rsamtools
# --------------------------------------------------------------------------
bam_stats <- nReadsBam(f_bam)
n_read <- sum(bam_stats$n_read / 10^9, na.rm = TRUE)
# ==========================================================================
# COMPARE CHROMOSOME LISTS BETWEEN BAM AND BED FILE
# ==========================================================================
region_df <- chrIntersect(region_df, bam_stats$chrom)
if (nrow(region_df) > 0) {
message(c("Running spector for:\n",
paste0("Chromosome: ", unique(region_df$chrom), "\n")))
} else {
stop("Issues matching chr in '*.bed' and '*.bam' or no overlap",
call. = FALSE)
}
message(str_c(format(nrow(region_df), big.mark = ","),
" regions identified."))
srm_df <- spectorLas(region_df = region_df, f_bam = f_bam,
chr_cores = chr_cores, n_bam = n_read, met = smr)
if (is.null(id_bam)) {
id_bam <- gsub(".bam", "", x = basename(f_bam))
}
srm_df$id_bam <- id_bam
srm_df$prep <- s_prep
if (is.null(out_F) & save_out) {
warning("Output folder not specified, output will not be saved",
call. = FALSE, domain = 'spector_run')
} else if (save_out | !is.null(out_F)) {
out_path <- paste(out_F, id_bam, "_out.csv", sep = "")
write.csv(srm_df, file = out_path, row.names = FALSE)
message(paste("Completed, output written to:", out_path))
}
return(srm_df)
}
#' @importFrom parallel mclapply
#' @importFrom dplyr bind_rows
#' @importFrom stringr str_c
#'
spectorList <- function(fs_bam, id_v, s_v, out_F, file_cores = 1,
save_out, chr_cores, regions = "giab",
region_size = NULL, f_bed = NULL,
bed_header = FALSE, smr = "rms", genome_v = "hg19",
region_o = 0) {
# function to be run inside of mclappy
f_idx <- seq_along(fs_bam)
srm_df <-
mclapply(X = f_idx, function(idx) {
message(str_c("Running file: ", fs_bam[idx]))
spectorFile(f_bam = fs_bam[idx], id_bam = id_v[idx],
s_prep = s_v[idx], out_F = NULL, save_out = FALSE,
chr_cores = chr_cores, regions = regions,
region_size = region_size, f_bed = f_bed,
header = bed_header, smr = smr, region_o = region_o)},
mc.cores = file_cores) %>%
bind_rows()
return(srm_df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.