R/mapex.R

Defines functions run_mapex

Documented in run_mapex

#' Run the mapex algorithm
#'
#' Loads the called variants, extracts the variant reads and runs BLASTN on
#' them, scores the reads, and finally scores the variants. When
#' \code{blast_out_file} is not \code{NULL} the scored reads are also written
#' to that path as a tab-separated file.
#'
#' Required arguments (\code{bam_file}, \code{bam_idx}, \code{variant_file},
#' \code{blastn_path}, \code{blast_db_path}) are checked before any work is
#' done; an error naming every missing argument is raised if any is omitted.
#'
#' @param bam_file Alignment file for the tumor sample to filter in bam format
#' @param bam_idx Index for the alignment in bam_file
#' @param variant_file variant call file in maflite, vcf, call_stats, or snp (varscan) format
#' @param variant_file_type type of variant call file either maf (default maflite format), vcf (version >= 4.0), call_stats (MuTect1 format), or snp (Varscan2 format)
#' @param blast_out_file file name for returned blast hits (default=NULL, do not output scored reads).
#'   The value is also passed through to the internal BLAST step.
#' @param blastn_path Path to a local installation of BLASTN
#' @param blast_db_path Path to a BLAST database (typically human/mouse reference or human reference)
#' @param blast_threads Number of threads to run blast with (default=1)
#' @param min_mapq Minimum read mapping quality score (default=1)
#' @return a data frame with four columns; \code{chrom} the chromosome the variant is on, \code{loc} the position of the variant,
#'   \code{variant_score} the variant score between 0 and 1, and \code{reason} the variant classification which takes values
#'   \code{mouse}, \code{off_target}, or \code{on_target}.
#'
#' @examples
#' \dontrun{
#' ## file paths and variables
#' bam <- "/path/to/tumor.bam"
#' bamidx <- "/path/to/tumor.bai"
#' variants <- "/path/to/variants.vcf"
#' blastout <- "/path/to/blastoutput.txt" # Read level blast results, not required
#' blastpath <- "/path/to/blastn" # if blast is in the users path, just "blastn" here
#' blastdb <- "/path/to/combined_db"
#' threads <- 1 # number of threads consumed by blastn
#' mapq <- 1 # this is the default for minimum mapq score
#'
#' results <- run_mapex(bam_file=bam,
#'                      bam_idx=bamidx,
#'                      variant_file=variants,
#'                      variant_file_type='vcf',
#'                      blast_out_file=blastout,
#'                      blastn_path=blastpath,
#'                      blast_db_path=blastdb,
#'                      blast_threads=threads,
#'                      min_mapq=mapq)
#' }
#' @export
run_mapex <- function(bam_file, bam_idx, variant_file, variant_file_type = 'maf',
                      blast_out_file = NULL, blastn_path, blast_db_path,
                      blast_threads = 1, min_mapq = 1) {
  # Fail fast on omitted required arguments. R evaluates arguments lazily, so
  # without this check a missing `blastn_path`/`blast_db_path` would only
  # surface once an internal helper forces it, after variants were loaded.
  not_supplied <- c(
    bam_file = missing(bam_file),
    bam_idx = missing(bam_idx),
    variant_file = missing(variant_file),
    blastn_path = missing(blastn_path),
    blast_db_path = missing(blast_db_path)
  )
  if (any(not_supplied)) {
    stop(
      "run_mapex: missing required argument(s): ",
      paste(names(not_supplied)[not_supplied], collapse = ", "),
      call. = FALSE
    )
  }

  # load the variants and get the locations of called variants
  variant_locs <- .load_variants(variant_file, variant_file_type)

  # extract variant reads and run BLASTN
  blast_out <- .run_blast(variant_locs, blast_out_file, bam_file, bam_idx,
                          blastn_path, blast_db_path, blast_threads, min_mapq)

  # score reads
  scored_reads <- .score_reads(blast_out)

  # optionally persist the read-level scores as a tab-separated file
  if (!is.null(blast_out_file)) {
    readr::write_tsv(scored_reads, blast_out_file)
  }

  # score variants; this is the function's return value
  scored_variants <- .score_variants(scored_reads)
  scored_variants
}
bmannakee/mapexr documentation built on May 5, 2019, 12:27 p.m.