Nothing
#' Calculate the expression profile of a gene
#' @description This function calculates the expression profile of an exon in a selection
#' of BAM files. The expression profile is defined as the number of reads overlapping
#' with each position of the exon's transcript.
#' @param gene The exon for which the expression profile is calculated; this should be
#' a row from the tibble generated by \code{\link{cast_gtf_to_genes}}; for a manual input,
#' a tibble with 1 row and named columns (seqid, start, end) would be needed
#' @param bams a vector of paths to the BAM files from which the profile is extracted
#' @param unique.only whether only uniquely mapped reads should contribute to the profile;
#' default is TRUE
#' @param mapq.unique The values of the mapping quality field in the BAM file that corresponds
#' to uniquely mapped reads; by default, values of 50 and 255 are used as these correspond to
#' the most popular aligners, but an adjustment might be needed
#' @param slack slack needs to be >=readLength, adjust for efficiency; the default is 200,
#' as it is higher than most modern sequencing experiments
#' @return The function outputs a list: the first element is a matrix of expression profiles.
#' Rows correspond to positions in the exon transcript and each column corresponds to an input
#' BAM file. Each read is counted for all the positions with which it overlaps (so a read of
#' length 100 that completely overlaps with the exon would be counted for all 100 positions).
#' The second list element is a vector of raw expression of the gene in the different BAM files
#' @export
#' @examples
#' bams <- rep(system.file("extdata", "ex1.bam", package="Rsamtools", mustWork=TRUE), 2)
#' genes <- data.frame("id" = 1:2,
#' "gene_id" = c("gene1", "gene2"),
#' "seqid" = c("seq1", "seq2"),
#' "start" = 1,
#' "end" = 100)
#' profile <- calculate_expression_profile(
#' gene = genes[1,],
#' bams = bams,
#' mapq.unique = 99
#' )
#'
#' ggplot2::ggplot(tibble::tibble(y = profile$profile[,1],
#' x = seq_along(y))) +
#' ggplot2::geom_bar(ggplot2::aes(x, y), stat = "identity") +
#' ggplot2::theme_minimal()
calculate_expression_profile = function(gene,
bams,
unique.only=TRUE,
mapq.unique=c(50,255),
slack=200){
profile <- base::matrix(0,
nrow=gene$end-gene$start+1,
ncol=base::length(bams))
base::colnames(profile) <- bams
gr <- GenomicRanges::GRanges(seqnames = gene$seqid,
ranges = IRanges::IRanges(start=gene$start-slack,
end=gene$end))
params <- Rsamtools::ScanBamParam(which=gr, what=Rsamtools::scanBamWhat())
expression.vector <- base::vector(mode="double", length=base::length(bams))
for(j in base::seq_len(base::length(bams))){
bamIndex <- base::paste0(bams[j], ".bai")
bamFile <- Rsamtools::BamFile(bams[j], bamIndex)
aln <- Rsamtools::scanBam(bamFile, param = params)[[1]]
reads <- tibble::tibble(pos=aln$pos-gene$start+1,
qwidth=aln$qwidth,
mapq=aln$mapq)
if(unique.only){
mapq=NULL
reads <- dplyr::filter(reads, mapq %in% mapq.unique)
}
expression.vector[j] <- base::nrow(reads)
if(base::nrow(reads)>0){
for(r in base::seq_len(base::nrow(reads))){
add <- base::seq(from=reads$pos[r], length.out=reads$qwidth[r])
add <- add[add>0 & add<=base::nrow(profile)]
profile[add, j] <- profile[add, j] + 1
if(base::is.na(base::sum(profile[,j]))){break}
}
}
}
return(base::list("profile"=profile, "expression"=expression.vector))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.