#' Plot ASE metrics generated by Gen.input.
#'
#' This function takes the output of Gen.input and produces various plots regarding allele specific expression in the dataset.
#' @param input The output of the Gen.input function
#' @param individuals The individual(s) to restrict the plots to.
#' @param genes The gene(s) to restrict the plots to.
#' @param variants The variant(s) to restrict the plots to.
#' @export
#' @examples
#' #' #Plot the output of Gen.input stored in aseDat
#' plotASEMetrics(aseDat)
plotASEMetrics<-function(input, individuals=NULL, genes=NULL, variants=NULL)
{
thisASE<-input$ASE
if(!is.null(individuals))
{
thisASE<-input$ASE[which(input$ASE$Ind %in% individuals),]
}
if(!is.null(genes))
{
thisASE<-input$ASE[which(input$ASE$Gene %in% genes),]
}
if(!is.null(variants))
{
thisASE<-input$ASE[which(input$ASE$ID %in% variants),]
}
if(dim(input$ASE)[1] > 0)
{
numInd<-length(unique(thisASE$Ind))
numVar<-length(unique(thisASE$ID))
numGenes<-length(unique(thisASE$Gene[!is.na(thisASE$Gene)]))
numNA<-length(unique(thisASE$ID[is.na(thisASE$Gene)]))
cat("Numbers after filtering:\n")
cat("\t", numInd, "individual(s)\n")
cat("\t", numVar, "variant(s)\n")
cat("\t", numGenes, "gene(s)\n")
title<-paste("ASE metrics across ", numInd, " individuals, ", numGenes, " genes and ", numVar, " variants (", numNA, " outside known genes)", sep="")
hetCounts<-sort(table(thisASE$ID), decreasing = TRUE)
#check plotting when only one SNP
hetCounts<-as.vector(hetCounts[which(hetCounts>0)])
hetCountsRank<-cbind.data.frame(Individuals=hetCounts, Rank=1:length(hetCounts))
colnames(hetCountsRank)<-c("Individuals.Freq", "Rank")
p1<-ggplot(hetCountsRank, aes(x=Rank, y=Individuals.Freq)) +
geom_point() + scale_y_continuous(trans='log10') + annotation_logticks(scaled = TRUE,sides = "lr") +
xlab("Rank of variant") + ylab("Number of heterozygote individuals") + theme_pubr()
#rather convoluted command to get variant counts by gene
geneCounts<-table(as.character(unique(thisASE[complete.cases(thisASE[,c("ID","Gene")]),c("ID","Gene")])$Gene))
#sometimes variants left may not be in genes. If the case dont do this plot.
if(dim(geneCounts)[1] > 0)
{
geneCountsRank<-cbind.data.frame(Genes=sort(as.vector(geneCounts), decreasing = TRUE), Rank=1:length(geneCounts))
#colnames(geneCountsRank)<-c("Freq", "Rank")
p2<-ggplot(geneCountsRank, aes(x=Rank, y=Genes)) +
geom_point() + scale_y_continuous(trans='log10') + annotation_logticks(scaled = TRUE,sides = "lr") +
xlab("Rank of gene") + ylab("Number of heterozygote variants") + theme_pubr()
}
p3<-ggplot(thisASE, aes(x=propRef)) +
geom_histogram(aes(y=..density..),binwidth=.05, colour="black", fill="white") +
geom_vline(aes(xintercept=median(propRef, na.rm=T)), # Ignore NA values for median
color="red", linetype="dashed", size=1) + xlab("Proportion of reads carrying reference allele") + ylab("Density of sites") +
geom_density(alpha=.2, fill="#FF6666") + theme_pubr()
logTransBi<--log10(thisASE$binomp)
maxLogP<-max(logTransBi[is.finite(logTransBi)])
p4<-ggplot(thisASE, aes(x=logRatio, y=totalReads)) +
geom_point(alpha = 3/10,aes(colour=-log10(binomp+1e-300))) + theme_pubr() +
xlab("Log2 ratio ((Ref. reads + 1)/(Alt. reads +1))") + ylab("Total number of reads") +
scale_y_continuous(trans='log10') +
scale_colour_gradientn(name = "-log10(Binomial P value)",colors=c("cornflowerblue","orange", "red"), values=c(0,3/maxLogP,1))
if(!exists("p2"))
{
annotate_figure(ggarrange(p1,p3,p4), top=title)
} else {
annotate_figure(ggarrange(p1,p2,p3,p4), top=title)
}
} else {
stop("No data left to plot. Did you specify valid ids?")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.