R/plotASEMetrics.R

Defines functions plotASEMetrics

Documented in plotASEMetrics

#' Plot ASE metrics generated by Gen.input. 
#'
#' This function takes the output of Gen.input and produces various plots regarding allele specific expression in the dataset.
#' @param input The output of the Gen.input function
#' @param individuals The individual(s) to restrict the plots to.
#' @param genes The gene(s) to restrict the plots to.
#' @param variants The variant(s) to restrict the plots to.
#' @export
#' @examples
#' #' #Plot the output of Gen.input stored in aseDat
#' plotASEMetrics(aseDat)


plotASEMetrics<-function(input, individuals=NULL, genes=NULL, variants=NULL)
{
  thisASE<-input$ASE
  if(!is.null(individuals))
  {
    thisASE<-input$ASE[which(input$ASE$Ind %in% individuals),]
  }
  if(!is.null(genes))
  {
    thisASE<-input$ASE[which(input$ASE$Gene %in% genes),]
  }
  if(!is.null(variants))
  {
    thisASE<-input$ASE[which(input$ASE$ID %in% variants),]
  }
  if(dim(input$ASE)[1] > 0)
  {
    numInd<-length(unique(thisASE$Ind))
    numVar<-length(unique(thisASE$ID))
    numGenes<-length(unique(thisASE$Gene[!is.na(thisASE$Gene)]))
    numNA<-length(unique(thisASE$ID[is.na(thisASE$Gene)]))
    cat("Numbers after filtering:\n")
    cat("\t", numInd, "individual(s)\n")
    cat("\t", numVar, "variant(s)\n")
    cat("\t", numGenes, "gene(s)\n")
    
    title<-paste("ASE metrics across ", numInd, " individuals, ", numGenes, " genes and ", numVar, " variants (", numNA, " outside known genes)", sep="")
    
    hetCounts<-sort(table(thisASE$ID), decreasing = TRUE)
    #check plotting when only one SNP
    hetCounts<-as.vector(hetCounts[which(hetCounts>0)])
    hetCountsRank<-cbind.data.frame(Individuals=hetCounts, Rank=1:length(hetCounts))
    colnames(hetCountsRank)<-c("Individuals.Freq", "Rank")
    p1<-ggplot(hetCountsRank, aes(x=Rank, y=Individuals.Freq)) +
      geom_point() + scale_y_continuous(trans='log10') + annotation_logticks(scaled = TRUE,sides = "lr") + 
      xlab("Rank of variant") + ylab("Number of heterozygote individuals") + theme_pubr()
    
    #rather convoluted command to get variant counts by gene
    geneCounts<-table(as.character(unique(thisASE[complete.cases(thisASE[,c("ID","Gene")]),c("ID","Gene")])$Gene))
    #sometimes variants left may not be in genes. If the case dont do this plot.
    if(dim(geneCounts)[1] > 0)
    {
      geneCountsRank<-cbind.data.frame(Genes=sort(as.vector(geneCounts), decreasing = TRUE), Rank=1:length(geneCounts))
      #colnames(geneCountsRank)<-c("Freq", "Rank")
      p2<-ggplot(geneCountsRank, aes(x=Rank, y=Genes)) +
        geom_point() + scale_y_continuous(trans='log10') + annotation_logticks(scaled = TRUE,sides = "lr") + 
        xlab("Rank of gene") + ylab("Number of heterozygote variants") + theme_pubr()
    }
    p3<-ggplot(thisASE, aes(x=propRef)) +
      geom_histogram(aes(y=..density..),binwidth=.05, colour="black", fill="white") +
      geom_vline(aes(xintercept=median(propRef, na.rm=T)),   # Ignore NA values for median
                 color="red", linetype="dashed", size=1) + xlab("Proportion of reads carrying reference allele") + ylab("Density of sites") +
      geom_density(alpha=.2, fill="#FF6666") + theme_pubr()
    
    logTransBi<--log10(thisASE$binomp)
    maxLogP<-max(logTransBi[is.finite(logTransBi)])
    p4<-ggplot(thisASE, aes(x=logRatio, y=totalReads)) +
      geom_point(alpha = 3/10,aes(colour=-log10(binomp+1e-300))) + theme_pubr() + 
      xlab("Log2 ratio ((Ref. reads + 1)/(Alt. reads +1))") + ylab("Total number of reads") +
      scale_y_continuous(trans='log10') +
      scale_colour_gradientn(name = "-log10(Binomial P value)",colors=c("cornflowerblue","orange", "red"), values=c(0,3/maxLogP,1))
    
    if(!exists("p2"))
    {
      annotate_figure(ggarrange(p1,p3,p4), top=title)
    } else {
      annotate_figure(ggarrange(p1,p2,p3,p4), top=title)
    }
  } else {
    stop("No data left to plot. Did you specify valid ids?")
  }
  
}
AClement1990/hap-eQTL documentation built on Jan. 8, 2021, 12:41 a.m.