R/plots.R

Defines functions demuxBoxplot.basecallQC passFilterTilePlot.list passFilterTilePlot.basecallQC passFilterBoxplot.list passFilterBoxplot.basecallQC passFilterBar.list passFilterBar.basecallQC

Documented in demuxBoxplot.basecallQC passFilterBar.basecallQC passFilterBoxplot.basecallQC passFilterTilePlot.basecallQC

#' Barplot of Illumina basecalling statistics for reads passing filter.
#'
#' Produces a barplot of Illumina basecalling statistics for reads passing filter.
#' 
#' @usage
#' \S4method{passFilterBar}{baseCallQC}(object,groupBy,metricToPlot)
#'
#' @docType methods
#' @name passFilterBar
#' @rdname passFilterBar
#' @aliases passFilterBar passFilterBar,baseCallQC-method
#' 
#' @author Thomas Carroll and Marian Dore
#' @param object A  basecallQC object or list from call to baseCallMetrics()
#' @param groupBy Character vector of how data is grouped for plotting. Should be either "Project","Sample","Lane","Tile","ReadNumber".
#' @param metricToPlot Character vector defining which metric will be displayed in plot. Should be either "Yield","Yield30","QualityScoreSum" or "ClusterCount".
#' @return A ggplot2 object.
#' @import stringr XML RColorBrewer methods raster BiocStyle
#' @examples
#' fileLocations <- system.file("extdata",package="basecallQC")
#' runXML <- dir(fileLocations,pattern="runParameters.xml",full.names=TRUE)
#' config <- dir(fileLocations,pattern="config.ini",full.names=TRUE)
#' sampleSheet <- dir(fileLocations,pattern="*\\.csv",full.names=TRUE)
#' outDir <- file.path(fileLocations,"Runs/161105_D00467_0205_AC9L0AANXX/C9L0AANXX/")
#' bcl2fastqparams <- BCL2FastQparams(runXML,config,runDir=getwd(),outDir,verbose=FALSE)
#' bclQC <- basecallQC(bcl2fastqparams,RunMetaData=NULL,sampleSheet)
#' plot <- passFilterBar(bclQC)

#' @export
passFilterBar.basecallQC <- function(object,groupBy=c("Lane"),metricToPlot="Yield"){
  groupByS <- unique(c(groupBy,"Filter"))
  groupByG <- unique(c(groupBy))
  toPlot <- object@baseCallMetrics$convStatsProcessed %>%
    group_by_(.dots=as.list(groupByS)) %>%
    filter(Sample != "all") %>%
    summarise_(.dots = setNames(list(interp( ~sum(as.numeric(var)),
                                             var=as.name(metricToPlot))
    )
    ,metricToPlot)) %>%
    spread_("Filter",metricToPlot) %>%
    mutate(Ff=Raw-Pf) %>%
    dplyr::select(-Raw) %>%
    tbl_df %>%
    gather_(key_col="PassFilter",value_col=as.name(metricToPlot),c("Ff","Pf"))
  p <- ggplot(data=toPlot,aes_string(x=groupByG,y=metricToPlot,fill="PassFilter"))+geom_bar(stat = "identity")+ coord_flip()
  return(p)
}

setGeneric("passFilterBar", function(object="basecallQC",groupBy="character",metricToPlot="character") standardGeneric("passFilterBar"))

#' @rdname passFilterBar
#' @export
setMethod("passFilterBar", signature(object="basecallQC"), passFilterBar.basecallQC)


passFilterBar.list <- function(object,groupBy=c("Lane"),metricToPlot="Yield"){
  groupByS <- unique(c(groupBy,"Filter"))
  groupByG <- unique(c(groupBy))
  toPlot <- object$convStatsProcessed %>%
    group_by_(.dots=as.list(groupByS)) %>%
    filter(Sample != "all") %>%
    summarise_(.dots = setNames(list(interp( ~sum(as.numeric(var)),
                                             var=as.name(metricToPlot))
    )
    ,metricToPlot)) %>%
    spread_("Filter",metricToPlot) %>%
    mutate(Ff=Raw-Pf) %>%
    dplyr::select(-Raw) %>%
    tbl_df %>%
    gather_(key_col="PassFilter",value_col=as.name(metricToPlot),c("Ff","Pf"))
  p <- ggplot(data=toPlot,aes_string(x=groupByG,y=metricToPlot,fill="PassFilter"))+geom_bar(stat = "identity")+ coord_flip()
  return(p)
}

#' @rdname passFilterBar
#' @export
setMethod("passFilterBar", signature(object="list"),passFilterBar.list)


#' Boxplot of Illumina basecalling statistics for reads passing filter.
#'
#' Produces a boxplot of basecalling statistics for reads passing filter.
#'
#' @usage
#' \S4method{passFilterBoxplot}{baseCallQC}(object,groupBy,metricToPlot)
#' @docType methods
#' @name passFilterBoxplot
#' @rdname passFilterBoxplot
#' @aliases passFilterBoxplot passFilterBoxplot,baseCallQC-method
#' 
#' @author Thomas Carroll and Marian Dore
#' @param object A  basecallQC object or list from call to baseCallMetrics()
#' @param groupBy Character vector of how data is grouped for plotting. Should be either "Project","Sample","Lane","Tile","ReadNumber".
#' @param metricToPlot Character vector defining which metric will be displayed in plot. Should be either "Yield","Yield30","QualityScoreSum" or "ClusterCount".
#' @return  A ggplot2 object.
#' @import stringr XML RColorBrewer methods raster BiocStyle
#' @examples
#' fileLocations <- system.file("extdata",package="basecallQC")
#' runXML <- dir(fileLocations,pattern="runParameters.xml",full.names=TRUE)
#' config <- dir(fileLocations,pattern="config.ini",full.names=TRUE)
#' sampleSheet <- dir(fileLocations,pattern="*\\.csv",full.names=TRUE)
#' outDir <- file.path(fileLocations,"Runs/161105_D00467_0205_AC9L0AANXX/C9L0AANXX/")
#' bcl2fastqparams <- BCL2FastQparams(runXML,config,runDir=getwd(),outDir,verbose=FALSE)
#' bclQC <- basecallQC(bcl2fastqparams,RunMetaData=NULL,sampleSheet)
#' plot <- passFilterBoxplot(bclQC,groupBy = "Sample")
#' @export
passFilterBoxplot.basecallQC <- function(object,groupBy=c("Lane"),metricToPlot="Yield"){
  groupByS <- unique(c("Lane","Sample","Tile","Filter"))
  groupByG <- unique(c(groupBy))
  toPlot <- object@baseCallMetrics$convStatsProcessed %>%
    group_by_(.dots=as.list(groupByS)) %>%
    filter(Sample != "all") %>%
    summarise_(.dots = setNames(list(interp( ~sum(as.numeric(var)),
                                             var=as.name(metricToPlot))
    )
    ,metricToPlot)) %>%
    spread_("Filter",metricToPlot) %>%
    mutate(Ff=Raw-Pf) %>%
    dplyr::select(-Raw) %>%
    tbl_df %>%
    gather_(key_col="PassFilter",value_col=as.name(metricToPlot),c("Ff","Pf"))
  p <- ggplot(data=toPlot,aes_string(x=groupByG,y="Yield",fill="PassFilter"))+geom_violin(scale="width")+ coord_flip()+facet_grid(PassFilter~.,scales = "free")+geom_jitter()
  return(p)
}

setGeneric("passFilterBoxplot", function(object="basecallQC",groupBy="character",metricToPlot="character") standardGeneric("passFilterBoxplot"))

#' @rdname passFilterBoxplot
#' @export
setMethod("passFilterBoxplot", signature(object="basecallQC"), passFilterBoxplot.basecallQC)

passFilterBoxplot.list <- function(object,groupBy=c("Lane"),metricToPlot="Yield"){
  groupByS <- unique(c("Lane","Sample","Tile","Filter"))
  groupByG <- unique(c(groupBy))
  toPlot <- object$convStatsProcessed %>%
    group_by_(.dots=as.list(groupByS)) %>%
    filter(Sample != "all") %>%
    summarise_(.dots = setNames(list(interp( ~sum(as.numeric(var)),
                                             var=as.name(metricToPlot))
    )
    ,metricToPlot)) %>%
    spread_("Filter",metricToPlot) %>%
    mutate(Ff=Raw-Pf) %>%
    dplyr::select(-Raw) %>%
    tbl_df %>%
    gather_(key_col="PassFilter",value_col=as.name(metricToPlot),c("Ff","Pf"))
  p <- ggplot(data=toPlot,aes_string(x=groupByG,y="Yield",fill="PassFilter"))+geom_violin(scale="width")+ coord_flip()+facet_grid(PassFilter~.,scales = "free")+geom_jitter()
  return(p)
}

#' @rdname passFilterBoxplot
#' @export
setMethod("passFilterBoxplot", signature(object="list"),passFilterBoxplot.list)


#' Tile plot of Illumina basecalling statistics for reads passing filter.
#'
#' Produces a plot of metric per Tile for basecalling statistics of reads passing/failing filter.
#'
#' @usage
#' \S4method{passFilterTilePlot}{baseCallQC}(object,metricToPlot)
#' @docType methods
#' @name passFilterTilePlot
#' @rdname passFilterTilePlot
#' @aliases passFilterTilePlot passFilterTilePlot,baseCallQC-method
#' @author Thomas Carroll and Marian Dore
#' @param object  A  basecallQC object or list from call to baseCallMetrics()
#' @param metricToPlot Character vector defining which metric will be displayed in plot. Should be either "Yield","Yield30","QualityScoreSum" or "ClusterCount".
#' @return A ggplot2 object.
#' @import stringr XML RColorBrewer methods raster BiocStyle
#' @examples
#' fileLocations <- system.file("extdata",package="basecallQC")
#' runXML <- dir(fileLocations,pattern="runParameters.xml",full.names=TRUE)
#' config <- dir(fileLocations,pattern="config.ini",full.names=TRUE)
#' sampleSheet <- dir(fileLocations,pattern="*\\.csv",full.names=TRUE)
#' outDir <- file.path(fileLocations,"Runs/161105_D00467_0205_AC9L0AANXX/C9L0AANXX/")
#' bcl2fastqparams <- BCL2FastQparams(runXML,config,runDir=getwd(),outDir,verbose=FALSE)
#' bclQC <- basecallQC(bcl2fastqparams,RunMetaData=NULL,sampleSheet)
#' plot <- passFilterTilePlot(bclQC,metricToPlot="Yield")
#' @export
passFilterTilePlot.basecallQC <- function(object,metricToPlot="Yield"){
  groupByS <- unique(c("Lane","Sample","Tile","Filter"))
  #groupByG <- unique(c(groupBy))
  toPlot <- object@baseCallMetrics$convStatsProcessed %>%
    group_by_(.dots=as.list(groupByS)) %>%
    filter(Sample != "all") %>%
    summarise_(.dots = setNames(list(interp( ~sum(as.numeric(var)),
                                             var=as.name(metricToPlot))
    )
    ,metricToPlot)) %>%
    spread_("Filter",metricToPlot) %>%
    mutate(Ff=Raw-Pf) %>%
    dplyr::select(-Raw) %>%
    tbl_df %>%
    gather_(key_col="PassFilter",value_col=as.name(metricToPlot),c("Ff","Pf")) %>%
    mutate(Surface=str_sub(Tile,1,1),Box=str_sub(Tile,2,2),Pos=str_sub(Tile,3))
    pPf <- filter(toPlot,PassFilter=="Pf") %>%
    ggplot(aes(x=Box,y=Pos))+geom_tile(aes_string(fill=metricToPlot))+facet_grid(~Lane)+scale_fill_gradient2(low = "white", high = "darkblue")+theme_bw()
    pFf <- filter(toPlot,PassFilter=="Ff") %>%
    ggplot(aes(x=Box,y=Pos))+geom_tile(aes_string(fill=metricToPlot))+facet_grid(~Lane)+scale_fill_gradient2(low = "white", high = "darkblue")+theme_bw()
  return(list(PassFilter=pPf,FailFilter=pFf))
}

setGeneric("passFilterTilePlot", function(object="basecallQC",metricToPlot="character") standardGeneric("passFilterTilePlot"))

#' @rdname passFilterTilePlot
#' @export
setMethod("passFilterTilePlot", signature(object="basecallQC"), passFilterTilePlot.basecallQC)

passFilterTilePlot.list <- function(object,metricToPlot="Yield"){
  groupByS <- unique(c("Lane","Sample","Tile","Filter"))
  #groupByG <- unique(c(groupBy))
  toPlot <- object$convStatsProcessed %>%
    group_by_(.dots=as.list(groupByS)) %>%
    filter(Sample != "all") %>%
    summarise_(.dots = setNames(list(interp( ~sum(as.numeric(var)),
                                             var=as.name(metricToPlot))
    )
    ,metricToPlot)) %>%
    spread_("Filter",metricToPlot) %>%
    mutate(Ff=Raw-Pf) %>%
    dplyr::select(-Raw) %>%
    tbl_df %>%
    gather_(key_col="PassFilter",value_col=as.name(metricToPlot),c("Ff","Pf")) %>%
    mutate(Surface=str_sub(Tile,1,1),Box=str_sub(Tile,2,2),Pos=str_sub(Tile,3))
  pPf <- filter(toPlot,PassFilter=="Pf") %>%
    ggplot(aes(x=Box,y=Pos))+geom_tile(aes_string(fill=metricToPlot))+facet_grid(~Lane)+scale_fill_gradient2(low = "white", high = "darkblue")+theme_bw()
  pFf <- filter(toPlot,PassFilter=="Ff") %>%
    ggplot(aes(x=Box,y=Pos))+geom_tile(aes_string(fill=metricToPlot))+facet_grid(~Lane)+scale_fill_gradient2(low = "white", high = "darkblue")+theme_bw()
  return(list(PassFilter=pPf,FailFilter=pFf))
  
}

#' @rdname passFilterTilePlot
#' @export
setMethod("passFilterTilePlot", signature(object="list"),passFilterTilePlot.list)


#' Boxplot of Illumina demultiplexing statistics.
#'
#' Produces a boxplot of demultiplexing statistics of reads with perfect/mismatched barcode.
#'
#' @usage
#' \S4method{demuxBoxplot}{baseCallQC}(object,groupBy)
#' @docType methods
#' @name demuxBoxplot
#' @rdname demuxBoxplot
#' @aliases demuxBoxplot demuxBoxplot,baseCallQC-method
#' @author Thomas Carroll and Marian Dore
#' @param object A  basecallQC object or list from call to demultiplexMetrics()
#' @param groupBy Character vector of how data is grouped for plotting. Should be either "Project","Sample" or "Lane".
#' @return A ggplot2 object.
#' @import stringr XML RColorBrewer methods raster BiocStyle
#' @examples
#' fileLocations <- system.file("extdata",package="basecallQC")
#' runXML <- dir(fileLocations,pattern="runParameters.xml",full.names=TRUE)
#' config <- dir(fileLocations,pattern="config.ini",full.names=TRUE)
#' sampleSheet <- dir(fileLocations,pattern="*\\.csv",full.names=TRUE)
#' outDir <- file.path(fileLocations,"Runs/161105_D00467_0205_AC9L0AANXX/C9L0AANXX/")
#' bcl2fastqparams <- BCL2FastQparams(runXML,config,runDir=getwd(),outDir,verbose=FALSE)
#' bclQC <- basecallQC(bcl2fastqparams,RunMetaData=NULL,sampleSheet)
#' plot <- demuxBoxplot(bclQC)
#' @export
demuxBoxplot.basecallQC <- function(object,groupBy=c("Lane")){
  metricToPlot <- "Count"
  groupByS <- unique(c("Lane","Sample","Project","Barcode","BarcodeStat"))
  groupByG <- unique(c(groupBy))

  toPlot <- object@demultiplexMetrics$demuxStatsProcessed %>%
    group_by_(.dots=as.list(groupByS)) %>%
    filter(Sample != "all") %>%
    summarise_(.dots = setNames(list(interp( ~sum(as.numeric(var)),
                                             var=as.name(metricToPlot))
    )
    ,metricToPlot)) %>%
    spread_("BarcodeStat",metricToPlot) %>%
    mutate(mismatchedBarcodeCount=BarcodeCount-PerfectBarcodeCount) %>%
    dplyr::select(-BarcodeCount) %>%
    tbl_df %>%
    gather_(key_col="BarcodeCount",value_col=as.name(metricToPlot),c("mismatchedBarcodeCount","PerfectBarcodeCount"))
    p <- ggplot(data=toPlot,aes_string(x=groupByG,y=metricToPlot,fill="BarcodeCount"))+geom_violin(scale = "width")+ coord_flip()+facet_grid(BarcodeCount~.)
  return(p)
}

setGeneric("demuxBoxplot", function(object="basecallQC",groupBy="character") standardGeneric("demuxBoxplot"))

#' @rdname demuxBoxplot
#' @export
setMethod("demuxBoxplot", signature(object="basecallQC"), demuxBoxplot.basecallQC)

demuxBoxplot.list <- function(object,groupBy=c("Lane")){
  metricToPlot <- "Count"
  groupByS <- unique(c("Lane","Sample","Project","Barcode","BarcodeStat"))
  groupByG <- unique(c(groupBy))
  
  toPlot <- object$demuxStatsProcessed %>%
    group_by_(.dots=as.list(groupByS)) %>%
    filter(Sample != "all") %>%
    summarise_(.dots = setNames(list(interp( ~sum(as.numeric(var)),
                                             var=as.name(metricToPlot))
    )
    ,metricToPlot)) %>%
    spread_("BarcodeStat",metricToPlot) %>%
    mutate(mismatchedBarcodeCount=BarcodeCount-PerfectBarcodeCount) %>%
    dplyr::select(-BarcodeCount) %>%
    tbl_df %>%
    gather_(key_col="BarcodeCount",value_col=as.name(metricToPlot),c("mismatchedBarcodeCount","PerfectBarcodeCount"))
  p <- ggplot(data=toPlot,aes_string(x=groupByG,y=metricToPlot,fill="BarcodeCount"))+geom_violin(scale = "width")+ coord_flip()+facet_grid(BarcodeCount~.)
  return(p)
}

#' @rdname demuxBoxplot
#' @export
setMethod("demuxBoxplot", signature(object="list"), demuxBoxplot.list)



#' Barplot of Illumina demultiplexing statistics.
#'
#' Produces a barplot of demultiplexing statistics of reads with perfect/mismatched barcode.
#'
#' @usage
#' \S4method{demuxBarplot}{baseCallQC}(object,groupBy)
#' @docType methods
#' @name demuxBarplot
#' @rdname demuxBarplot
#' @aliases demuxBarplot demuxBarplot,baseCallQC-method
#' @author Thomas Carroll and Marian Dore
#' @param object A  basecallQC object or list from call to demultiplexMetrics()
#' @param groupBy Character vector of how data is grouped for plotting. Should be either "Project","Sample" or "Lane".
#' @return A ggplot2 object.
#' @import stringr XML RColorBrewer methods raster BiocStyle
#' @examples
#' fileLocations <- system.file("extdata",package="basecallQC")
#' runXML <- dir(fileLocations,pattern="runParameters.xml",full.names=TRUE)
#' config <- dir(fileLocations,pattern="config.ini",full.names=TRUE)
#' sampleSheet <- dir(fileLocations,pattern="*\\.csv",full.names=TRUE)
#' outDir <- file.path(fileLocations,"Runs/161105_D00467_0205_AC9L0AANXX/C9L0AANXX/")
#' bcl2fastqparams <- BCL2FastQparams(runXML,config,runDir=getwd(),outDir,verbose=FALSE)
#' bclQC <- basecallQC(bcl2fastqparams,RunMetaData=NULL,sampleSheet)
#' plot <- demuxBarplot(bclQC)
#' @export
demuxBarplot.basecallQC <- function(object,groupBy=c("Lane")){
  metricToPlot <- "Count"
  groupByS <- unique(c("Lane","Sample","Project","Barcode","BarcodeStat"))
  groupByG <- unique(c(groupBy))

  toPlot <- object@demultiplexMetrics$demuxStatsProcessed %>%
    group_by_(.dots=as.list(groupByS)) %>%
    filter(Sample != "all") %>%
    summarise_(.dots = setNames(list(interp( ~sum(as.numeric(var)),
                                             var=as.name(metricToPlot))
    )
    ,metricToPlot)) %>%
    spread_("BarcodeStat",metricToPlot) %>%
    mutate(mismatchedBarcodeCount=BarcodeCount-PerfectBarcodeCount) %>%
    dplyr::select(-BarcodeCount) %>%
    tbl_df %>%
    gather_(key_col="BarcodeCount",value_col=as.name(metricToPlot),c("mismatchedBarcodeCount","PerfectBarcodeCount"))
  p <- ggplot(data=toPlot,aes_string(x=groupByG,y=metricToPlot,fill=groupByG))+geom_bar(stat="identity")+ coord_flip()+facet_grid(BarcodeCount~.)
  return(p)
}

setGeneric("demuxBarplot", function(object="basecallQC",groupBy="character") standardGeneric("demuxBarplot"))

#' @rdname demuxBarplot
#' @export
setMethod("demuxBarplot", signature(object="basecallQC"), demuxBarplot.basecallQC)

demuxBarplot.list <- function(object,groupBy=c("Lane")){
  metricToPlot <- "Count"
  groupByS <- unique(c("Lane","Sample","Project","Barcode","BarcodeStat"))
  groupByG <- unique(c(groupBy))
  
  toPlot <- object$demuxStatsProcessed %>%
    group_by_(.dots=as.list(groupByS)) %>%
    filter(Sample != "all") %>%
    summarise_(.dots = setNames(list(interp( ~sum(as.numeric(var)),
                                             var=as.name(metricToPlot))),
                                metricToPlot)) %>%
    spread_("BarcodeStat",metricToPlot) %>%
    mutate(mismatchedBarcodeCount=BarcodeCount-PerfectBarcodeCount) %>%
    dplyr::select(-BarcodeCount) %>%
    tbl_df %>%
    gather_(key_col="BarcodeCount",value_col=as.name(metricToPlot),c("mismatchedBarcodeCount","PerfectBarcodeCount"))
  p <- ggplot(data=toPlot,aes_string(x=groupByG,y=metricToPlot,fill=groupByG))+geom_bar(stat="identity")+ coord_flip()+facet_grid(BarcodeCount~.)
  return(p)
}

#' @rdname demuxBarplot
#' @export
setMethod("demuxBarplot", signature(object="list"), demuxBarplot.list)


  

Try the basecallQC package in your browser

Any scripts or data that you put into this service are public.

basecallQC documentation built on Nov. 8, 2020, 8:03 p.m.