knitr::opts_chunk$set(echo = TRUE) library(knitr) library(kableExtra) library(SummarizedExperiment) library(WGCNA) library(plotly) library(matrixStats) library(reshape2) library(tidyverse) library(ezRun) library(writexl) library(ggplot2) library(ggiraph)
if(!exists("dataset")){ dataset <- readRDS("dataset.rds") } if(!exists("param")){ param <- readRDS("param.rds") } if(!exists("resultList")){ resultList <- readRDS("resultList.rds") } conds = ezConditionsFromDataset(dataset, param=param) samples = rownames(dataset) sampleColors = getSampleColors(conds, samples) bamFiles = dataset$BAM
if (param$writeIgvSessionLink){ if (length(bamFiles) > 4){ idx = which(!duplicated(conds)) idx = idx[1:min(4, length(idx))] } else { idx = 1:length(bamFiles) } for (each in idx){ writeIgvSession(genome=getIgvGenome(param), refBuild=param$ezRef["refBuild"], file=basename(sub(".bam", "-igv.xml", bamFiles[each])), bamUrls=paste(PROJECT_BASE_URL, bamFiles[each], sep="/")) writeIgvJnlp(jnlpFile=basename(sub(".bam", "-igv.jnlp", bamFiles[each])), projectId=param$projectId, sessionUrl=paste(PROJECT_BASE_URL, sub(".bam", "-igv.xml", bamFiles[each]), sep="/")) } }
The plot holds for each sample the number of reads in Millions that have X matches in the target and are reported in the file.
mmValues = integer() for (sm in samples){ mmValues = union(mmValues, as.integer(names(resultList[[sm]]$multiMatchInFileTable))) } mmCounts = ezMatrix(0, rows=samples, cols=sort(mmValues)) for (sm in samples){ mm = resultList[[sm]]$multiMatchInFileTable mmCounts[sm, names(mm)] = mm }
alignmentCountBarPlot(mmCounts, relative=FALSE) alignmentCountBarPlot(mmCounts, relative=TRUE)
for (nm in c("multiMatchTargetTypeCounts", "uniqueMatchTargetTypeCounts")){ if (!is.null(resultList[[1]][[nm]])){ readSet = switch(nm, multiMatchTargetTypeCounts="Uniquely and multi-matching reads:", uniqueMatchTargetTypeCounts="Uniquely matching reads:") cat("\n###", paste(readSet, "Match Count Percentages"), "\n") tct = getTypeCountTable(resultList, nm) ezWrite.table(tct, file=paste0(nm, ".txt"), digits=4) tpt = as.matrix(tct) for (cn in colnames(tpt)){ tpt[ ,cn] = tct[ ,cn]/ tct["total", cn] * 100 } minPercentage = 1 rowsUse = setdiff(rownames(tpt)[apply(tpt, 1, max) > minPercentage], "total") tptUse = tpt[rowsUse, , drop=FALSE] if (nrow(tptUse) >= 2 && ncol(tptUse) >= 2){ tptUseRel = log2(tptUse) meanVals = apply(tptUseRel, 1, function(x){mean(x[is.finite(x)])}) ## ignore zero counts tptUseRel = tptUseRel - meanVals plotCmd = expression({ ezHeatmap(tptUseRel, margins=c(10, 12), lim=c(-2, 2), Rowv=FALSE, Colv=FALSE, main="Relative Prevalence [log2]") }) eval(plotCmd) k <- kable(signif(tptUse, digits=3), row.names=TRUE, format = "html", caption="Match Count Percentages") %>% kable_styling(bootstrap_options = "striped", full_width = F, position = "left") %>% footnote(general="Percentage value.") print(k) }else{ cat("The plot and table are shown when there are at least two samples!", "\n") } cat("###", paste(readSet, "Read Starts per Base"), "\n") tct = as.matrix(getTypeCoverageTable(resultList, nm)) if (nrow(tct) >= 2 && ncol(tct) >= 2){ tctRel = log2(sweep(tct, 2, tct["total", ], FUN="/")) tctRel = tctRel[rowsUse, , drop=FALSE] plotCmd = expression({ ezHeatmap(tctRel, margins=c(10, 12), lim=c(-5, 5), Rowv=FALSE, Colv=FALSE, main="Coverage Enrichment") }) eval(plotCmd) k <- kable(signif(tct[rowsUse, ], digits=4), row.names=TRUE, format = "html", caption="Read Starts per Base") %>% kable_styling(bootstrap_options = "striped", full_width = F, position = "left") %>% footnote(general="Read Starts per Base is equivalent to Coverage divided by Read length.") print(k) }else{ cat("The plot and table are shown when there are at least two samples!", "\n") } } }
if (length(resultList[[1]][["genebody_coverage"]]) != 0){ ## TODO this could be done better by searching alls results for a valid genebody_coverage element cat("###", "Coverage plot\n") cat("####", "Genebody coverage plots\n") covValues = ezMatrix(NA, cols=0:100, rows=samples) for (sm in samples){ y = resultList[[sm]][["genebody_coverage"]][["1200 to 2400nt"]][["medium expressed"]] if (!is.null(y)){ covValues[sm, ] = y/sum(y) } } xx <- reshape2::melt(covValues, varnames=c("Sample", "Percentile")) xx$Sample = as.factor(xx$Sample) gbcp <- ggplot(data=xx, aes(x=Percentile, y=value)) gbcp <- gbcp + geom_line_interactive(aes(color=Sample,tooltip = Sample, data_id = Sample)) gbcp <- gbcp + scale_colour_manual(values=sampleColors) + labs(x="percentile of gene (5'->3')", y="relative coverage") + ggtitle("genebody coverage") print(ggplot(data=xx, aes(x=Percentile, y=Sample, fill=value)) + geom_tile() + scale_fill_gradientn(colours=getBlueRedScale()) + labs(x="percentile of gene (5'->3')", y="relative coverage") + ggtitle("genebody coverage")) } tlc = resultList[[1]][["TranscriptsCovered"]] if (!is.null(tlc)){ cat("\n\n####", "The fraction of isoform length covered\n") geneFraction = ezMatrix(NA, cols=names(tlc), rows=samples) for (sm in samples){ tlc = resultList[[sm]][["TranscriptsCovered"]] geneFraction[sm, ] = tlc /sum(tlc) } xx <- reshape2::melt(geneFraction, varnames=c("Sample", "Percentile")) xx$Sample = as.factor(xx$Sample) ggplot(xx, aes(x = Sample, y = value, fill = Percentile)) + geom_col(position = "stack") + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + labs(x="", y="fraction of isoforms") }
if(exists('gbcp')) girafe(ggobj = gbcp, options = list( opts_hover_inv(css = "opacity:0.1;"), opts_hover(css = "stroke-width:2;"), opts_sizing(rescale = TRUE, width = .5)))
if(!is.null(resultList[[1]][["Junction"]])){ cat("###", "Junction saturation plot for all samples\n") junctionMaxVal = numeric() for (nm in names(resultList[[1]][["Junction"]])){ junctionMaxVal[nm] = 0 for (sm in samples){ junctionMaxVal[nm] = max(junctionMaxVal[nm], unlist(resultList[[sm]][["Junction"]][[nm]])) } } junctionPlots <- list() for (nm in names(resultList[[1]][["Junction"]][["junctionSaturation"]])){ junctionDF <- c() for (sm in samples){ junctionDF <- rbind(junctionDF, cbind(resultList[[sm]][["Junction"]][["junctionSaturation"]][[nm]], sm)) } junctionDF <- data.frame(readFraction = rownames(junctionDF), Junctions <- as.numeric(junctionDF[,1]), Sample = as.factor(junctionDF[,2])) junctionPlots[[nm]] <- ggplot(data=junctionDF, aes(x=readFraction, y=Junctions, group = Sample)) junctionPlots[[nm]] <- junctionPlots[[nm]] + geom_line_interactive(aes(color=Sample,tooltip = Sample, data_id = Sample)) + geom_point(aes(col=Sample), size = 1) junctionPlots[[nm]] <- junctionPlots[[nm]] + labs(x="percent of total reads", y="Number of splicing junctions (K)") + ggtitle(nm) junctionPlots[[nm]] <- girafe(ggobj = junctionPlots[[nm]], options = list(opts_hover_inv(css = "opacity:0.1;"), opts_hover(css = "stroke-width:2;"), opts_sizing(rescale = TRUE, width = .6))) } }
if(exists('junctionPlots')){ htmltools::tagList(setNames(junctionPlots, NULL)) }
if (!is.null(resultList[[1]][["Junction"]])){ cat("###", "Junction plots\n") for (sm in samples){ cat("\n") cat("####", sm, "\n") for (nm in names(resultList[[sm]][["Junction"]])){ junctionPlot = resultList[[sm]][["Junction"]][[nm]] plotCmd = expression({ if (nm %in% c("splice_events", "splice_junction")){ pie(junctionPlot, col=c(2,3,4), init.angle=30, angle=c(60,120,150), density=c(70,70,70),main=nm, labels=paste(names(junctionPlot), paste0(round(junctionPlot), "%"))) } else if (nm =="junctionSaturation"){ x = as.numeric(names(junctionPlot[[1]])) * 100 plot(1,1,xlab="percent of total reads", ylab='Number of splicing junctions (x1000)',type='o', ylim=c(0, junctionMaxVal[nm]/1000), xlim=range(x)) saturationColors = c("all junctions"="blue", "known junctions"="red", "novel junctions"="green") for (item in names(junctionPlot)){ lines(x, junctionPlot[[item]]/1000, col=saturationColors[item], type="o") } legend("topleft", legend=names(saturationColors), col=saturationColors,lwd=1,pch=1) } }) eval(plotCmd) } cat("\n") } }
if(!is.null(resultList[[1]]$fragSizeHist)){ cat("###", "Length distribution of fragments for paired reads\n") for (sm in samples){ fsh = resultList[[sm]]$fragSizeHist ezWrite.table(cbind(Length=fsh$mids, Count=fsh$counts), file=paste0(sm, "-fragSizeHist.txt"), row.names=FALSE) plotCmd = expression({ plot(fsh, xlab="fragment size", main=paste(sm, "-- Length Histogram"), ylim=c(0, max(fsh$counts[-length(fsh$counts)]))) ## don't use the longest fragment size }) try({ eval(plotCmd) }) } }
if (!is.null(resultList[[1]][["ErrorRates"]])){ cat("###", "Read position specific error rate\n") for (sm in samples){ for (nm in names(resultList[[sm]][["ErrorRates"]])){ errorRate = resultList[[sm]][["ErrorRates"]][[nm]] if (!is.null(errorRate)){ plotPosSpecificErrorRate(errorRate, main=paste(sm, nm)) } } } }
if(!is.null(resultList[[1]][["dupRate"]])){ cat("###", "Duplication rate quality control\n") cat("\n") cat("The number of reads per base assigned to a gene in an ideal RNA-Seq data set is expected to be proportional to the abundance of its transcripts in the sample. For lowly expressed genes we expect read duplication to happen rarely by chance, while for highly expressed genes - depending on the total sequencing depth - we expect read duplication to happen often.", "\n") cat("\n") require(dupRadar, quietly = TRUE) dupResult <- data.frame(Intercept = 0, Slope = 0, Sample = samples) rownames(dupResult) <- samples for(sm in samples){ dupData <- duprateExpFit(DupMat=resultList[[sm]][["dupRate"]]) dupResult[sm,] <- c(dupData$intercept, dupData$slope, sm) duprateExpDensPlot(DupMat=resultList[[sm]][["dupRate"]]) title(paste(sm, "-- 2D density scatter plot")) } dupResult$Intercept = as.numeric(dupResult$Intercept) dupResult$Slope = as.numeric(dupResult$Slope) dupPlot <- ggplot(data = dupResult, mapping = aes(x = Slope, y = Intercept, tooltip = Sample, data_id = Sample)) dupPlot <- dupPlot + geom_point_interactive(aes(col=Sample), size = 2, hover_nearest = TRUE) + ggtitle('MultiSample Plot') dupPlot <- girafe(ggobj = dupPlot, options = list(opts_sizing(rescale = TRUE, width = .7))) }
if(exists('dupPlot')) dupPlot
ezInteractiveTableRmd(dataset)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.