Nothing
#' Describe parameters relevant to diagnostics
#'
#' Many of the diagnostics functions expect (optional or mandatory) parameters that are described by this function
#'
#' @param data a grandR object
#'
#'
#' @return a list with
#' \itemize{
#' \item{orientation: Sense or Antisense, only relevant to mismatches for strand unspecific data}
#' \item{category: all available categories (Exonic/Intronic, genomes). Note that this might differ from what is available from GeneInfo(data,"Category"), since Grand3 might not have estimated NTRs for all categories!}
#' \item{label: which nucleoside analogs have been used}
#' \item{model: which model (binom or tbbinom) to inspect}
#' \item{estimator: which estimator (joint or separate NTRs were estimated for subreads)}
#' }
#' @export
#'
#' @concept diagnostics
GetDiagnosticParameters=function(data) {
tab=GetTableQC(data,"mismatch.raw.position")
orients = unique(as.character(factor(c("Antisense","Sense")[tab$Sense+1],levels=c("Sense","Antisense"))))
cats=unique(as.character(tab$Category))
tab=GetTableQC(data,"model.parameters")
labels=unique(as.character(tab$Label))
samples=unique(as.character(GetTableQC(data,"conversion.perr")$Condition))
subreads=as.character(GetTableQC(data,"subread")$Semantic)
list(
sample=samples,
orientation=orients,
category=cats,
label=labels,
model=c("Binom","TB-Binom"),
estimator=c("Separate","Joint","MaskedError"),
subread=subreads
)
}
#' Diagnostic plot for mismatch position for columns (by sample)
#'
#' This belongs to the first diagnostic plots (raw mismatches) generated by GRAND3.
#'
#' For all positions along the reads (x axis; potentially paired end, shown left and right),
#' show the percentage of all mismatch types. The panel in column T and row C shows T-to-C mismatches.
#' Positions outside of shaded areas are clipped. Uncorrected and Retained means before and
#' after correcting multiply sequenced bases. Sense/Antisense means reads (first read for paired end)
#' that are (based on the annotation) oriented in sense or antisense direction to a gene
#' (i.e. this is only relevant for sequencing protocols that do not preserve strand information).
#'
#' @param data a grandR object
#' @param sample a sample name
#' @param orientation restrict to either Sense or Antisense; can be NULL
#' @param category restrict to a specific category (see \link{GetDiagnosticParameters}); can be NULL
#' @param max.pos remove everything behind this position
#'
#' @return a list with a ggplot object, a description, and the desired size for the plot
#' @export
#'
#' @concept diagnostics
PlotMismatchPositionForSample=function(data,sample,orientation=NULL,category=NULL,max.pos=2500) {
# R CMD check guard for non-standard evaluation
x <- `Mismatch frequency` <- SenseCat <- Corrected <- NULL
tab=GetTableQC(data,"mismatch.raw.position")
clip=GetTableQC(data,"clip")
clip=setNames(clip[,2],clip[,1])
if (!is.null(orientation)) tab=tab[tab$Sense %in% setNames(c(0,1),c("Antisense","Sense"))[orientation],]
if (!is.null(category)) tab=tab[tab$Category %in% category,]
tab=tab[tab$Position<=max.pos,]
tab$`Mismatch frequency`=tab[,sample]
max1=max(c(0,tab$Position[tab$`First read`==1]))
max2=max(c(0,tab$Position[tab$`First read`==0]))
offset=round(max1*1.2)
tab$x=ifelse(tab$`First read`==1,tab$Position,max2-tab$Position+offset)
tab$Corrected=factor(c("Uncorrected","Retained")[tab$Corrected+1],levels=c("Retained","Uncorrected"))
tab$Sense=factor(c("Antisense","Sense")[tab$Sense+1],levels=c("Sense","Antisense"))
sep=tab[tab$`First read`==1&tab$Position==1,]
sep$`Mismatch frequency`=NA
sep$x=max(tab$Position[tab$`First read`==1])+1
tab=droplevels(rbind(tab,sep))
tab$SenseCat=paste(tab$Sense,tab$Category,sep=",")
lab1=labeling::extended(1,max1,5)
lab2=labeling::extended(1,max2,5)
tab$Read=factor(tab$Read,levels=c("A","C","G","T"))
tab$Genomic=factor(tab$Genomic,levels=c("A","C","G","T"))
ymax=quantile(tab$`Mismatch frequency`,0.99,na.rm=TRUE)*1.2
g=ggplot(tab,aes(x,`Mismatch frequency`,color=SenseCat,linetype=factor(Corrected),group=interaction(SenseCat,Corrected)))+
cowplot::theme_cowplot()
if (!is.na(clip["Used 5p1"]) || !is.na(clip["Used 3p1"])) {
xmin=if (!is.na(clip["Used 5p1"])) clip["Used 5p1"]+1 else 1
xmax=if (!is.na(clip["Used 3p1"])) max1-clip["Used 3p1"] else max1
g=g+geom_rect(xmin=xmin,xmax=xmax,ymin=0,ymax=Inf,color="gray",fill="gray")
}
if (max2>0 && (!is.na(clip["Used 5p2"]) || !is.na(clip["Used 3p2"]))) {
xmin=if (!is.na(clip["Used 3p2"])) clip["Used 3p2"]+offset else offset
xmax=if (!is.na(clip["Used 5p2"])) max2+offset-clip["Used 5p2"]-1 else max2+offset
g=g+geom_rect(xmin=xmin,xmax=xmax,ymin=0,ymax=Inf,color="gray",fill="gray")
}
if (!is.na(clip["Inferred 5p1"])) g=g+geom_vline(xintercept=clip["Inferred 5p1"]+1)
if (!is.na(clip["Inferred 3p1"])) g=g+geom_vline(xintercept=max1-clip["Inferred 3p1"])
if (max2>0 && !is.na(clip["Inferred 5p2"])) g=g+geom_vline(xintercept=max2-clip["Inferred 5p2"]+offset-1)
if (max2>0 && !is.na(clip["Inferred 3p2"])) g=g+geom_vline(xintercept=offset+clip["Inferred 3p2"])
g=g+geom_line()+
facet_grid(Read~Genomic)+
scale_linetype_discrete(NULL,guide=if(length(unique(tab$Corrected))<=1) guide_none() else guide_legend())+
scale_color_brewer(NULL,palette="Dark2")+
scale_x_continuous(breaks=c(lab1,max2-lab2+offset),labels=c(lab1,lab2))+
scale_y_continuous(labels = scales::label_percent(0.1))+
coord_cartesian(ylim=c(0,ymax))+
xlab(NULL)+
ggtitle(sample)
list(plot=g,
description="For all positions along the reads (x axis; potentially paired end, shown left and right), show the percentage of all mismatch types. The panel in column T and row C shows T-to-C mismatches. Positions outside of shaded areas are clipped. Uncorrected and Retained means before and after correcting multiply sequenced bases. Sense/Antisense means reads (first read for paired end) that are (based on the annotation) oriented in sense or antisense direction to a gene (i.e. this is only relevant for sequencing protocols that do not preserve strand information).",
size=c(with=12,height=7)
)
}
#' Diagnostic plot for mismatch position for columns (by mismatch type)
#'
#' This belongs to the first diagnostic plots (raw mismatches) generated by GRAND3.
#'
#' For all positions along the reads (x axis; potentially paired end, shown left and right),
#' show the percentage of a specific mismatch type for all samples.
#' Positions outside of shaded areas are clipped. Uncorrected and Retained means before and
#' after correcting multiply sequenced bases. Sense/Antisense means reads (first read for paired end)
#' that are (based on the annotation) oriented in sense or antisense direction to a gene
#' (i.e. this is only relevant for sequencing protocols that do not preserve strand information).
#'
#' @param data a grandR object
#' @param genomic the nucleotide as it occurs in the genome
#' @param read the nucleotide as it occurs in the read
#' @param orientation restrict to either Sense or Antisense; can be NULL
#' @param category restrict to a specific category (see \link{GetDiagnosticParameters}); can be NULL
#' @param max.pos remove everything behind this position
#'
#' @return a list with a ggplot object, a description, and the desired size for the plot
#' @export
#'
#' @concept diagnostics
PlotMismatchPositionForType=function(data,genomic,read,orientation=NULL,category=NULL,max.pos=2500) {
# R CMD check guard for non-standard evaluation
Sample <- x <- `Mismatch frequency` <- Corrected <- NULL
tab=GetTableQC(data,"mismatch.raw.position")
clip=GetTableQC(data,"clip")
clip=setNames(clip[,2],clip[,1])
if (!is.null(orientation)) tab=tab[tab$Sense %in% setNames(c(0,1),c("Antisense","Sense"))[orientation],]
if (!is.null(category)) tab=tab[tab$Category %in% category,]
tab=tab[tab$Position<=max.pos,]
tab=tab[tab$Genomic==genomic & tab$Read==read,]
max1=max(c(0,tab$Position[tab$`First read`==1]))
max2=max(c(0,tab$Position[tab$`First read`==0]))
offset=round(max1*1.2)
tab$Corrected=factor(c("Uncorrected","Retained")[tab$Corrected+1],levels=c("Retained","Uncorrected"))
tab$Sense=factor(c("Antisense","Sense")[tab$Sense+1],levels=c("Sense","Antisense"))
tab=reshape2::melt(tab,value.name="Mismatch frequency",variable.name="Sample",id.vars=names(tab)[1:7])
tab$SenseCat=paste(tab$Sense,tab$Category,sep=",")
tab$x=ifelse(tab$`First read`==1,tab$Position,max2-tab$Position+offset)
sep=tab[tab$`First read`==1&tab$Position==1,]
sep$`Mismatch frequency`=NA
sep$x=max(tab$Position[tab$`First read`==1])+1
tab=droplevels(rbind(tab,sep))
lab1=labeling::extended(1,max1,5)
lab2=labeling::extended(1,max2,5)
ymax=max(plyr::ddply(tab,plyr::.(Sample),function(s) quantile(s$`Mismatch frequency`,0.95,na.rm=TRUE)*1.2)[,2])
g=ggplot(tab,aes(x,`Mismatch frequency`,color=Sample,linetype=factor(Corrected)))+cowplot::theme_cowplot()
if (!is.na(clip["Used 5p1"]) || !is.na(clip["Used 3p1"])) {
xmin=if (!is.na(clip["Used 5p1"])) clip["Used 5p1"]+1 else 1
xmax=if (!is.na(clip["Used 3p1"])) max1-clip["Used 3p1"] else max1
g=g+geom_rect(xmin=xmin,xmax=xmax,ymin=0,ymax=Inf,color="gray",fill="gray")
}
if (max2>0 && (!is.na(clip["Used 5p2"]) || !is.na(clip["Used 3p2"]))) {
xmin=if (!is.na(clip["Used 3p2"])) clip["Used 3p2"]+offset else offset
xmax=if (!is.na(clip["Used 5p2"])) max2+offset-clip["Used 5p2"]-1 else max2+offset
g=g+geom_rect(xmin=xmin,xmax=xmax,ymin=0,ymax=Inf,color="gray",fill="gray")
}
if (!is.na(clip["Inferred 5p1"])) g=g+geom_vline(xintercept=clip["Inferred 5p1"]+1)
if (!is.na(clip["Inferred 3p1"])) g=g+geom_vline(xintercept=max1-clip["Inferred 3p1"])
if (max2>0 && !is.na(clip["Inferred 5p2"])) g=g+geom_vline(xintercept=max2-clip["Inferred 5p2"]+offset-1)
if (max2>0 && !is.na(clip["Inferred 3p2"])) g=g+geom_vline(xintercept=offset+clip["Inferred 3p2"])
g=g+geom_line()+facet_grid(Category~Sense)+scale_linetype_discrete(NULL,guide=if(length(unique(tab$Corrected))<=1) guide_none() else guide_legend())+scale_x_continuous(breaks=c(lab1,max2-lab2+offset),labels=c(lab1,lab2))+coord_cartesian(ylim=c(0,ymax))+xlab(NULL)+ggtitle(paste0(genomic,">",read))
list(plot=g,
description="For all positions along the reads (x axis; potentially paired end, shown left and right), show the percentage of a specific mismatch type for all samples. Positions outside of shaded areas are clipped. Uncorrected and Retained means before and after correcting multiply sequenced bases. Sense/Antisense means reads (first read for paired end) that are (based on the annotation) oriented in sense or antisense direction to a gene (i.e. this is only relevant for sequencing protocols that do not preserve strand information).",
size=c(width=length(unique(tab$Sense))*5+2,height=length(unique(tab$Category))*2+0.2)
)
}
#' Diagnostic plot for conversion frequencies
#'
#' This is the second diagnostic plot (estimated conversions) generated by GRAND3.
#'
#' Show the percentage of all conversion types for all samples. In contrast to mismatches
#' (see \link{PlotMismatchPositionForSample} and \link{PlotMismatchPositionForType}),
#' the correct strand is already inferred for conversions, i.e. conversions refer to
#' actual conversion events on RNA, whereas mismatches are observed events in mapped reads.
#'
#'
#' @param data the grandR object
#' @param category show a specific category (see \link{GetDiagnosticParameters}); cannot be NULL
#' @param sample compare subreads for a specific sample; can be NULL, then compare all samples per subread
#' @param max.columns if there are more columns (samples for bulk, cells for single cell) than this, show boxplots instead of points
#'
#' @return a list with a ggplot object, a description, and the desired size for the plot
#' @export
#'
#' @concept diagnostics
PlotConversionFreq=function(data,category,sample=NULL,max.columns=120) {
# R CMD check guard for non-standard evaluation
Category <- Semantic <- Genomic <- Read <- Frequency <- NULL
if (is.null(category)) stop("No category defined; see GetDiagnosticParameters(data)$category for choices!")
tab=GetTableQC(data,"conversion.freq")
tab$Condition=factor(tab$Condition,levels=unique(tab$Condition))
subr=GetTableQC(data,"subread")
subr$Semantic=factor(as.character(subr$Semantic),levels=subr$Semantic)
tab=merge(tab,subr,by="Subread")
tab=tab[tab$Category==category,]
ncond=nrow(tab)/nrow(unique(data.frame(tab$Genomic,tab$Read,tab$Semantic)))
if (is.null(sample)) {
if (ncond>=max.columns) {
max=max(plyr::ddply(tab,plyr::.(Category,Condition,Semantic,Genomic,Read),function(s) c(max=quantile(s$Frequency,0.99)))$max)
if (length(unique(tab$Condition))<ncond/100) {
g=ggplot(tab,aes(paste0(Genomic,"->",Read),Frequency,fill=Condition))+cowplot::theme_cowplot()+
geom_hline(yintercept=0,linetype=2)+geom_boxplot(width=0.4,outlier.size = 0.1)+facet_grid(Semantic~.,scales="free_y")+theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+xlab(NULL)+ggtitle(category)+scale_fill_brewer(NULL,palette="Set2")+coord_cartesian(ylim=c(0,max))
} else {
g=ggplot(tab,aes(paste0(Genomic,"->",Read),Frequency))+cowplot::theme_cowplot()+
geom_hline(yintercept=0,linetype=2)+geom_boxplot(width=0.8)+facet_grid(Semantic~.,scales="free_y")+theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+xlab(NULL)+ggtitle(category)+coord_cartesian(ylim=c(0,max))
}
} else {
g=ggplot(tab,aes(paste0(Genomic,"->",Read),Frequency,color=Condition))+cowplot::theme_cowplot()+
geom_hline(yintercept=0,linetype=2)+geom_point(position=position_dodge(width=0.7))+facet_grid(Semantic~.,scales="free_y")+theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+xlab(NULL)+ggtitle(category)
}
} else {
tab=tab[tab$Condition==sample,]
if (ncond>=max.columns) {
max=max(plyr::ddply(tab,plyr::.(Category,Semantic,Genomic,Read),function(s) c(max=quantile(s$Frequency,0.99)))$max)
g=ggplot(tab,aes(paste0(Genomic,"->",Read),Frequency,fill=Semantic))+cowplot::theme_cowplot()+
geom_hline(yintercept=0,linetype=2)+geom_boxplot(width=0.4,outlier.size = 0.1)+theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+xlab(NULL)+ggtitle(category)+scale_fill_brewer(NULL,palette="Set2")+coord_cartesian(ylim=c(0,max))
} else {
g=ggplot(tab,aes(paste0(Genomic,"->",Read),Frequency,color=Semantic))+cowplot::theme_cowplot()+
geom_hline(yintercept=0,linetype=2)+geom_point(position=position_dodge(width=0.7))+theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+xlab(NULL)+ggtitle(category)
}
}
list(plot=g,
description="Show the percentage of all conversion types for all samples. In contrast to mismatches (see PlotMismatchPositionForSample and PlotMismatchPositionForType), the correct strand is already inferred for conversions, i.e. conversions refer to actual conversion events on RNA, whereas mismatches are observed events in mapped reads.",
size=c(width=if (ncond>=max.columns) 10 else 5+ncond,height=7)
)
}
#' Diagnostic plot for estimated models (global NTR)
#'
#' This belongs to the third kind (model) of diagnostic plots
#'
#' Shows the estimated global NTR (i.e. for all reads used for estimation of
#' global paramters, what is the percentage of new RNA) for each sample.
#'
#' @param data a grandR object
#' @param label which label to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#' @param estimator which estimator to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#' @param model which model to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#'
#' @return a list with a ggplot object, a description, and the desired size for the plot
#' @export
#'
#' @concept diagnostics
PlotModelNtr=function(data,label="4sU",estimator="Separate",model="Binom") {
# R CMD check guard for non-standard evaluation
Subread <- NULL
tab=GetTableQC(data,"model.parameters")
tab$Condition=factor(tab$Condition,levels=unique(tab$Condition))
tab=tab[tab$Label==label & tab$Estimator==estimator,]
cond=unique(tab$Condition)
ncond=length(cond)
param=paste(model[1],"ntr")
lower=paste("Lower",model[1],"ntr")
upper=paste("Upper",model[1],"ntr")
qq=function(s) paste0("`",s,"`")
if (lower %in% names(tab)) {
g=ggplot(tab,aes(Condition,!!sym(param),color=Subread,ymin=!!sym(lower),ymax=!!sym(upper)))+
cowplot::theme_cowplot()+
geom_errorbar(width=0.1,position=position_dodge(width=0.2))+
geom_point(position=position_dodge(width=0.2))+
coord_cartesian(ylim=c(0,max(tab[,upper])))+
ylab("Global NTR")+xlab(NULL)+
scale_y_continuous(labels = scales::label_percent(1))+
scale_color_brewer(NULL,palette="Dark2")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
}else{
g=ggplot(tab,aes(Condition,!!sym(param),color=Subread))+
cowplot::theme_cowplot()+
geom_point(position=position_dodge(width=0.2))+
coord_cartesian(ylim=c(0,max(tab[,param])))+
ylab("Global NTR")+xlab(NULL)+
scale_y_continuous(labels = scales::label_percent(1))+
scale_color_brewer(NULL,palette="Dark2")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
}
list(
plot=g,
description="Shows the estimated global NTR (i.e. for all reads used for estimation of global paramters, what is the percentage of new RNA) for each sample.",
size=c(width=3+ncond/2,height=7)
)
}
#' Diagnostic plot for estimated models (global conversion rate)
#'
#' This belongs to the third kind (model) of diagnostic plots
#'
#' Shows the estimated conversion rate (i.e., the probability for a conversion
#' on a new RNA molecule) for each sample.
#'
#' @param data a grandR object
#' @param label which label to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#' @param estimator which estimator to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#' @param model which model to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#'
#' @return a list with a ggplot object, a description, and the desired size for the plot
#' @export
#'
#' @concept diagnostics
PlotModelConv=function(data,label="4sU",estimator="Separate",model="Binom") {
# R CMD check guard for non-standard evaluation
Type <- Subread <- NULL
tab=GetTableQC(data,"model.parameters")
tab$Condition=factor(tab$Condition,levels=unique(tab$Condition))
cond=unique(tab$Condition)
ncond=length(cond)
etbeta_save=function(l=0,u=1,s1=1,s2=1) {
use=is.finite(l+u+s1+s2)
re=rep(NA,length(l))
if (sum(use)>0) re[use]=etbeta(l[use],u[use],s1[use],s2[use])
re
}
tab=tab[tab$Label==label & tab$Estimator==estimator,]
if (model[1]=="TB-Binom") {
param=paste(model[1],"p.mconv")
lower=paste("Lower",model[1],"p.mconv")
upper=paste("Upper",model[1],"p.mconv")
qq=function(s) paste0("`",s,"`")
tab2=tab
tab2$`TB-Binom p.mconv`=etbeta_save(tab2$`TB-Binom p.err`,tab2$`TB-Binom p.mconv`,exp(tab$`TB-Binom shape`),exp(-tab$`TB-Binom shape`))
if (lower %in% names(tab)) {
tab2$`Lower TB-Binom p.mconv`=etbeta_save(tab2$`Lower TB-Binom p.err`,tab2$`Lower TB-Binom p.mconv`,exp(tab2$`Lower TB-Binom shape`),exp(-tab2$`Lower TB-Binom shape`))
tab2$`Upper TB-Binom p.mconv`=etbeta_save(tab2$`Upper TB-Binom p.err`,tab2$`Upper TB-Binom p.mconv`,exp(tab2$`Upper TB-Binom shape`),exp(-tab2$`Upper TB-Binom shape`))
tab=rbind(cbind(Type="Max",tab),cbind(Type="Mean",tab2))
g=ggplot(tab,aes(Condition,!!sym(param),color=Subread,shape=Type,ymin=!!sym(lower),ymax=!!sym(upper)))+cowplot::theme_cowplot()+
geom_errorbar(width=0.1,position=position_dodge(width=0.2))+geom_point(position=position_dodge(width=0.2))+coord_cartesian(ylim=c(0,max(tab[,upper])))+ylab("p.conv")+xlab(NULL)+scale_color_brewer(NULL,palette="Dark2")+theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+scale_shape_manual(NULL,values=c(Max=1,Mean=19))
} else {
tab=rbind(cbind(Type="Max",tab),cbind(Type="Mean",tab2))
g=ggplot(tab,aes(Condition,!!sym(param),color=Subread,shape=Type))+cowplot::theme_cowplot()+
geom_point(position=position_dodge(width=0.2))+coord_cartesian(ylim=c(0,max(tab[,param])))+ylab("p.conv")+xlab(NULL)+scale_color_brewer(NULL,palette="Dark2")+theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+scale_shape_manual(NULL,values=c(Max=1,Mean=19))
}
} else {
param=paste(model[1],"p.conv")
lower=paste("Lower",model[1],"p.conv")
upper=paste("Upper",model[1],"p.conv")
qq=function(s) paste0("`",s,"`")
if (lower %in% names(tab)) {
g=ggplot(tab,aes(Condition,!!sym(param),color=Subread,ymin=!!sym(lower),ymax=!!sym(upper)))+cowplot::theme_cowplot()+
geom_errorbar(width=0.1,position=position_dodge(width=0.2))+geom_point(position=position_dodge(width=0.2))+coord_cartesian(ylim=c(0,max(tab[,upper])))+ylab("p.conv")+xlab(NULL)+scale_color_brewer(NULL,palette="Dark2")+theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
} else {
g=ggplot(tab,aes(Condition,!!sym(param),color=Subread))+cowplot::theme_cowplot()+
geom_point(position=position_dodge(width=0.2))+coord_cartesian(ylim=c(0,max(tab[,param])))+ylab("p.conv")+xlab(NULL)+scale_color_brewer(NULL,palette="Dark2")+theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
}
}
list(
plot=g,
description="Shows the estimated conversion rate (i.e., the probability for a conversion on a new RNA molecule) for each sample.",
size=c(width=3+ncond/2,height=7)
)
}
#' Diagnostic plot for estimated models (global error rate)
#'
#' This belongs to the third kind (model) of diagnostic plots
#'
#' Shows the estimated error rate (i.e., the probability for a conversion
#' on an old RNA molecule) for each sample.
#'
#' @param data a grandR object
#' @param label which label to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#' @param estimator which estimator to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#' @param model which model to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#'
#' @return a list with a ggplot object, a description, and the desired size for the plot
#' @export
#'
#' @concept diagnostics
PlotModelErr=function(data,label="4sU",estimator="Separate",model="Binom") {
# R CMD check guard for non-standard evaluation
Subread <- NULL
tab=GetTableQC(data,"model.parameters")
tab$Condition=factor(tab$Condition,levels=unique(tab$Condition))
cond=unique(tab$Condition)
ncond=length(cond)
tab=tab[tab$Label==label & tab$Estimator==estimator,]
param=paste(model[1],"p.err")
lower=paste("Lower",model[1],"p.err")
upper=paste("Upper",model[1],"p.err")
qq=function(s) paste0("`",s,"`")
savemax=function(a,b=a) max(c(a,b)[is.finite(c(a,b))])
if (lower %in% names(tab)) {
g=ggplot(tab,aes(Condition,!!sym(param),color=Subread,ymin=!!sym(lower),ymax=!!sym(upper)))+cowplot::theme_cowplot()+
geom_errorbar(width=0.1,position=position_dodge(width=0.2))+geom_point(position=position_dodge(width=0.2))+coord_cartesian(ylim=c(0,savemax(tab[,upper],tab[,param])))+ylab("p.err")+xlab(NULL)+scale_color_brewer(NULL,palette="Dark2")+theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
}else{
g=ggplot(tab,aes(Condition,!!sym(param),color=Subread))+cowplot::theme_cowplot()+
geom_point(position=position_dodge(width=0.2))+coord_cartesian(ylim=c(0,savemax(tab[,param])))+ylab("p.err")+xlab(NULL)+scale_color_brewer(NULL,palette="Dark2")+theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
}
list(
plot=g,
description="Shows the estimated error rate (i.e., the probability for a conversion on an old RNA molecule) for each sample.",
size=c(width=3+ncond/2,height=7)
)
}
#' Diagnostic plot for estimated models (global shape parameter)
#'
#' This belongs to the third kind (model) of diagnostic plots
#'
#' Shows the estimated shape parameter (describing the increase of 4sU over time) in the tbbinom model for each sample.
#'
#' @param data a grandR object
#' @param label which label to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#' @param estimator which estimator to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#'
#' @return a list with a ggplot object, a description, and the desired size for the plot
#' @export
#'
#' @concept diagnostics
PlotModelShape=function(data,label="4sU",estimator="Separate") {
# R CMD check guard for non-standard evaluation
`TB-Binom shape` <- Subread <- `Lower TB-Binom shape` <- `Upper TB-Binom shape` <- NULL
tab=GetTableQC(data,"model.parameters")
tab$Condition=factor(tab$Condition,levels=unique(tab$Condition))
cond=unique(tab$Condition)
ncond=length(cond)
tab=tab[tab$Label==label & tab$Estimator==estimator,]
if ("Lower TB-Binom shape" %in% names(tab)) {
g=ggplot(tab,aes(Condition,`TB-Binom shape`,color=Subread,ymin=`Lower TB-Binom shape`,ymax=`Upper TB-Binom shape`))+cowplot::theme_cowplot()+
geom_errorbar(width=0.1,position=position_dodge(width=0.2))+geom_point(position=position_dodge(width=0.2))+ylab("shape")+xlab(NULL)+scale_color_brewer(NULL,palette="Dark2")+theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
}else{
g=ggplot(tab,aes(Condition,`TB-Binom shape`,color=Subread))+cowplot::theme_cowplot()+
geom_point(position=position_dodge(width=0.2))+ylab("shape")+xlab(NULL)+scale_color_brewer(NULL,palette="Dark2")+theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
}
list(
plot=g,
description="Shows the estimated shape parameter (describing the increase of 4sU over time) in the tbbinom model for each sample.",
size=c(width=3+ncond/2,height=7)
)
}
#' Diagnostic plot for estimated models (4sU increase)
#'
#' This belongs to the third kind (model) of diagnostic plots
#'
#' Shows the estimated time evolution of 4sU increase in the tbbinom model for each sample.
#'
#' @param data a grandR object
#' @param label which label to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#' @param estimator which estimator to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#'
#' @return a list with a ggplot object, a description, and the desired size for the plot
#' @export
#'
#' @concept diagnostics
PlotModelLabelTimeCourse=function(data,label="4sU",estimator="Separate") {
# R CMD check guard for non-standard evaluation
Time <- `Labeling efficiency` <- Subread <- NULL
tab=GetTableQC(data,"model.parameters")
tab$Condition=factor(tab$Condition,levels=unique(tab$Condition))
e=GetTableQC(data,"experimentalDesign")
cond=unique(tab$Condition)
ncond=length(cond)
tab=tab[tab$Label==label & tab$Estimator==estimator,]
e$Condition=ifelse(is.na(e$Sample),as.character(e$Library),paste(e$Library,e$Sample,sep="."))
e=unique(cbind(Condition=e$Condition,e[,grep(label,names(e))]))
names(e)=gsub(paste0(label," "),"",names(e))
e=e[!is.na(e$concentration),]
x=seq(0,1,length.out=100)
make.df=function(row) data.frame(x=x,`Labeling efficiency`=qtbeta(x,tab$`TB-Binom p.err`[row],tab$`TB-Binom p.mconv`[row],exp(tab$`TB-Binom shape`[row]),exp(-tab$`TB-Binom shape`[row])),Condition=tab$Condition[row],Subread=tab$Subread[row],check.names=FALSE)
df=do.call("rbind",lapply(1:dim(tab)[1],make.df))
if (length(unique(df$Condition))!=length(e$Condition)) stop("Conditions in experimental design and parameter file are inconsistent!")
df=merge(df,e)
df$concentration=factor(df$concentration,levels=sort(unique(df$concentration)))
df$Time=df$x*df$duration+(1-df$x)*df$chase
g=ggplot(df,aes(Time,`Labeling efficiency`,color=Condition,linetype=Subread))+cowplot::theme_cowplot()+
geom_line()+scale_color_viridis_d(NULL,)+scale_linetype_discrete(NULL)
list(
plot=g,
description="Shows the estimated time evolution of 4sU increase in the tbbinom model for each sample.",
size=c(width=10,height=7)
)
}
#' Diagnostic plot for estimated models (global error rate)
#'
#' This belongs to the fourth kind (model comparison) of diagnostic plots
#'
#' Compares the prior error rate (estimated from no4sU samples or from all other mismatch types)
#' against the final error rate estimate.
#'
#' @param data a grandR object
#' @param label which label to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#' @param estimator which estimator to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#' @param model which model to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#'
#' @return a list with a ggplot object, a description, and the desired size for the plot
#' @export
#'
#' @concept diagnostics
PlotModelCompareErrPrior=function(data,label="4sU",estimator="Separate",model="Binom") {
# R CMD check guard for non-standard evaluation
`Lower prior p.err` <- `Upper prior p.err` <- Subread <- NULL
tab=GetTableQC(data,"model.parameters")
tab$Condition=factor(tab$Condition,levels=unique(tab$Condition))
cond=unique(tab$Condition)
ncond=length(cond)
tab=tab[tab$Label==label & tab$Estimator==estimator,]
param=paste(model[1],"p.err")
qq=function(s) paste0("`",s,"`")
g=ggplot(tab,aes((`Lower prior p.err`+`Upper prior p.err`)/2,!!sym(param),color=Subread,xmin=`Lower prior p.err`,xmax=`Upper prior p.err`))+cowplot::theme_cowplot()+
geom_point()+geom_errorbarh()+geom_abline()+scale_color_brewer(NULL,palette="Dark2")+xlab("Prior p.err")
list(
plot=g,
description="Compares the prior error rate (estimated from no4sU samples or from all other mismatch types) against the final error rate estimate.",
size=c(width=9,height=7)
)
}
#' Diagnostic plot for estimated models (global NTR)
#'
#' This belongs to the fourth kind (model comparison) of diagnostic plots
#'
#' Compares the global NTR (i.e. for all reads used for estimation of
#' global parameters, what is the percentage of new RNA) for the binom and tbbinom models.
#'
#' @param data a grandR object
#' @param label which label to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#' @param estimator which estimator to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#'
#' @return a list with a ggplot object, a description, and the desired size for the plot
#' @export
#'
#' @concept diagnostics
PlotModelCompareNtr=function(data,label="4sU",estimator="Separate") {
# R CMD check guard for non-standard evaluation
`Binom ntr` <- `TB-Binom ntr` <- Subread <- NULL
tab=GetTableQC(data,"model.parameters")
tab$Condition=factor(tab$Condition,levels=unique(tab$Condition))
cond=unique(tab$Condition)
ncond=length(cond)
tab=tab[tab$Label==label & tab$Estimator==estimator,]
g=ggplot(tab,aes(`Binom ntr`,`TB-Binom ntr`,color=Subread))+cowplot::theme_cowplot()+
geom_point()+geom_abline()+scale_color_brewer(NULL,palette="Dark2")
list(
plot=g,
description="Compares the global NTR (i.e. for all reads used for estimation of global parameters, what is the percentage of new RNA) for the binom and tbbinom models.",
size=c(width=9,height=7)
)
}
#' Diagnostic plot for estimated models (global error rate)
#'
#' This belongs to the fourth kind (model comparison) of diagnostic plots
#'
#' Compares the estimated error rate (i.e., the probability for a conversion
#' on an old RNA molecule) for the binom and tbbinom models.
#'
#' @param data a grandR object
#' @param label which label to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#' @param estimator which estimator to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#'
#' @return a list with a ggplot object, a description, and the desired size for the plot
#' @export
#'
#' @concept diagnostics
PlotModelCompareErr=function(data,label="4sU",estimator="Separate") {
# R CMD check guard for non-standard evaluation
`Binom p.err` <- `TB-Binom p.err` <- Subread <- NULL
tab=GetTableQC(data,"model.parameters")
tab$Condition=factor(tab$Condition,levels=unique(tab$Condition))
cond=unique(tab$Condition)
ncond=length(cond)
tab=tab[tab$Label==label & tab$Estimator==estimator,]
g=ggplot(tab,aes(`Binom p.err`,`TB-Binom p.err`,color=Subread))+cowplot::theme_cowplot()+
geom_point()+geom_abline()+scale_color_brewer(NULL,palette="Dark2")
list(
plot=g,
description="Compares the estimated error rate (i.e., the probability for a conversion on an old RNA molecule) for the binom and tbbinom models.",
size=c(width=9,height=7)
)
}
#' Diagnostic plot for estimated models (global conversion rate)
#'
#' This belongs to the fourth kind (model comparison) of diagnostic plots
#'
#' Compares the estimated conversion rate (i.e., the probability for a conversion
#' on a new RNA molecule) for the binom and tbbinom models (mean conversion rate).
#'
#' @param data a grandR object
#' @param label which label to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#' @param estimator which estimator to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#'
#' @return a list with a ggplot object, a description, and the desired size for the plot
#' @export
#'
#' @concept diagnostics
PlotModelCompareConv=function(data,label="4sU",estimator="Separate") {
# R CMD check guard for non-standard evaluation
`Binom p.conv` <- `TB-Binom p.err` <- `TB-Binom p.mconv` <- `TB-Binom shape` <- Subread <- NULL
tab=GetTableQC(data,"model.parameters")
tab$Condition=factor(tab$Condition,levels=unique(tab$Condition))
cond=unique(tab$Condition)
ncond=length(cond)
tab=tab[tab$Label==label & tab$Estimator==estimator,]
g=ggplot(tab,aes(`Binom p.conv`,etbeta(`TB-Binom p.err`,`TB-Binom p.mconv`,exp(`TB-Binom shape`),exp(-`TB-Binom shape`)),color=Subread))+cowplot::theme_cowplot()+
geom_point()+geom_abline()+scale_color_brewer(NULL,palette="Dark2")+ylab("Mean TB-Binom p.conv")
list(
plot=g,
description="Compares the estimated conversion rate (i.e., the probability for a conversion on a new RNA molecule) for the binom and tbbinom models.",
size=c(width=9,height=7)
)
}
#' Diagnostic plot for estimated models (log likelihoods)
#'
#' This belongs to the fourth kind (model comparison) of diagnostic plots
#'
#' Shows the difference in log likelihoods between the binom and tbbinom models.
#'
#' @param data a grandR object
#' @param label which label to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#' @param estimator which estimator to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#'
#' @return a list with a ggplot object, a description, and the desired size for the plot
#' @export
#'
#' @concept diagnostics
PlotModelCompareLL=function(data,label="4sU",estimator="Separate") {
# R CMD check guard for non-standard evaluation
`Binom log likelihood` <- `TB-Binom log likelihood` <- Subread <- NULL
tab=GetTableQC(data,"model.parameters")
tab$Condition=factor(tab$Condition,levels=unique(tab$Condition))
cond=unique(tab$Condition)
ncond=length(cond)
tab=tab[tab$Label==label & tab$Estimator==estimator,]
g=ggplot(tab,aes(Condition,`Binom log likelihood`-`TB-Binom log likelihood`,color=Subread))+cowplot::theme_cowplot()+
geom_point(position=position_dodge(width=0.2))+coord_cartesian(ylim=c(min(tab$`Binom log likelihood`-tab$`TB-Binom log likelihood`),0))+ylab("log likelihood ratio Binom/TB-Binom")+xlab(NULL)+scale_color_brewer(NULL,palette="Dark2")+theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+geom_hline(yintercept=0)+geom_hline(yintercept=-qchisq(0.95,df=1),linetype=2)
list(
plot=g,
description="Shows the difference in log likelihoods between the binom and tbbinom models.",
size=c(width=9,height=7)
)
}
#' Diagnostic plot for estimated models (global error rate)
#'
#' This belongs to the fifth kind (profile likelihoods) of diagnostic plots
#'
#' Shows the profile likelihoods for all parameters of the tbbinom model.
#'
#' @param data a grandR object
#' @param estimator which estimator to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#' @param label which label to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#' @param sample which sample to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#' @param subread which subread to consider (see \link{GetDiagnosticParameters}); cannot be NULL
#'
#' @return a list with a ggplot object, a description, and the desired size for the plot
#' @export
#'
#' @concept diagnostics
PlotProfileLikelihood=function(data,label="4sU",estimator=NULL,sample=NULL,subread=NULL) {
if (is.null(estimator)) stop("No estimator defined; see GetDiagnosticParameters(data)$estimator for choices!")
if (is.null(sample)) stop("No sample defined; see GetDiagnosticParameters(data)$sample for choices!")
if (is.null(subread)) stop("No subread defined; see GetDiagnosticParameters(data)$subread for choices!")
tab=if (is.data.frame(data)) data else GetTableQC(data,"model.profile")
if (is.null(tab$Estimator)) tab$Estimator=estimator
tab=tab[tab$Condition==sample & tab$Subread==subread & tab$Label==label & tab$Estimator==estimator,]
plot1=function(x,y,ylab=y,xlab=NULL) {
t=tab[tab$Parameter==x,]
g=ggplot(t,aes(!!sym(x),!!sym(y)))+cowplot::theme_cowplot()+
ylab(ylab)+xlab(xlab)+theme(axis.text.x = element_text(angle = 40, vjust = 1, hjust=1))
if (y=="deltaLL") g=g+geom_hline(yintercept=0,color="grey",linetype=2)
g+geom_line()
}
g=cowplot::plot_grid(
plot1("ntr","deltaLL",ylab="log likelihood ratio",xlab="ntr"),
plot1("p.err","deltaLL",ylab="log likelihood ratio",xlab="p.err"),
plot1("p.mconv","deltaLL",ylab="log likelihood ratio",xlab="p.mconv"),
plot1("shape","deltaLL",ylab="log likelihood ratio",xlab="shape"),
cowplot::ggdraw(),
plot1("p.err","ntr"),
plot1("p.mconv","ntr"),
plot1("shape","ntr"),
plot1("ntr","p.err"),
cowplot::ggdraw(),
plot1("p.mconv","p.err"),
plot1("shape","p.err"),
plot1("ntr","p.mconv"),
plot1("p.err","p.mconv"),
cowplot::ggdraw(),
plot1("shape","p.mconv"),
plot1("ntr","shape"),
plot1("p.err","shape"),
plot1("p.mconv","shape"),
cowplot::ggdraw(),
align="hv",axis="b",ncol=4)
title=cowplot::ggdraw()+cowplot::draw_label(sprintf("%s (%s) - %s",sample,subread,label), size=15)
g=cowplot::plot_grid(title, g, ncol=1, rel_heights=c(0.1, 1))
list(
plot=g,
description="Shows the profile likelihoods for all parameters of the tbbinom model.",
size=c(width=21,height=16)
)
}
#' Convencience methods for creating QC pdfs
#'
#' These methods are invoked by GRAND3 to generate pdfs.
#'
#' @param data a grandR object
#' @param labels which label to consider (see \link{GetDiagnosticParameters}); if NULL, all available estimators are used
#' @param estimators which estimator to consider (see \link{GetDiagnosticParameters}); if NULL, all available estimators are used
#'
#' @return NULL
#' @export
#'
#' @concept diagnostics
#' @describeIn CreatePdfs Create all pdfs
CreatePdfs=function(data,labels=NULL,estimators=NULL) {
CreatePdfsParameters(data,labels,estimators)
CreatePdfsComparison(data,labels,estimators)
CreatePdfsProfiles(data,labels,estimators)
invisible(NULL)
}
#' @describeIn CreatePdfs Create pdfs visualizing the estimated parameters
#' @export
CreatePdfsParameters=function(data,labels=NULL,estimators=NULL) {
tab=GetTableQC(data,"model.parameters")
cond=unique(tab$Condition)
ncond=length(cond)
if (!is.null(labels)) tab=tab[tab$Label %in% labels,]
if (!is.null(estimators)) tab=tab[tab$Estimator %in% estimators,]
for (lab in unique(tab$Label)) {
for (estimator in unique(tab$Estimator)) {
grDevices::pdf(sprintf("%s.model.parameters.%s.%s.pdf",data$prefix,lab,estimator),width=3+ncond/2,height=7)
print(PlotModelNtr(data,label=lab,estimator=estimator,model="Binom")$plot+ggtitle(paste(lab," (binom)")))
print(PlotModelErr(data,label=lab,estimator=estimator,model="Binom")$plot+ggtitle(paste(lab," (binom)")))
print(PlotModelConv(data,label=lab,estimator=estimator,model="Binom")$plot+ggtitle(paste(lab," (binom)")))
if ("TB-Binom ntr" %in% names(tab)) {
print(PlotModelNtr(data,label=lab,estimator=estimator,model="TB-Binom")$plot+ggtitle(paste(lab," (tbbinom)")))
print(PlotModelErr(data,label=lab,estimator=estimator,model="TB-Binom")$plot+ggtitle(paste(lab," (tbbinom)")))
print(PlotModelConv(data,label=lab,estimator=estimator,model="TB-Binom")$plot+ggtitle(paste(lab," (tbbinom)")))
print(PlotModelShape(data,label=lab,estimator=estimator)$plot+ggtitle(paste(lab," (tbbinom")))
print(PlotModelLabelTimeCourse(data,label=lab,estimator=estimator)$plot+ggtitle(paste(lab," (tbbinom")))
}
g=grDevices::dev.off()
}}
invisible(NULL)
}
#' @describeIn CreatePdfs Create pdfs comparing the estimated parameters
#' @export
CreatePdfsComparison=function(data,labels=NULL,estimators=NULL) {
tab=GetTableQC(data,"model.parameters")
cond=unique(tab$Condition)
ncond=length(cond)
if (!is.null(labels)) tab=tab[tab$Label %in% labels,]
if (!is.null(estimators)) tab=tab[tab$Estimator %in% estimators,]
for (lab in unique(tab$Label)) {
for (estimator in unique(tab$Estimator)) {
grDevices::pdf(sprintf("%s.model.comparison.%s.%s.pdf",data$prefix,lab,estimator),width=9,height=7)
print(PlotModelCompareErrPrior(data,label=lab,estimator=estimator,model="Binom")$plot+ggtitle(paste(lab," (binom)")))
if ("TB-Binom ntr" %in% names(tab)) {
print(PlotModelCompareErrPrior(data,label=lab,estimator=estimator,model="TB-Binom")$plot+ggtitle(paste(lab," (tbbinom)")))
print(PlotModelCompareNtr(data,label=lab,estimator=estimator)$plot+ggtitle(lab))
print(PlotModelCompareErr(data,label=lab,estimator=estimator)$plot+ggtitle(lab))
print(PlotModelCompareConv(data,label=lab,estimator=estimator)$plot+ggtitle(lab))
print(PlotModelCompareLL(data,label=lab,estimator=estimator)$plot+ggtitle(lab))
}
g=grDevices::dev.off()
}}
invisible(NULL)
}
#' @describeIn CreatePdfs Create pdfs visualizing the profile likelihoods
#' @export
CreatePdfsProfiles=function(data,labels=NULL,estimators=NULL) {
para=GetTableQC(data,"model.parameters")
tab=GetTableQC(data,"model.profile")
if (is.null(tab)) return(invisible(NULL))
cond=unique(para$Condition)
ncond=length(cond)
if (!is.null(labels)) tab=tab[tab$Label %in% labels,]
if (!is.null(estimators)) tab=tab[tab$Estimator %in% estimators,]
para = para[para$Label %in% unique(tab$Label),]
para = para[para$Estimator %in% unique(tab$Estimator),]
# only para has the right order!
subs=GetDiagnosticParameters(data)$subread
for (lab in unique(para$Label)) {
for (estimator in unique(para$Estimator)) {
grDevices::pdf(sprintf("%s.model.profile.%s.%s.pdf",data$prefix,lab,estimator),width=21,height=16)
for (cond in unique(tab$Condition)) {
for (subread in subs) {
if (sum(tab$Label==lab & tab$Condition==cond & tab$Subread==subread & tab$Estimator==estimator)>0) {
print(PlotProfileLikelihood(tab,lab,estimator,cond,subread)$plot)
}
}
}
g=grDevices::dev.off()
}}
invisible(NULL)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.