Nothing
#' Missing At Random (MAR) proportion estimation based on technical replicates.
#'
#' MAR.est estimates the proportion of missing peaks at random (MAR) caused by preprocessing tools with exactly two technical replicates per sample.
#' @param abundance The full abundance matrix without missing value, with features in rows and samples in columns.
#' @param sample A vector of characters or integers. It is the sample name for each pair of replicates.
#' @param log.scale A scalar or vector of proportions. It is the total percentage of missing peaks throughout the full matrix.
#' @param violin.plot Logical, whether to generate violin and box plots to visualize abundance distribution of missing and nonmissing peaks.
#'
#' @import utils
#' @import ggplot2
#' @import reshape2
#'
#'
#' @return
#' \item{MAR.Proportion}{Estimated MAR proportion}
#' \item{plot}{Violin and box plots generated by ggplot2}
#' @examples
#' data('replicates')
#' # replicates contains mass specturm log abundance of 85 peptides
#' # with missing values for 4 pairs of technical replicates.
#'
#'
#' MAR=MAR.est(replicates,sample=rep(1:4,each=2),log.scale=FALSE,violin.plot=TRUE)
#' # Estimates the MAR proportion in the 4 pairs of replicates and output violin/box plots object.
#'
#' print(MAR$plot)
#' # Print violin/box plots
#'
#' @export
#' @importFrom stats na.omit predict
MAR.est<-function(abundance,sample,log.scale=TRUE,violin.plot=FALSE){
if(!requireNamespace('ggplot2')) {
install.packages('ggplot2')
if(!requireNamespace('ggplot2',character.only = TRUE)) stop("Package 'ggplot2' not found")}
if(!requireNamespace('reshape2')) {
install.packages('reshape2')
if(!requireNamespace('reshape2',character.only = TRUE)) stop("Package 'reshape2' not found")}
if(length(sample)!=ncol(abundance)){
stop('Abundance matrix colnames must match with sample index')
}
sample=as.character(sample)
if(sum(table(sample)!=2)>0){
stop('Use two replicates per sample')
}
replicate.name=paste(sort(sample),rep(c('a','b'),length(sample)/2),sep = '.')
abundance=abundance[,order(sample)]
colnames(abundance)=replicate.name
abundance[abundance==0]=NA
if(log.scale){
edata=log2(abundance)
}else {edata=abundance}
n_edata=nrow(edata)
n_sample=ncol(edata)
result_edata=matrix(NA,ncol=n_sample,nrow=n_edata)
colnames(result_edata)=colnames(edata)
for (i in 1:(n_sample/2)){
keep=which(!is.na(edata[,(2*i-1)])) #index for non-NA obs
result_edata[,(2*i)]=replace(edata[,(2*i)],keep,NA) #keep associated value for each missing replicate
keep1=which(!is.na(edata[,(2*i)]))
result_edata[,(2*i-1)]=replace(edata[,(2*i-1)],keep1,NA)
}
result_edata=as.data.frame(result_edata)
test_edata=matrix(NA,nrow=n_edata,ncol=n_sample/2)
colnames(test_edata)=colnames(result_edata)[seq(1,n_sample,2)]
for (i in 1:(n_sample/2)){
y1=apply(result_edata[,(2*i-1):(2*i)],1,function(x)sum(x,na.rm = TRUE))
y1[y1==0]=NA
n_row=length(na.omit(y1))
test_edata[1:n_row,i]=as.vector(na.omit(y1))
}
test_edata=as.data.frame(test_edata)
nomiss_edata=matrix(NA,ncol=n_sample,nrow=n_edata)
colnames(nomiss_edata)=colnames(edata)
for (i in 1:(n_sample/2)){
keep=which(is.na(edata[,(2*i-1)])) #index for non-NA obs
nomiss_edata[,(2*i)]=replace(edata[,(2*i)],keep,NA) #keep associated value for each missing replicate
keep1=which(is.na(edata[,(2*i)]))
nomiss_edata[,(2*i-1)]=replace(edata[,(2*i-1)],keep1,NA)
}
nomiss_edata=as.data.frame(nomiss_edata)
colnames(test_edata)=paste('Missing',colnames(test_edata),sep = '.')
colnames(nomiss_edata)=paste('Full',colnames(nomiss_edata),sep = '.')
random.pct=colSums(!is.na(test_edata))/dim(edata)[1]
names(random.pct)=paste('S',names(table(sample)),sep = '')
if(violin.plot){
all.data=cbind(nomiss_edata,test_edata)
plot.data=melt(all.data)
plot.data=plot.data[!is.na(plot.data$value),]
plot.data$variable=droplevels(plot.data$variable)
plot.data$Compound=ifelse(grepl('Missing',plot.data$variable),'Random Missing','Full')
plot.data$sample=gsub('Full','S',plot.data$variable)
plot.data$sample=gsub('Missing','S',plot.data$sample)
plot.data$sample=gsub('.a','',plot.data$sample)
plot.data$sample=gsub('.b','',plot.data$sample)
dodge <- position_dodge(width = 0.8)
p=ggplot(plot.data,aes(x=sample, y=plot.data$value,color=plot.data$Compound))+geom_violin(position = dodge)+geom_boxplot(width=0.1,position = dodge)+theme(legend.position="bottom")
p=p+labs(x='Replicates Pair',y='Log Abundance')
}
if(violin.plot){
return(list(MAR.Proportion=random.pct,plot=p))
} else{return(random.pct)}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.