R/reliabilityCheckSwath.R

Defines functions reliabilityCheckSwath

Documented in reliabilityCheckSwath

reliabilityCheckSwath <- function(seed.swathfile, ext.swathfile, max.fdrpass = 3, max.peptide = 2)
{

flog.threshold(ERROR)

dfdr = lapply(c(seed.swathfile, ext.swathfile), readWorkbook, sheet='FDR')
dfdr = lapply(dfdr, fdr.crit)  


# fdr density distribution

ds.seed.list = list()
ds.ext.list = list()


for(nfdr in 1:max.fdrpass) {

	ds.seed = dfdr[[1]][dfdr[[1]]$nfdr.pass >= nfdr,-ncol(dfdr[[1]])]
	ds.ext = dfdr[[2]][dfdr[[2]]$nfdr.pass >= nfdr,-ncol(dfdr[[2]])]

	ds.seed.list[[nfdr]] = ds.seed
	ds.ext.list[[nfdr]] = ds.ext
}


png('plot fdr density.png', 3000, 2000, res=300)
layout(matrix(1:(2*max.fdrpass), nrow=2))
for(nfdr in 1:max.fdrpass) {
	plot(density(as.vector(as.matrix(ds.seed.list[[nfdr]][, -c(1:7)]))), 
		main=paste('Seed FDR pass', nfdr, '\n#protein', 
		length(unique(ds.seed.list[[nfdr]][,1]))), cex.main=.8, ylab='FDR density',xlab='')
	plot(density(as.vector(as.matrix(ds.ext.list[[nfdr]][, -c(1:7)]))), 
		main=paste('Extended FDR pass', nfdr, '\n#protein', 
		length(unique(ds.ext.list[[nfdr]][,1]))), cex.main=.8,ylab='FDR density',xlab='')
}
dev.off()	


# 8 fdr bins
list.fdr.bins = list()
Bins = c(0, .01, .1, .2, .3, .4, .5, .8, 1)

all.list = list(ds.seed.list, ds.ext.list)

for(ii in  1:2 ) {


ds.list = all.list[[ii]]

fdr.bins = matrix(NA, nrow=8, ncol=max.fdrpass)

rownames(fdr.bins) = sapply(1:8, function(x) paste(Bins[x], '-', Bins[x+1]) )
colnames(fdr.bins) = paste('FDR', 1:max.fdrpass)

	for(nfdr in 1:max.fdrpass) {
		ds = ds.list[[nfdr]]			
		v = getFdrBins(as.matrix(ds[, -c(1:7)]), Bins)
		
		#check
		all(sum(v) == 100)
		fdr.bins[,nfdr] = v
	
	}

list.fdr.bins[[ii]] = fdr.bins
}


Label = c('Seed','Extended')
Cols = gray.colors(length(Bins))

legend.x = 6.5

						
png('fdr bins barplot.png', 4000, 1500, res=300) 

layout(matrix(c(1,2), ncol=2))
par(mar=c(5,5,4,8)+.1)
par(xpd=TRUE)

bp1 = barplot(list.fdr.bins[[2]], col=Cols, main=paste(Label[2], 'Swath'), 
	width=.5, ylab='%FDR bin', las=2)

legend(bp1[length(bp1)]+.5,80, fill=Cols, legend=rownames(list.fdr.bins[[1]]), title='FDR Bins')

par(mar=c(5,2,4,8)+.1)
par(xpd=TRUE)	
barplot(list.fdr.bins[[1]], col=Cols, main=paste(Label[1], 'Swath FDR'), las=2)

dev.off()


# peptide and protein venn at fdr.pass=1

list.vn.pep = list()
list.vn.prot = list()

nfdr = 1

ds.seed = ds.seed.list[[nfdr]]
ds.ext = ds.ext.list[[nfdr]]

list.vn.pep[[nfdr]] = venn.diagram(list(seedSwath=ds.seed$Peptide, extSwath=ds.ext$Peptide), 
	file='venn of peptide.png',  category.names=c('seed', 'extended'),
	fill=c("aquamarine1", "chartreuse"), main=paste('Peptides at FDR pass', nfdr) )
	
list.vn.prot[[nfdr]] = venn.diagram(list(seedSwath=ds.seed$Protein, extSwath=ds.ext$Protein), 
	file='venn of protein.png', category.names=c('seed', 'extended'),
	fill=c("aquamarine1", "chartreuse"), main=paste('Proteins at FDR pass', nfdr) )



# matrices of protein and peptide numbers with fdr pass
mat.prot = matrix(NA, nrow=max.fdrpass, ncol=3)
mat.pep = matrix(NA, nrow=max.fdrpass, ncol=3)

for(nfdr in 1:max.fdrpass) {
	ds.seed = ds.seed.list[[nfdr]]
	ds.ext = ds.ext.list[[nfdr]]

	mat.prot[nfdr,] = c(length(unique(ds.ext$Protein)), length(unique(ds.seed$Protein)),
			length(intersect(ds.ext$Protein, ds.seed$Protein)) )
	mat.pep[nfdr,] = c(length(unique(ds.ext$Precursor.MZ)), length(unique(ds.seed$Precursor.MZ)),
			length(intersect(ds.ext$Precursor.MZ, ds.seed$Precursor.MZ)) )
			
}
	
rownames(mat.prot) = c('Default (>=1 peptide, 1 FDR pass)', paste("fdr.pass", 2:max.fdrpass) )
colnames(mat.prot) = paste(c("extended", "seed", "common"), 'protein')
rownames(mat.pep) = c('Default (>=1 peptide, 1 FDR pass)', paste("fdr.pass", 2:max.fdrpass) )
colnames(mat.pep) = paste(c("extended", "seed", "common"), 'peptide')

Colours = terrain.colors(max.fdrpass)

png("barplots FDR pass.png", 2000, 2000, res=300)
layout(matrix(1:2, ncol=2) )
par(mar=c(8,4,4,2)+.1)
bp1 = barplot(mat.prot[,-3], beside=TRUE, col=Colours, las=2,
	main="Proteins filtered by FDR")
	
bp2 = barplot(mat.pep[,-3], beside=TRUE, col=Colours, las=2,
	main="Peptides filtered by FDR")

legend('topright', fill=Colours, legend=paste("fdr.pass", 1:max.fdrpass))

dev.off()

dat.protpep = cbind(mat.pep, mat.prot)

#################################################
# Filter swath results with number of peptides
#################################################

# at lease one sample pass fdr <= 0.01

dswat = lapply(c(seed.swathfile, ext.swathfile), function(x) readWorkbook(x, 2) )

dswat = lapply(dswat, function(x) { agg = aggregate(x$Peptide, by=list(x$Protein), length);
								x$npep = agg[match(x$Protein, agg[,1]), 2]; x} )


mat.prot = matrix(NA, nrow=max.peptide-1, ncol=3)
mat.pep = matrix(NA, nrow=max.peptide-1, ncol=3)
	
for(n in 2:max.peptide) {
	ds.seed = dswat[[1]][dswat[[1]]$npep >= n,]
	ds.ext = dswat[[2]][dswat[[2]]$npep >= n,]

	mat.prot[n-1,] = c(length(unique(ds.ext$Protein)), length(unique(ds.seed$Protein)),
			length(intersect(ds.ext$Protein, ds.seed$Protein)) )
	mat.pep[n-1,] = c(length(unique(ds.ext$Precursor.MZ)), length(unique(ds.seed$Precursor.MZ)),
			length(intersect(ds.ext$Precursor.MZ, ds.seed$Precursor.MZ)) )
			
}
	
	
rownames(mat.prot) = paste("peptide.pass", 2:max.peptide)
colnames(mat.prot) = paste(c("extended", "seed", "common"), 'protein')
rownames(mat.pep) = paste("peptide.pass", 2:max.peptide)
colnames(mat.pep) = paste(c("extended", "seed", "common"), 'peptide')


dat.pepfilter = cbind(mat.pep, mat.prot)								

dat.protpep = rbind(dat.protpep, dat.pepfilter)

 
##########################################################################
# Quantitation consistency between seed and ext-based SWATH
##########################################################################

fdr.seed = dfdr[[1]][dfdr[[1]]$nfdr.pass > 0,]
fdr.ext = dfdr[[2]][dfdr[[2]]$nfdr.pass > 0,]

swa.seed = dswat[[1]][,-ncol(dswat[[1]])]
swa.ext = dswat[[2]][,-ncol(dswat[[2]])]

# check fdr and peptide quantification peptide match 

all(fdr.seed$Peptide==swa.seed$Peptide)
all(fdr.ext$Peptide==swa.ext$Peptide)

stopifnot(ncol(swa.seed) == ncol(swa.ext))

mat.seed = swa.seed[,-c(1:5)]
mat.ext = swa.ext[,-c(1:5)]


Sample = colnames(mat.seed)

# correlation

res.cor = list()

method.res = list()

quant.checkmethod = c('cor', 'bland.altman', 'cv')

for(method in quant.checkmethod ) {
	vcor.list = list()

	for(npass in 1:max.fdrpass) 	{

		res = quantification.accuracy(swa.seed[fdr.seed$nfdr.pass >= npass,], 
								swa.ext[fdr.ext$nfdr.pass >= npass,], method=method,
								Sample=Sample)

		vcor.list[[npass]] = res$vcor
	}

	method.res[[method]] = vcor.list
	
	# median correlation of sample correlation

	dat.medcor = data.frame(sapply(vcor.list, median))
	rownames(dat.medcor) = paste('fdr pass', 1:max.fdrpass )
	colnames(dat.medcor) = 'median'

	# correlation with peptide filter

	vcor.list = list()

	for(npass in 2:max.peptide) 
	{

		res = quantification.accuracy(dswat[[1]][dswat[[1]]$npep >= npass,-ncol(dswat[[1]])], 
								dswat[[2]][dswat[[2]]$npep >= npass,-ncol(dswat[[2]])], method = method,
								Sample=Sample)

		vcor.list[[npass-1]] = res$vcor
		
		# ttest.list[[npass]] = try(t.test(res$vcor, res$rcor, paired=TRUE)$p.value)

	}

	dat.medcor.pep = data.frame(sapply(vcor.list, median))
	rownames(dat.medcor.pep) = paste('peptide pass', 2:max.peptide )
	colnames(dat.medcor.pep) = 'median'

	dat.medcor = rbind(dat.medcor, dat.medcor.pep)
	
	res.cor[[method]] = dat.medcor

}

	
for(ii in 1:3) {

	method = quant.checkmethod[ii]
	vcor.list = method.res[[ii]]
	
	
	png(paste(method, "boxplot.png"), 3000, 1500, res=300)
	boxplot(vcor.list, names=paste("fdr pass", 1:max.fdrpass), 
		main=paste("Protein quantification consistency between the seed and extended\nmeasured by", method), 
		ylab=paste(method,"of log peak area"), col=Colours, las=2, notch=TRUE)

	dev.off()

}


dat.comb = Reduce(cbind, res.cor)
colnames(dat.comb) = paste(rep(names(res.cor), each=1), colnames(dat.comb))

dat.comb = cbind(dat.protpep, dat.comb)

write.csv(dat.comb, 'table protpep num and correlation.csv')


list(fdr.bins = list.fdr.bins, dat.comb = dat.comb)

}

	
	

Try the SwathXtend package in your browser

Any scripts or data that you put into this service are public.

SwathXtend documentation built on Nov. 8, 2020, 6:42 p.m.