library(lattice) library(knitr) library(parallel) library(NestLink) cv <- 1 - 1:7 / 10 t <- trellis.par.get("strip.background") t$col <- (rgb(cv,cv,cv)) trellis.par.set("strip.background", t) n.simulation <- 10
# filename <- system.file("extdata/PGexport2_normalizedAgainstSBstandards_Peptides.csv", # package = "NestLink") # library(ExperimentHub) # eh <- ExperimentHub() # filename <- query(eh, c("NestLink", "PGexport2_normalizedAgainstSBstandards_Peptides.csv"))[[1]] filename <- getExperimentHubFilename("PGexport2_normalizedAgainstSBstandards_Peptides.csv") P <- read.csv(filename, header = TRUE, sep=';') dim(P)
remove modifications
P <- P[P$Modifications == '', ] dim(P)
select rows
P <- P[,c('Accession', 'Sequence', "X20170919_05_62465_nl5idx1.3_6titratecoli", "X20170919_16_62465_nl5idx1.3_6titratecoli", "X20170919_09_62466_nl5idx1.3_7titratesmeg", "X20170919_14_62466_nl5idx1.3_7titratesmeg")] dim(P)
rename column names
names(P)<-c('Accession','Sequence','coli1', 'coli2', 'smeg1', 'smeg2') dim(P)
remove all rows with invalid identidfier
P<- P[grep("^P[0-9][A-Z][0-9]", P$Accession), ] P<-droplevels(P)
add flycode set annotation
P$ConcGr <- NA P$ConcGr[P$Accession %in% c('P1A4', 'P1B4', 'P1C4', 'P1D4', 'P1E4', 'P1F4')] <- 92 P$ConcGr[P$Accession %in% c('P1A5', 'P1B5', 'P1C5', 'P1D5', 'P1G4', 'P1H4')] <- 295 P$ConcGr[P$Accession %in% c('P1A6', 'P1B6', 'P1E5', 'P1F5', 'P1G5', 'P1H5')] <- 943 P$ConcGr[P$Accession %in% c('P1C6', 'P1D6', 'P1E6', 'P1F6', 'P1G6', 'P1H6')] <- 3017
table(P$ConcGr) Pabs <- P
table(P$Accession)
P.summary <- aggregate(. ~ ConcGr * Accession, data=P[,c('Accession', 'coli1', 'coli2', 'smeg1', 'smeg2', 'ConcGr')], FUN=sum) P.summary$nDetectedFlycodes <- aggregate(Sequence ~ ConcGr * Accession, data=na.omit(P), FUN=length)[,3] P.summary$nTotalFlycodes <- 30 P.summary$coverage <- round(100 * P.summary$nDetectedFlycodes / P.summary$nTotalFlycodes) kable(P.summary[order(P.summary$ConcGr),], row.names = FALSE) # write.csv(P.summary, file='Figs/FigureS6b.csv')
The following function defines the computation and rendering of the by the
paris
function called panels.
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) { usr <- par("usr"); on.exit(par(usr)) par(usr = c(0, 1, 0, 1)) r <- abs(cor(x, y)) txt <- format(c(r, 0.123456789), digits = digits)[1] txt <- paste0(prefix, txt) if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt) text(0.5, 0.5, txt, cex = cex.cor * r) }
rv <-lapply(unique(P$ConcGr), function(q){ pairs((P[P$ConcGr == q ,c('coli1', 'coli2', 'smeg1', 'smeg2')]), pch=16, col=rgb(0.5,0.5,0.5,alpha = 0.3), lower.panel = panel.cor, asp=1, main=q) })
FCfill2max
to conduct a fair random experiment we add dummy flycodes
to the input data.frame
.
FCfill2max <- function(P, n = max(table(P$Accession))){ for (i in unique(P$Accession)){ idx <- which(P$Accession == i) # determine the number of missing rows for Accession i ndiff <- n - length(idx) if(length(idx) < n){ cand <- P[idx[1], ] cand[,2 ] <- "xxx" cand[,3:6 ] <- NA for (j in 1:ndiff){ P <- rbind(P, cand) } } } P }
P.mean <- apply(P[, c('coli1', 'coli2', 'smeg1', 'smeg2')],1, FUN=mean) P.sd <- apply(P[, c('coli1', 'coli2', 'smeg1', 'smeg2')],1, FUN=sd) boxplot(100*P.sd/P.mean ~ P$ConcGr,log='y', ylab='CV%')
P <- FCfill2max(P)
FlycodeAbsoluteStatistics
using coli1
columnFlycodeAbsoluteStatistics <- function(P){ PP <- aggregate(P$coli1 ~ P$Accession + P$ConcGr, FUN=sum) names(PP) <- c('Accession', 'ConcGr', 'coli1_sum') PPP <- aggregate(PP$coli1_sum ~ PP$ConcGr, FUN=mean) P.mean <- aggregate(PP$coli1_sum ~ PP$ConcGr, FUN=mean) P.sd <- aggregate(PP$coli1_sum ~ PP$ConcGr, FUN=sd) P.cv <- aggregate(PP$coli1_sum ~ PP$ConcGr, FUN = function(x){ 100 * sd(x) / mean(x) }) P.length <- max(aggregate(P$coli1 ~ P$Accession, FUN=length)[,2]) rv <- data.frame(ConcGr=P.sd[,1], mean=P.mean[,2], sd=P.sd[,2], cv=round(P.cv[,2],2)) rv$nFCAccession <- P.length rv }
kable(FlycodeAbsoluteStatistics(P))
FCrandom
# TODO(cp); make it work for n = 0 FCrandom <- function(P, n = 1){ if(n == 0){ return (P) } for (i in unique(P$Accession)){ idx <- which(P$Accession == i) stopifnot(length(idx) >= n) smp <- sample(length(idx), n) P <- P[-idx[smp],] } P$n <- n P }
set.seed(8) S <- do.call('rbind', lapply(1:29, function(i){ FlycodeAbsoluteStatistics(FCrandom(P, i)) }))
xyplot(cv ~ nFCAccession | ConcGr, data = S, layout = c(4, 1), strip = strip.custom(strip.names = TRUE, strip.levels = TRUE) )
set.seed(1) S.rep <- do.call('rbind', lapply(1:n.simulation, function(s){ S <- do.call('rbind', lapply(1:29, function(i){ FlycodeAbsoluteStatistics(FCrandom(P, i)) })) }))
NestLink_absolute_flycode_simulation <- xyplot(cv ~ nFCAccession | ConcGr, data = S.rep, panel = function(x,y,...){ panel.abline(h=c(10, 50), col='red') panel.xyplot(x, y, ...) xy.median <- (aggregate(y, by=list(x), FUN=median, na.rm = TRUE)) xy.quantile <- aggregate(y, by=list(x), FUN=function(d){quantile(d, c(0.05, 0.5, 0.95), na.rm = TRUE)}) panel.points( xy.median[,1], xy.median[,2], pch=16, cex=0.5) # print((xy.quantile[,2])[,3]) panel.points( xy.quantile[,1],(xy.quantile[,2])[,1], pch='-') panel.points( xy.quantile[,1],(xy.quantile[,2])[,3], pch='-') }, xlab= 'Number of flycodes', ylab ='CV [%]', strip = strip.custom(strip.names = TRUE, strip.levels = TRUE), scales=list(x=list(at=c(1,5,10,15,20,25,30)), y=list(at=c(0,10,50,100,150,200))), ylim=c(0,175), pch=16, col=rgb(0.5, 0.5, 0.5, alpha = 0.01), layout = c(4,1)) print(NestLink_absolute_flycode_simulation) # png("NestLink_absolute_flycode_simulation.png", 1200,800) # print(NestLink_absolute_flycode_simulation) # dev.off()
P <- na.omit(P) P <- P[P$coli1 >0,] P$coli1 <- P$coli1 / sum(P$coli1) P$coli2 <- P$coli2 / sum(P$coli2) P$smeg1 <- P$smeg1 / sum(P$smeg1) P$smeg2 <- P$smeg2 / sum(P$smeg2)
sanity check
sum(P$coli1) sum(P$coli2) sum(P$smeg1) sum(P$smeg2)
P$ratios <- (0.5* (P$coli1 + P$coli2)) / (0.5 * (P$smeg1 + P$smeg2))
b <- boxplot(P$coli1 / P$coli2, P$coli1 / P$smeg1, P$coli1 / P$smeg2,P$coli2 / P$smeg1, P$coli2 / P$smeg2 , P$smeg1 / P$smeg2, P$ratios, ylab='ratios', ylim = c(0,2 )) axis(1, 1:6, c('coli[12]', 'coli1-smeg1', 'coli1-smeg2', 'coli2-smeg1', 'coli2- smeg2','smeg[12]'))
op <- par(mfrow = c(1, 4)) boxplot(P$coli1 ~ P$ConcGr) boxplot(P$coli2 ~ P$ConcGr) boxplot(P$smeg1 ~ P$ConcGr) boxplot(P$smeg2 ~ P$ConcGr)
op <- par(mfrow=c(1,1), mar=c(5,5,5,2) ) b <- boxplot(df<-cbind(P$coli1/P$coli2, P$coli1/P$smeg1, P$coli1/P$smeg2, P$coli2/P$smeg1, P$coli2/P$smeg2, P$smeg1/P$smeg2, P$ratios), log='y', ylab='normalized ratios', #ylim = c(0, 2), axes=FALSE, main=paste("ConcGr = all")) axis(1, 1:7, c('coli[12]', 'coli1-smeg1', 'coli1-smeg2', 'coli2-smeg1', 'coli2- smeg2','smeg[12]', 'ratio')) abline(h=1, col='red') box() axis(2) #axis(3, 1:7, b$n) outliers.idx <- sapply(1:length(b$group), function(i){ q <- df[, b$group[i]] == b$out[i]; text(b$group[i], b$out[i], P[q, 2], pos=4, cex=0.4); text(b$group[i], b$out[i], P[q, 1], pos=2, cex=0.4); which(q)})
b <- boxplot(df<-cbind(P$coli1/P$coli2, P$coli1/P$smeg1, P$coli1/P$smeg2, P$coli2/P$smeg1, P$coli2/P$smeg2, P$smeg1/P$smeg2, P$ratio), log='', ylab='normalized ratios', ylim = c(0, 2), axes=FALSE, main=paste("ConcGr = all")) axis(1, 1:7, c('coli[12]', 'coli1-smeg1', 'coli1-smeg2', 'coli2-smeg1', 'coli2- smeg2','smeg[12]', 'ratio')) abline(h=1, col='red') box() axis(2) axis(3, 1:length(b$n), b$n) outliers.idx <- sapply(1:length(b$group), function(i){ q <- df[, b$group[i]] == b$out[i]; text(b$group[i], b$out[i], P[q, 2], pos=4, cex=0.4); text(b$group[i], b$out[i], P[q, 1], pos=2, cex=0.4); which(q)})
kable(P[unique(outliers.idx),])
rv <-lapply(unique(P$ConcGr), function(q){ pairs((P[P$ConcGr == q ,c('coli1', 'coli2', 'smeg1', 'smeg2')]), pch=16, col=rgb(0.5,0.5,0.5,alpha = 0.3), lower.panel = panel.cor, asp=1, main=q) })
bwplot(ratios ~ Accession | ConcGr, data = P, strip = strip.custom(strip.names = TRUE, strip.levels = TRUE), panel = function(...){ panel.abline(h=1, col='red') panel.bwplot(...) }, ylim=c(-0,2), scales = list(x = list(relation = "free", rot=45)), layout = c(4,1)) bwplot(ratios ~ ConcGr, data = P, horizontal = FALSE, panel = function(...){ panel.abline(h=1, col='red') panel.bwplot(...) }, ylim=c(0,2), scales = list(x = list(relation = "free", rot=45)), layout = c(1,1))
boxplot(ratios ~ ConcGr,data=P,ylim=c(0,2)) abline(h=1, col=rgb(1,0,0,alpha=0.4))
P<-na.omit(P) xyplot(ratios ~ Accession | ConcGr, data = P, strip = strip.custom(strip.names = TRUE, strip.levels = TRUE), panel =function(x,y, ...){ panel.abline(h=mean(y), col='red') panel.xyplot(x,y, pch=16, col=rgb(0.5,0.5,0.5,alpha = 0.5)) xy.mean <- (aggregate(y, by=list(x), FUN=mean, na.rm = TRUE)) xy.sd <- (aggregate(y, by=list(x), FUN=sd, na.rm = TRUE)) panel.points( xy.mean[,1], xy.mean[,2]) panel.points( xy.mean[,1], xy.mean[,2] + xy.sd[,2] , pch='-', col='red', cex=4) panel.points( xy.mean[,1], xy.mean[,2] - xy.sd[,2] , pch='-', col='red', cex=4)}, ylim=c(0,2), scales = list(x = list(relation = "free", rot=45)), layout = c(4,1))
P <- na.omit(P) xyplot(ratios ~ Accession | ConcGr, data = P, strip = strip.custom(strip.names = TRUE, strip.levels = TRUE), panel = function(x,y, ...){ panel.abline(h=mean(y), col='red') panel.xyplot(x,y, pch=16, col=rgb(0.5,0.5,0.5,alpha = 0.5)) xy.mean <- (aggregate(y, by=list(x), FUN=mean, na.rm = TRUE)) xy.sd <- (aggregate(y, by=list(x), FUN=sd, na.rm = TRUE)) panel.points( xy.mean[,1], xy.mean[,2]) panel.points( xy.mean[,1], xy.mean[,2] + xy.sd[,2] , pch='-', col='red', cex=4) panel.points( xy.mean[,1], xy.mean[,2] - xy.sd[,2] , pch='-', col='red', cex=4) ltext(xy.mean[,1], (xy.mean[,2] + xy.sd[,2]) , round(xy.sd[,2],2), pos=3, cex=0.5) }, layout = c(4,1), scales = list(y=list(log=TRUE), x = list(relation = "free", rot=45)), )
# P.cv <- aggregate(P$ratios ~ P$Accession, FUN=function(x){100*sd(x)/mean(x)})
P<-na.omit(P) trellis.par.set("strip.background",t) xyplot(ratios ~ ConcGr | ConcGr, data = P, strip = strip.custom(strip.names = TRUE, strip.levels = TRUE), panel = function(x,y){ panel.abline(h=1, col='red') panel.xyplot(x,y, pch=16, col=rgb(0.5,0.5,0.5,alpha = 0.5)) panel.points(x, mean(y)) # panel.points(x, median(y),col='green') panel.points(x, mean(y) + sd(y), pch='-', col='red',cex=5) ltext(x, mean(y) + sd(y), round(sd(y),3), pos=4) panel.points(x, mean(y) - sd(y), pch='-', col='red',cex=5) }, #ylim=c(-1,3), scales = list(x = list(relation = "free", rot=45)), layout = c(4,1))
outlier.idx <- which(P$ratios > 2) P[outlier.idx, ] # We do not remove outliers. # P <- P[-outlier.idx,]
P<-na.omit(P) trellis.par.set("strip.background",t) xyplot(ratios ~ ConcGr | ConcGr, data = P, strip = strip.custom(strip.names = TRUE, strip.levels = TRUE), panel = function(x,y){ panel.abline(h=1, col='red') panel.xyplot(x,y, pch=16, col=rgb(0.5,0.5,0.5,alpha = 0.5)) panel.points(x, mean(y)) # panel.points(x, median(y),col='green') panel.points(x, mean(y) + sd(y), pch='-', col='red',cex=5) ltext(x, mean(y) + sd(y), round(sd(y),3), pos=4) panel.points(x, mean(y) - sd(y), pch='-', col='red',cex=5) }, #ylim=c(-1,3), scales = list(x = list(relation = "free", rot=45)), layout = c(4,1))
FlycodeRelativeStatistics
to make the random experiment fair!
P <- FCfill2max(P)
FlycodeRelativeStatistics <- function(X, mode='bio'){ nFlycodesConcGr <- aggregate(X$Sequence ~ X$ConcGr, FUN=length) names(nFlycodesConcGr) <- c('ConcGr', 'nFlycodesConcGr') nFlycodesAccession.max <- max(aggregate(X$Sequence ~ X$Accession, FUN=length)[,2]) P.sum.coli1 <- aggregate(X$coli1 ~ X$Accession + X$ConcGr, FUN=sum) P.sum.coli2 <- aggregate(X$coli2 ~ X$Accession + X$ConcGr, FUN=sum)[,3] P.sum.smeg1 <- aggregate(X$smeg1 ~ X$Accession + X$ConcGr, FUN=sum)[,3] P.sum.smeg2 <- aggregate(X$smeg2 ~ X$Accession + X$ConcGr, FUN=sum)[,3] X <- P.sum.coli1 names(X) <- c('Accession', 'ConcGr', 'coli1') X$coli2 <- P.sum.coli2 X$smeg1 <- P.sum.smeg1 X$smeg2 <- P.sum.smeg2 if(!"ratios" %in% names(X)){ if (mode == 'tech_smeg'){ X$ratios <- X$smeg1 / X$smeg2 } else if(mode == 'tech_coli'){ X$ratios <- X$coli1 / X$coli2 }else{ # bio X$ratios <- ((0.5 * (X$coli1 + X$coli2)) / (0.5 * (X$smeg1 + X$smeg2))) } #warning("define ratios.") } #nFlycodesConcGr <- aggregate(X$Sequence ~ X$ConcGr, FUN=length) #names(nFlycodesConcGr) <- c('ConcGr', 'nFlycodesConcGr') X <- na.omit(X) P.mean <- aggregate(X$ratios ~ X$ConcGr, FUN=mean) names(P.mean) <- c('ConcGr', 'mean') P.median <- aggregate(X$ratios ~ X$ConcGr, FUN=median) names(P.median) <- c('ConcGr', 'median') P.sd <- aggregate(X$ratios ~ X$ConcGr, FUN=sd) names(P.sd) <- c('ConcGr', 'sd') rv <- data.frame(ConcGr=P.mean$ConcGr, median=P.median$median, mean=P.mean$mean, sd=P.sd$sd) # nFlycodesConcGr=nFlycodesConcGr[,2]) rv$cv <- 100 * rv$sd / rv$mean rv$length <- nFlycodesAccession.max rv }
message(paste("Number of simulations =", n.simulation)) set.seed(1) P.replicate <- do.call('rbind', lapply(1:n.simulation, function(run){ do.call('rbind', lapply(1:29, function(i) { rv <- FlycodeRelativeStatistics(FCrandom(P, i)) rv$run = run rv })) }))
set.seed(1) P.replicate.smeg <- do.call('rbind', lapply(1:n.simulation/2, function(run){ do.call('rbind', lapply(1:29, function(i) { rv <- FlycodeRelativeStatistics(FCrandom(P, i), mode='tech_smeg') rv$run = run rv })) }))
set.seed(1) P.replicate.coli <- do.call('rbind', lapply(1:n.simulation/2, function(run){ do.call('rbind', lapply(1:29, function(i) { rv <- FlycodeRelativeStatistics(FCrandom(P, i), mode='tech_coli') rv$run = run rv })) }))
xyplot(mean ~ length | ConcGr, data=P.replicate, panel = function(...){ panel.abline(h=1, col='red') panel.xyplot(...) }, strip = strip.custom(strip.names = TRUE, strip.levels = TRUE), ylim=c(0,2), pch=16, col=rgb(0.5, 0.5, 0.5, alpha = 0.1), layout = c(4,1), xlab= 'number of FlyCodes', ) xyplot(sd ~ length | ConcGr, data=P.replicate, panel = function(...){ panel.xyplot(...) }, strip = strip.custom(strip.names = TRUE, strip.levels = TRUE), pch=16, col=rgb(0.5, 0.5, 0.5, alpha = 0.1), layout = c(4,1), xlab= 'number of FlyCodes', ) xyplot(cv ~ length | ConcGr, data=P.replicate, panel = function(...){ panel.xyplot(...) }, strip = strip.custom(strip.names = TRUE, strip.levels = TRUE), pch=16, col=rgb(0.5, 0.5, 0.5, alpha = 0.01), layout = c(4,1), xlab= 'number of FlyCodes', ylab ='cv [in %]' )
SIM <- P.replicate SIM$Type <- "Biological replicates" P.replicate.coli$Type <- "Technical replicates" P.replicate.smeg$Type <- "Technical replicates" NestLink_relative_flycode_simulation <- xyplot(cv ~ length | ConcGr, index.cond=list(c(1,2,3,4)), data=do.call('rbind', list(SIM, P.replicate.coli, P.replicate.smeg)), subset = Type == "Biological replicates", panel = function(x,y,...){ panel.abline(h=10, col='red') panel.xyplot(x, y, ...) xy.median <- (aggregate(y, by=list(x), FUN=median, na.rm = TRUE)) xy.quantile <- aggregate(y, by=list(x), FUN=function(d){quantile(d, c(0.05, 0.5, 0.95), na.rm = TRUE)}) panel.points( xy.median[,1], xy.median[,2], pch=16, cex=0.5) # print((xy.quantile[,2])[,3]) panel.points( xy.quantile[,1],(xy.quantile[,2])[,1], pch='-') panel.points( xy.quantile[,1],(xy.quantile[,2])[,3], pch='-') }, strip = strip.custom(strip.names = TRUE, strip.levels = TRUE), scales=list(x=list(at=c(1,5,10,15,20,25,30)), y=list(at=c(0, 10,20,30,40,50))), pch=16, col=rgb(0.5, 0.5, 0.5, alpha = 0.01), layout = c(4, 1), ylim = c(0, 50), xlab= 'Number of flycodes', ylab ='CV [%]' ) print(NestLink_relative_flycode_simulation) NestLink_relative_flycode_simulation <- xyplot(cv ~ length | ConcGr, index.cond=list(c(1,2,3,4)), data=do.call('rbind', list(SIM, P.replicate.coli, P.replicate.smeg)), subset = Type == "Technical replicates", panel = function(x,y,...){ panel.abline(h=10, col='red') panel.xyplot(x, y, ...) xy.median <- (aggregate(y, by=list(x), FUN=median, na.rm = TRUE)) xy.quantile <- aggregate(y, by=list(x), FUN=function(d){quantile(d, c(0.05, 0.5, 0.95), na.rm = TRUE)}) panel.points( xy.median[,1], xy.median[,2], pch=16, cex=0.5) # print((xy.quantile[,2])[,3]) panel.points( xy.quantile[,1],(xy.quantile[,2])[,1], pch='-') panel.points( xy.quantile[,1],(xy.quantile[,2])[,3], pch='-') }, strip = strip.custom(strip.names = TRUE, strip.levels = TRUE), scales=list(x=list(at=c(1,5,10,15,20,25,30)), y=list(at=c(0, 10,20,30,40,50))), pch=16, col=rgb(0.5, 0.5, 0.5, alpha = 0.01), layout = c(4, 1), ylim = c(0, 50), xlab= 'Number of flycodes', ylab ='CV [%]' ) print(NestLink_relative_flycode_simulation)
length
corresponds to the number accessions. i
indicates the number of removed Flycodes in each accession group and can be ignored.
kable(FlycodeRelativeStatistics(P, mode = 'bio'))
kable(FlycodeRelativeStatistics(P, mode = 'tech_coli'))
kable(FlycodeRelativeStatistics(P, mode = 'tech_smeg'))
Here is the output of sessionInfo()
on the system on which this
document was compiled:
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.