library(lattice) cv <- 1 - 1:7 / 10 t <- trellis.par.get("strip.background") t$col <- (rgb(cv,cv,cv)) trellis.par.set("strip.background",t)
filename <- system.file("extdata/PGexport2_normalizedAgainstSBstandards_Peptides.csv", package = "NestLink") P <- read.csv(filename, header = TRUE, sep=';') dim(P)
remove modificatio
P <- P[P$Modifications == '', ] dim(P)
select rows
P <- P[,c('Accession', 'Sequence', "X20170919_05_62465_nl5idx1.3_6titratecoli", "X20170919_16_62465_nl5idx1.3_6titratecoli", "X20170919_09_62466_nl5idx1.3_7titratesmeg", "X20170919_14_62466_nl5idx1.3_7titratesmeg")] dim(P)
rename column names
names(P)<-c('Accession','Sequence','coli1', 'coli2', 'smeg1', 'smeg2') dim(P)
remove all rows with invalid identidfier
P<- P[grep("^P[0-9][A-Z][0-9]", P$Accession), ] P<-droplevels(P)
add flycode set annotation
P$FCset_ng <- NA P$FCset_ng[P$Accession %in% c('P1A4', 'P1B4', 'P1C4', 'P1D4', 'P1E4', 'P1F4')] <- 92 P$FCset_ng[P$Accession %in% c('P1A5', 'P1B5', 'P1C5', 'P1D5', 'P1G4', 'P1H4')] <- 295 P$FCset_ng[P$Accession %in% c('P1A6', 'P1B6', 'P1E5', 'P1F5', 'P1G5', 'P1H5')] <- 943 P$FCset_ng[P$Accession %in% c('P1C6', 'P1D6', 'P1E6', 'P1F6', 'P1G6', 'P1H6')] <- 3017
table(P$FCset) Pabs <- P
table(P$Accession)
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) { usr <- par("usr"); on.exit(par(usr)) par(usr = c(0, 1, 0, 1)) r <- abs(cor(x, y)) txt <- format(c(r, 0.123456789), digits = digits)[1] txt <- paste0(prefix, txt) if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt) text(0.5, 0.5, txt, cex = cex.cor * r) }
rv <-lapply(unique(P$FCset_ng), function(q){ pairs(log(P[P$FCset_ng == q ,c('coli1', 'coli2', 'smeg1', 'smeg2')], 2), pch=16, col=rgb(0.5,0.5,0.5,alpha = 0.3), lower.panel = panel.cor, asp=1, main=q) })
FCfill2max
FCfill2max <- function(P, n = max(table(P$Accession))){ for (i in unique(P$Accession)){ idx <- which(P$Accession == i) # determine the number of missing rows for Accession i ndiff <- n - length(idx) if(length(idx) < n){ cand <- P[idx[1], ] cand[,2 ] <- "xxx" cand[,3:6 ] <- 0 for (j in 1:ndiff){ P <- rbind(P, cand) } } } P }
P.mean <- apply(P[, c('coli1', 'coli2', 'smeg1', 'smeg2')],1, FUN=mean) P.sd <- apply(P[, c('coli1', 'coli2', 'smeg1', 'smeg2')],1, FUN=sd) boxplot(100*P.sd/P.mean ~ P$FCset_ng,log='y', ylab='CV%')
P <- FCfill2max(P)
FCstatistic
using coli1
columnFCstatistic <- function(P){ PP <- aggregate(P$coli1 ~ P$Accession + P$FCset_ng, FUN=sum) names(PP) <- c('Accession', 'FCset_ng', 'coli1_sum') PPP <- aggregate(PP$coli1_sum ~ PP$FCset_ng, FUN=mean) P.mean <- aggregate(PP$coli1_sum ~ PP$FCset_ng, FUN=mean) P.sd <- aggregate(PP$coli1_sum ~ PP$FCset_ng, FUN=sd) P.cv <- aggregate(PP$coli1_sum ~ PP$FCset_ng, FUN=function(x){100*sd(x)/mean(x)}) P.length <- aggregate(P$coli1 ~ P$FCset_ng, FUN=length) rv <- data.frame(FCset_ng=P.sd[,1],cv=P.cv[,2], length=P.length[,2]) rv } FCstatistic(P)
FCrandom
# TODO(cp); make it work for n = 0 FCrandom <- function(P, n = 1){ for (i in unique(P$Accession)){ idx <- which(P$Accession == i) stopifnot(length(idx) >= n) smp <- sample(length(idx), n) P <- P[-idx[smp],] } P$n <- n P }
set.seed(8) S <- do.call('rbind', lapply(1:29, function(i){ FCstatistic(FCrandom(P, i)) })) xyplot(cv ~ length|FCset_ng, data =S, layout=c(4,1), strip = strip.custom(strip.names = TRUE, strip.levels = TRUE) )
S.rep <- do.call('rbind', lapply(1:50, function(s){ set.seed(s); S <- do.call('rbind', lapply(1:29, function(i){FCstatistic(FCrandom(P, i))}))}))
xyplot(cv ~ length/6 | FCset_ng, data =S.rep, panel = function(x,y,...){ panel.xyplot(x,y,...) panel.loess(x,y)}, strip = strip.custom(strip.names = TRUE, strip.levels = TRUE), pch=16, col=rgb(0.5,0.5,0.5,0.2), layout = c(4,1))
P <- na.omit(P) P <- P[P$coli1 >0,] P0<-P P0$coli1 <- P0$coli1 / sum(P0$coli1) P0$coli2 <- P0$coli2 / sum(P0$coli2) P0$smeg1 <- P0$smeg1 / sum(P0$smeg1) P0$smeg2 <- P0$smeg2 / sum(P0$smeg2) P0$coli1 <- (log(P0$coli1,2) - mean(log(P0$coli1,2))) / sd(log(P0$coli1,2)) P0$coli2 <- (log(P0$coli2,2) - mean(log(P0$coli2,2))) / sd(log(P0$coli2,2)) P0$smeg1 <- (log(P0$smeg1,2) - mean(log(P0$smeg1,2))) / sd(log(P0$smeg1,2)) P0$smeg2 <- (log(P0$smeg2,2) - mean(log(P0$smeg2,2))) / sd(log(P0$smeg2,2)) P$coli1 <- (log(P$coli1,2) - mean(log(P$coli1,2))) / sd(log(P$coli1,2)) P$coli2 <- (log(P$coli2,2) - mean(log(P$coli2,2))) / sd(log(P$coli2,2)) P$smeg1 <- (log(P$smeg1,2) - mean(log(P$smeg1,2))) / sd(log(P$smeg1,2)) P$smeg2 <- (log(P$smeg2,2) - mean(log(P$smeg2,2))) / sd(log(P$smeg2,2))
b <- boxplot(P$coli1 - P$coli2, P$coli1 - P$smeg1, P$coli1 - P$smeg2,P$coli2 - P$smeg1, P$coli2 - P$smeg2 , P$smeg1 - P$smeg2, ylab='log2ratios', ylim = c(-1,1)) axis(1, 1:6, c('coli[12]', 'coli1-smeg1', 'coli1-smeg2', 'coli2-smeg1', 'coli2- smeg2','smeg[12]'))
op <- par(mfrow = c(2, 4)) boxplot(P$coli1 ~ P$FCset_ng) boxplot(P$coli2 ~ P$FCset_ng) boxplot(P$smeg1 ~ P$FCset_ng) boxplot(P$smeg2 ~ P$FCset_ng) boxplot(P0$coli1 ~ P0$FCset_ng, main="sum") boxplot(P0$coli2 ~ P0$FCset_ng, main="sum") boxplot(P0$smeg1 ~ P0$FCset_ng, main="sum") boxplot(P0$smeg2 ~ P0$FCset_ng, main="sum")
b <- boxplot(P$coli1, P$coli2, P$smeg1, P$smeg2) axis(3, 1:4, names(P[3:6])) outliers.idx <- sapply(1:length(b$group), function(i){ q <- P[, 2+ b$group[i]] == b$out[i]; text(b$group[i], b$out[i], P[q, 2], pos=4, cex=0.4); text(b$group[i], b$out[i], P[q, 1], pos=2, cex=0.4); which(q)})
P[unique(outliers.idx),] # DONT # P <- P[-unique(outliers.idx),]
rv <-lapply(unique(P$FCset_ng), function(q){ pairs((P[P$FCset_ng == q ,c('coli1', 'coli2', 'smeg1', 'smeg2')]), pch=16, col=rgb(0.5,0.5,0.5,alpha = 0.3), lower.panel = panel.cor, asp=1, main=q) })
P$log2ratios <- 0.5* (P$coli1 + P$coli2) - 0.5 * (P$smeg1 + P$smeg2)
bwplot(log2ratios ~ Accession | FCset_ng, data = P, strip = strip.custom(strip.names = TRUE, strip.levels = TRUE), panel = function(...){ panel.abline(h=0, col='red') panel.bwplot(...) }, ylim=c(-1,1), scales = list(x = list(relation = "free", rot=45)), layout = c(4,1)) bwplot(log2ratios ~ FCset_ng, data = P, horizontal = FALSE, panel = function(...){ panel.abline(h=0, col='red') panel.bwplot(...) }, ylim=c(-1,1), scales = list(x = list(relation = "free", rot=45)), layout = c(1,1))
boxplot(log2ratios ~ FCset_ng,data=P,ylim=c(-1,1)) abline(h=0, col=rgb(1,0,0,alpha=0.4))
P<-na.omit(P) xyplot(log2ratios ~ Accession | FCset_ng, data = P, strip = strip.custom(strip.names = TRUE, strip.levels = TRUE), panel = function(x,y){ panel.abline(h=0, col='red') panel.xyplot(x,y, pch=16, col=rgb(0.5,0.5,0.5,alpha = 0.5)) panel.points(x, mean(y)) panel.points(x, mean(y) + sd(y), pch='-', col='red') panel.points(x, mean(y) - sd(y), pch='-', col='red') }, ylim=c(-1,1), scales = list(x = list(relation = "free", rot=45)), layout = c(4,1))
P.cv <- aggregate(P$log2ratios ~ P$Accession, FUN=function(x){sd(x)/mean(x)})
P<-na.omit(P) trellis.par.set("strip.background",t) xyplot(log2ratios ~ FCset_ng|FCset_ng, data = P, strip = strip.custom(strip.names = TRUE, strip.levels = TRUE), panel = function(x,y){ panel.abline(h=0, col='red') panel.xyplot(x,y, pch=16, col=rgb(0.5,0.5,0.5,alpha = 0.5)) panel.points(x, mean(y)) # panel.points(x, median(y),col='green') panel.points(x, mean(y) + sd(y), pch='-', col='red',cex=5) ltext(x, mean(y) + sd(y), round(sd(y),3), pos=4) panel.points(x, mean(y) - sd(y), pch='-', col='red',cex=5) }, ylim=c(-2,2), scales = list(x = list(relation = "free", rot=45)), layout = c(4,1))
compute p-values using t.test
.
P.test <- P[P$coli1>0 & P$coli2>0, ] P.test$pvalue <- apply(P.test[, c('coli1', 'coli2', 'smeg1', 'smeg2')], 1, function(x){ t.test(x[1:2], x[3:4], alternative="two.side", paired = FALSE)$p.value } )
xyplot(-log10(pvalue) ~ log2ratios| FCset_ng, panel = function(...){ panel.abline(v=c(-1,1,0), col=c('lightgray', 'lightgray', rgb(0.9,0.1,0.1,0.4))) panel.abline(h=-log(0.05,10), col='lightgray') panel.xyplot(..., pch=16, col=rgb(0.5, 0.5 ,0.5, alpha = 0.5)) }, data = P.test, strip = strip.custom(strip.names = TRUE, strip.levels = TRUE), layout=c(4,1)) xyplot(-log10(p.adjust(pvalue, method="BH")) ~ log2ratios | FCset_ng, panel = function(...){ panel.abline(v=c(-1,1,0), col=c('lightgray', 'lightgray', rgb(0.9,0.1,0.1,0.4))) panel.abline(h=-log(0.05,10), col='lightgray') panel.xyplot(..., pch=16, col=rgb(0.5, 0.5 ,0.5, alpha = 0.5)) }, data = P.test, strip = strip.custom(strip.names = TRUE, strip.levels = TRUE), layout=c(4,1), sub="applies Benjamini & Hochberg p-value adjustment")
FCstatistics
P <- FCfill2max(P) FCstatistics <- function(P){ if(!"log2ratios" %in% names(P)){ P$log2ratios <- (log(0.5 * (P$coli1 + P$coli2), 2) - log(0.5 * (P$smeg1 + P$smeg2), 2)) warning("define log2ratios.") } P.length <- aggregate(P$log2ratios ~ P$FCset_ng, FUN=length) names(P.length) <- c('FCset_ng', 'length') P <- na.omit(P) P.mean <- aggregate(P$log2ratios ~ P$FCset_ng, FUN=mean) names(P.mean) <- c('FCset_ng', 'mean') P.median <- aggregate(P$log2ratios ~ P$FCset_ng, FUN=median) names(P.median) <- c('FCset_ng', 'median') P.sd <- aggregate(P$log2ratios ~ P$FCset_ng, FUN=sd) names(P.sd) <- c('FCset_ng', 'sd') rv <- data.frame(FCset_ng=P.mean$FCset_ng, mean=P.mean$mean, sd=P.sd$sd, cv=P.sd$sd/P.mean$mean, length=P.length$length, median=P.median$median) rv }
library(parallel) set.seed(1) P.replicate <- do.call('rbind', mclapply(1:200, function(run){ do.call('rbind', lapply(1:29, function(i) { rv <- FCstatistics(FCrandom(P, i)) rv$run = run rv })) }))
xyplot(mean ~ length/6 | FCset_ng, data=P.replicate, panel = function(...){ panel.xyplot(...) panel.abline(h=0, col='red') }, strip = strip.custom(strip.names = TRUE, strip.levels = TRUE), ylim=c(-1,1), pch=16, col=rgb(0.5, 0.5, 0.5, alpha = 0.1), layout = c(4,1), xlab= 'number of FlyCodes', ylab ='mean of random log2ratios' )
TODO(cp): rename FGset to ConcGr
op <- par(mfrow=c(1,4)) lapply(unique(P.replicate$FCset_ng), function(FCset_ng){ S <- P.replicate[P.replicate$FCset_ng == FCset_ng,] nFlyCodes <- S$length / 6 plot(S$mean ~ nFlyCodes, pch=16,col=rgb(0.1,0.1,0.9,alpha=0.2), main=FCset_ng, ylim=c(-2,2), xlim=c(0,30)) col.sd <- rgb(0.9,0.0,0.3,alpha=0.1) points(nFlyCodes, S$mean + S$sd, col=col.sd, pch=16) points(nFlyCodes, S$mean - S$sd, col=col.sd, pch=16) })
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.