# Hello, world!
#
# This is an example function named 'hello'
# which prints 'Hello, world!'.
#
# You can learn more about package authoring with RStudio at:
#
# http://r-pkgs.had.co.nz/
#
# Some useful keyboard shortcuts for package authoring:
#
# Build and Reload Package: 'Cmd + Shift + B'
# Check Package: 'Cmd + Shift + E'
# Test Package: 'Cmd + Shift + T'
testBerkeley<-function(){
#library(NITPicker)
#boy growth rates
mat=growth$hgtm
xVals=growth$age
plot(c(), xlim=c(min(xVals), max(xVals)),
ylim=c(min(mat), max(mat)),
xlab='age', ylab='height')
apply(mat, 2, function(i){
lines(xVals, i)
})
#growth rate
rateBoy=t(sapply(c(2:length(xVals)), function(i){
(mat[i,]-mat[i-1,])/(xVals[i]-xVals[i-1])
}))
plot(c(), xlim=c(min(xVals), max(xVals)),
ylim=c(min(rateBoy), max(rateBoy)),
xlab='age', ylab='growth rate')
apply(rateBoy, 2, function(i){
lines(xVals[2:length(xVals)], i)
})
#girl growth rates
#boy growth rates
mat=growth$hgtf
xVals=growth$age
plot(c(), xlim=c(min(xVals), max(xVals)),
ylim=c(min(mat), max(mat)),
xlab='age', ylab='height')
apply(mat, 2, function(i){
lines(xVals, i)
})
#growth rate
rateGirl=t(sapply(c(2:length(xVals)), function(i){
(mat[i,]-mat[i-1,])/(xVals[i]-xVals[i-1])
}))
plot(c(), xlim=c(min(xVals), max(xVals)),
ylim=c(min(rateGirl), max(rateGirl)),
xlab='age', ylab='growth rate')
apply(rateGirl, 2, function(i){
lines(xVals[2:length(xVals)], i, col='darkred')
})
apply(rateBoy, 2, function(i){
lines(xVals[2:length(xVals)], i, col='darkblue')
})
#generate perturbations:
plot(c(), xlim=c(min(xVals), max(xVals)),
ylim=c(min(rateGirl), max(rateGirl)),
xlab='age', ylab='growth rate (sampled)')
mat=rateGirl
tp=xVals[2:length(xVals)]
generated=generatePerturbations(mat, tp, spline=1)
for(i in c(1:20)){
lines(generated$time, generated$ft[,i], col='darkred')
}
mat=rateBoy
tp=xVals[2:length(xVals)]
generated=generatePerturbations(mat, tp, spline = 1)
for(i in c(1:20)){
lines(generated$time, generated$ft[,i], col='darkblue')
}
#plot inverse fano factors (dispersion index) of sampled curves
mat=rateGirl
tp=xVals[2:length(xVals)]
generated1=generatePerturbations(mat, tp, spline=1, numPert=2000)
mat=rateBoy
generated2=generatePerturbations(mat, tp, spline=1, numPert=2000)
diff=generated1$ft-generated2$ft
plot(c(), xlim=c(min(diff), max(diff)),
ylim=c(0, 30),
xlab='age', ylab='growth rate (sampled)')
for(i in c(1:100)){
lines(generated1$time, generated1$ft[,i], col='darkred')
}
for(i in c(1:100)){
lines(generated2$time, generated2$ft[,i], col='darkblue')
}
plot(c(), xlim=c(min(generated1$time), max(generated1$time)),
ylim=c(-10, 10),
xlab='age', ylab='growth rate (sampled)')
variances=apply(diff, 1, function(i){var(i)})
print(length(variances))
for(i in c(1:1000)){
# print(length(generated$ft[,i]))
lines(generated1$time, (diff[,i])/sqrt(variances), col=rgb(0.1, 0.1, 0.1, 0.01))
}
plot(c(), xlim=c(min(xVals), max(xVals)),
ylim=c(0, 30),
xlab='age', ylab='growth rate (sampled)')
variances=apply(diff, 1, function(i){var(i)})
print(length(variances))
for(i in c(1:1000)){
# print(length(generated$ft[,i]))
lines(tp, approx(generated$time, y=(diff[,i])/sqrt(variances), xout=tp)$y, col=rgb(0.1, 0.1, 0.1, 0.1))
}
#input this, sampled at the original time points, into F1
tp=xVals[2:length(xVals)]
set.seed(123)
sapply(c(1:50), function(sam){
mat1=rateGirl
subset1=sample(1:length(mat1[1,]), 0.5*length(mat1[1,]))
mat2=rateBoy
subset2=sample(1:length(mat2[1,]), 0.5*length(mat2[1,]))
b=findPathF3(tp, mat1[,subset1], mat2[,subset2], 5, spline=1, numPerts=1000, fast=T)
# mat=rateGirl
# subset1=sample(1:length(mat[1,]), 0.5*length(mat[1,]))
# generated1=generatePerturbations(mat[,subset1], tp, spline=1, numPert=2000)
#
# # generated2=generatePerturbations(mat[,subset2], tp, spline=1, numPert=2000)
# diff=generated1$ft-generated2$ft
# print('calculated diff')
# variances=apply(diff, 1, function(i){var(i)})
# print('calculated var')
# mat=sapply(c(1:100), function(k){
# # print(length(generated$ft[,i]))
# approx(generated1$time, y=(diff[,k]*diff[,k])/(variances*2000), xout=tp)$y
# })
#
#
#
# b=findPath(tp, rep(0, length(tp)), mat, 5, 1, numPerts=20, multiple=F)
# plot(c(), xlim=c(min(xVals), max(xVals)),
# ylim=c(0, 30),
# xlab='age', ylab='growth rate (sampled)')
# for(i in c(1:100)){
# # print(length(generated$ft[,i]))
# lines(tp, mat1[,i], col=rgb(0.1, 0.1, 0.1, 0.1))
# }
# abline(v=tp[b])
write.table(b, file=paste('selection_berkeleyDispersion', sam, '.txt', sep=''))
write.table(subset1, file=paste('subset1_berkeleyDispersion', sam, '.txt', sep=''))
write.table(subset2, file=paste('subset2_berkeleyDispersion', sam, '.txt', sep=''))
print(b)
})
#draw an example
sampledPoints=sapply(c(1:50), function(sam){
read.table(paste('selection_berkeleyDispersion', sam, '.txt', sep=''))[,1]
})
sampledCurves1=sapply(c(1:50), function(sam){
read.table(paste('subset1_berkeleyDispersion', sam, '.txt', sep=''))[,1]
})
sampledCurves2=sapply(c(1:50), function(sam){
read.table(paste('subset2_berkeleyDispersion', sam, '.txt', sep=''))[,1]
})
set.seed(12)
#Using all the points, try to classify by gender, for each of the subsamples
predAllPoints=sapply(c(1:dim(sampledCurves1)[2]), function(i){
combMat=cbind(rateGirl[,sampledCurves1[,i]], rateBoy[,sampledCurves2[,i]])
fdat=fdata(t(combMat), argvals=tp)
gender=as.factor(sapply(colnames(combMat), function(j){substring(j, 1, 1)[[1]][1]}))
a=classif.DD(gender, fdat)
combMat=cbind(rateGirl[,which(!(c(1:length(rateGirl[1,])) %in% sampledCurves1[,i]))], rateBoy[,which(!(c(1:length(rateBoy[1,])) %in% sampledCurves2[,i]))])
fdat=fdata(t(combMat), argvals=tp)
gender=as.factor(sapply(colnames(combMat), function(j){substring(j, 1, 1)[[1]][1]}))
a=predict(a, fdat)
length(which(as.character(a)==as.character(gender)))/length(gender)
})
#repeat, but only include 5 time points, sampled randomly
predRandomPoints=sapply(c(1:dim(sampledCurves1)[2]), function(i){
pts=sample(c(1:dim(rateGirl)[1]), 5)
combMat=cbind(rateGirl[pts,sampledCurves1[,i]], rateBoy[pts,sampledCurves2[,i]])
fdat=fdata(t(combMat), argvals=tp[pts])
gender=as.factor(sapply(colnames(combMat), function(j){substring(j, 1, 1)[[1]][1]}))
a=classif.DD(gender, fdat)
combMat=cbind(rateGirl[pts,which(!(c(1:length(rateGirl[1,])) %in% sampledCurves1[,i]))], rateBoy[pts,which(!(c(1:length(rateBoy[1,])) %in% sampledCurves2[,i]))])
fdat=fdata(t(combMat), argvals=tp[pts])
gender=as.factor(sapply(colnames(combMat), function(j){substring(j, 1, 1)[[1]][1]}))
a=predict(a, fdat)
length(which(as.character(a)==as.character(gender)))/length(gender)
})
predEvenPoints=sapply(c(1:dim(sampledCurves1)[2]), function(i){
pts=c(1, 7, 13, 22, 30)#sample(c(1:dim(rateGirl)[1]), 5)
combMat=cbind(rateGirl[pts,sampledCurves1[,i]], rateBoy[pts,sampledCurves2[,i]])
fdat=fdata(t(combMat), argvals=tp[pts])
gender=as.factor(sapply(colnames(combMat), function(j){substring(j, 1, 1)[[1]][1]}))
a=classif.DD(gender, fdat)
combMat=cbind(rateGirl[pts,which(!(c(1:length(rateGirl[1,])) %in% sampledCurves1[,i]))], rateBoy[pts,which(!(c(1:length(rateBoy[1,])) %in% sampledCurves2[,i]))])
fdat=fdata(t(combMat), argvals=tp[pts])
gender=as.factor(sapply(colnames(combMat), function(j){substring(j, 1, 1)[[1]][1]}))
a=predict(a, fdat)
length(which(as.character(a)==as.character(gender)))/length(gender)
})
predNITPickPoints=sapply(c(1:dim(sampledCurves1)[2]), function(i){
pts=sampledPoints[,i]
print(pts)
combMat=cbind(rateGirl[pts,sampledCurves1[,i]], rateBoy[pts,sampledCurves2[,i]])
fdat=fdata(t(combMat), argvals=tp[pts])
gender=as.factor(sapply(colnames(combMat), function(j){substring(j, 1, 1)[[1]][1]}))
a=classif.DD(gender, fdat)
print('trained')
combMat=cbind(rateGirl[pts,which(!(c(1:length(rateGirl[1,])) %in% sampledCurves1[,i]))], rateBoy[pts,which(!(c(1:length(rateBoy[1,])) %in% sampledCurves2[,i]))])
fdat=fdata(t(combMat), argvals=tp[pts])
gender=as.factor(sapply(colnames(combMat), function(j){substring(j, 1, 1)[[1]][1]}))
print('tested')
#print(a)
a=predict(a, fdat)
length(which(as.character(a)==as.character(gender)))/length(gender)
})
pdf(paste("Figure_growthRate2.pdf", sep=""), width=8, height=6.6,pointsize = 12)
par(mfrow=c(2,2));
par(mar=c(4, 4, 1, 1)+0.1);
par(cex=0.9)
set.seed(1234)
#################plot original curves with lines indicated
sam=1
cols=sampledCurves1[,sam]
mat=rateGirl
matTest=mat[,which(!(c(1:length(mat[1,])) %in% cols))]
plot(c(), xlim=c(1, max(tp)), ylim=c(min(mat), max(mat)), xlab='age (yrs)', ylab='growth rate (cm/yr)')
apply(matTest, 2, function(i){lines(tp, i, col='darkred')})
#abline(v=sampledPoints[,1])
mat=rateBoy
matTest=mat[,which(!(c(1:length(mat[1,])) %in% cols))]
apply(matTest, 2, function(i){lines(tp, i, col='darkblue')})
abline(v=tp[sampledPoints[,1]])
legend(3,30, c('girl', 'boy'), lty=1, col=c('darkred', 'darkblue'), bty='n')
#################plot generated curves with lines indicated
#generate perturbations:
plot(c(), xlim=c(1, max(tp)), ylim=c(min(mat), max(mat)), xlab='age (yrs)', ylab='growth rate (cm/yr) - sampled')
mat=rateGirl
tp=xVals[2:length(xVals)]
generated=generatePerturbations(mat, tp, spline=1)
for(i in c(1:20)){
lines(generated$time, generated$ft[,i], col='darkred')
}
mat=rateBoy
tp=xVals[2:length(xVals)]
generated=generatePerturbations(mat, tp, spline = 1)
for(i in c(1:20)){
lines(generated$time, generated$ft[,i], col='darkblue')
}
abline(v=tp[sampledPoints[,1]])
################sampled inverse CV curves
mat=rateGirl
tp=xVals[2:length(xVals)]
generated1=generatePerturbations(mat, tp, spline=1, numPert=2000)
mat=rateBoy
generated2=generatePerturbations(mat, tp, spline=1, numPert=2000)
diff=generated1$ft-generated2$ft
plot(c(), xlim=c(min(xVals), max(xVals)),
ylim=c(-10, 10),
xlab='age (yrs)', ylab='inverse coefficient of variation - sampled')
variances=apply(diff, 1, function(i){var(i)})
print(length(variances))
for(i in c(1:1000)){
# print(length(generated$ft[,i]))
lines(tp, approx(generated1$time, y=(diff[,i])/sqrt(variances), xout=tp)$y, col=rgb(0, 0.5, 0, 0.1))
}
abline(v=tp[sampledPoints[,1]])
predCompiled=list(predAllPoints*100, predNITPickPoints*100, predRandomPoints*100, predEvenPoints*100)
boxplot( predCompiled, names=c('All points', 'NITPick', 'Random', 'Even'), at=c(1,2,3,4), xlab='point selection strategy', ylab='% accuracy')
sapply(c(1:length(predCompiled)), function(i){
points(sapply(1:length(predCompiled[[i]]), function(k){jitter(i, amount=0.2)}), predCompiled[[i]], col='darkgreen', pch=20)
})
dev.off()
}
testCanada <-function(){
#library(NITPicker)
library(gridBase)
library(grid)
set.seed(987)
mat=CanadianWeather$monthlyTemp
sapply(c(1:10), function(sam){
subset=sample(1:length(mat[1,]), 0.5*length(mat[1,]))
b=findPathF2(c(1:length(mat[,'Resolute'])), mat[,'Resolute'], mat[,subset], 5, spline=1, numPerts=1000)
write.table(b, file=paste('canadaSelection_monthly', sam, '.txt', sep=''))
write.table(subset, file=paste('canadaSubset_monthly', sam, '.txt', sep=''))
plot(mat[,'Resolute'], type='l')
abline(v=b)
print(b)
})
sampledPoints=sapply(c(1:10), function(sam){
read.table(paste('canadaSelection_monthly', sam, '.txt', sep=''))[,1]
})
sampledCurves=sapply(c(1:10), function(sam){
read.table(paste('canadaSubset_monthly', sam, '.txt', sep=''))[,1]
})
pdf(paste("Figure_CanadaWeather.pdf", sep=""), width=9.5, height=6.6,pointsize = 12)
#par(mfrow=c(2,2));
par(mar=c(4, 4, 1, 1)+0.1);
par(cex=0.9)
layout(matrix(c(1,1,1,2,2,2,
3,3, 4,4,5,5), 2, 6, byrow = TRUE))
set.seed(987)
generated=generatePerturbations(mat, c(1:length(mat[,1])))
plot(c(), xlim=c(1,length(mat[,1])),
ylim=c(min(mat), 30),
xlab='month', ylab='avg. temperature (C)', xaxt="n")
axis(1, c(1:12), rownames(mat))
apply(mat, 2, function(i){
lines(i)
})
lines(mat[,'Resolute'], col="red")
for(i in c(1:20)){
lines(generated$time, generated$ft[,i], col='darkblue', lty=2)
}
abline(v=sampledPoints[,1])
legend(5.5, -10, c('Resolute (control)', 'other Canadian cities', 'sampled functions'), col=c('red', 'black', 'darkblue'), lty=c(1, 1, 2), bty='o')
distancesTestSet=sapply(c(1:10), function(sam){
cols=sampledCurves[,sam]
matTest=mat[,which(!(c(1:length(mat[1,])) %in% cols))]
indexTest=sampledPoints[,sam]
apply(matTest, 2, function(place){
L2(c(1:length(mat[,1])), mat[,'Resolute'], place, 1, length(mat[,1]), indexTest)
})
})
row.names(distancesTestSet)=NULL
distancesTestSetRandom=sapply(c(1:10), function(sam){
cols=sampledCurves[,sam]
matTest=mat[,which(!(c(1:length(mat[1,])) %in% cols))]
indexTest=sort(sample(1:length(mat[,1]), length(sampledPoints[,sam]))) #sampledPoints[,sam]
print(indexTest)
apply(matTest, 2, function(place){
L2(c(1:length(mat[,1])), mat[,'Resolute'], place, 1, length(mat[,1]), indexTest)
})
})
row.names(distancesTestSet)=NULL
distancesTestSetEven=sapply(c(1:10), function(sam){
cols=sampledCurves[,sam]
matTest=mat[,which(!(c(1:length(mat[1,])) %in% cols))]
indexTest=c(1,3,6,9,12)
apply(matTest, 2, function(place){
L2(c(1:length(mat[,1])), mat[,'Resolute'], place, 1, length(mat[,1]), indexTest)
})
})
row.names(distancesTestSet)=NULL
allTogetherForGGPlot=matrix(ncol=4)
colnames(allTogetherForGGPlot)=c('replicate', 'place', 'L2', 'type')
for(i in 1:length(distancesTestSet[1,])){
for(j in 1:length(distancesTestSet[,1])){
allTogetherForGGPlot=rbind(allTogetherForGGPlot, c(i-1, j, sqrt(distancesTestSet)[j,i], 'NITPick'))
}
}
for(i in 1:length(distancesTestSetEven[1,])){
for(j in 1:length(distancesTestSetEven[,1])){
allTogetherForGGPlot=rbind(allTogetherForGGPlot, c(i-1, j, sqrt(distancesTestSetEven)[j,i], 'Even'))
}
}
for(i in 1:length(distancesTestSetRandom[1,])){
for(j in 1:length(distancesTestSetRandom[,1])){
# print(paste(distancesTestSetRandom[j,i], sqrt(distancesTestSetRandom[j,i])))
temp=c(i, j, sqrt(distancesTestSetRandom)[j,i], 'Random')
allTogetherForGGPlot=rbind(allTogetherForGGPlot, c(i-1, j, sqrt(distancesTestSetRandom)[j,i], 'Random'))
}
}
allTogetherForGGPlot=data.frame(allTogetherForGGPlot)
allTogetherForGGPlot[,1]=as.factor(allTogetherForGGPlot[,1])
allTogetherForGGPlot[,2]=as.factor(allTogetherForGGPlot[,2])
allTogetherForGGPlot[,3]=as.numeric(as.character(allTogetherForGGPlot[,3]))
allTogetherForGGPlot[,4]=factor(allTogetherForGGPlot[,4], levels=c('NITPick', 'Random', 'Even'))
allTogetherForGGPlot=allTogetherForGGPlot[-1,]
####plot comparisons
plot.new()
vps <- baseViewports()
pushViewport(vps$figure) ## I am in the space of the autocorrelation plot
vp1 <-plotViewport(c(0,0,0,0))#c(1.8,1,0,1)) ## create new vp with margins, you play with this values
require(ggplot2)
e <- ggplot(allTogetherForGGPlot, aes(x=replicate, y=L2, color=type))+
scale_color_manual(values = c("#00AFBB", "#E7B800", "#FC4E07"))+geom_boxplot()
print(e,vp = vp1)
#boxplot()
######Draw one example:
sam=1
cols=sampledCurves[,sam]
matTest=mat[,which(!(c(1:length(mat[1,])) %in% cols))]
diff=apply(matTest, 2, function(i){i-mat[,'Resolute']})[]
plot(c(), xlim=c(1, length(mat[,1])), ylim=c(min(diff), max(diff)), xlab='month', ylab='difference in temperature (C)', xaxt="n")
axis(1, c(1:12), rownames(mat))
apply(diff, 2, function(i){lines(i, col='lightgrey')})
abline(v=sampledPoints[,1])
pts=sampledPoints[,1]
apply(diff, 2, function(i){
xvals=c(1:length(i),
length(i),
pts[length(pts):1],
1)
yvals=c(i,
i[pts[length(pts)]],
i[pts[length(pts):1]],
i[pts[1]])
polygon(xvals, yvals, border=NA, col=rgb(0.1, 0.7, 0.9, 0.4))
})
plot(c(), xlim=c(1, length(mat[,1])), ylim=c(min(diff), max(diff)), xlab='month', ylab='difference in temperature (C)', xaxt="n")
axis(1, c(1:12), rownames(mat))
apply(diff, 2, function(i){lines(i, col='lightgrey')})
pts=sort(sample(1:length(mat[,1]), length(sampledPoints[,sam])))#sampledPoints[,1]
abline(v=pts)
apply(diff, 2, function(i){
xvals=c(1:length(i),
length(i),
pts[length(pts):1],
1)
yvals=c(i,
i[pts[length(pts)]],
i[pts[length(pts):1]],
i[pts[1]])
polygon(xvals, yvals, border=NA, col=rgb(0.7, 0.6, 0, 0.4))
})
plot(c(), xlim=c(1, length(mat[,1])), ylim=c(min(diff), max(diff)), xlab='month', ylab='difference in temperature (C)', xaxt="n")
axis(1, c(1:12), rownames(mat))
apply(diff, 2, function(i){lines(i, col='lightgrey')})
pts=c(1,3,6,9,12)
abline(v=pts)
apply(diff, 2, function(i){
xvals=c(1:length(i),
length(i),
pts[length(pts):1],
1)
yvals=c(i,
i[pts[length(pts)]],
i[pts[length(pts):1]],
i[pts[1]])
polygon(xvals, yvals, border=NA, col=rgb(0.9, 0.05, 0.05, 0.4))
})
dev.off()
}
testPerturb <-function(){
data('simu_warp')
out1=gauss_model(simu_warp,n = 10)
#original
plot(simu_warp$time, simu_warp$f0[,1], type='l', ylim=c(0, 1.5))
for(i in c(1:length(simu_warp$f0[1,]))){
lines(simu_warp$time, simu_warp$f0[,i])
}
#generated
tw= time_warping(simu_warp$f0, simu_warp$time, MaxItr = 1000, showplot=FALSE)
out1=gauss_model(tw,n = 10)
plot(simu_warp$time, out1$ft[,1], type='l', ylim=c(0, 1.5))
for(i in c(1:length(out1$ft[1,]))){
lines(simu_warp$time, out1$ft[,i])
}
x=seq(0, 1, 0.01) #sort(seq(0, 1, 0.01)+rnorm(length(seq(0, 1, 0.01)), 0, 0.5))
training=sapply(c(1:21), function(i){
rnorm(1, 1, 0.3)*sin(x*20+rnorm(1, 0, 0.1))+rnorm(length(x), 0, 0.1) #+rnorm(length(x), 0, 0.5),
})
tp=x
tw= time_warping(training, tp, MaxItr = 200, method='median', showplot=FALSE)
gm=gauss_model(tw, n=10)
#original
plot(tp, training[,1], type='l', ylim=c(-1.5, 1.5))
for(i in c(1:length(training[1,]))){
lines(tp, training[,i])
}
#aligned
tw=align_fPCA(training, tp, num_comp = 10)
plot(tp, tw$fn[,1], type='l', ylim=c(-1.5, 1.5))
for(i in c(1:length(tw$fn[1,]))){
lines(tp, tw$fn[,i])
}
#generated
plot(tp, gm$ft[,1], type='l', ylim=c(-1.5, 1.5))
for(i in c(1:length(gm$ft[1,]))){
lines(tp, gm$ft[,i], col='red')
}
x=seq(0, 1, 0.05) #sort(seq(0, 1, 0.01)+rnorm(length(seq(0, 1, 0.01)), 0, 0.5))
training=sapply(c(1:21), function(i){
rnorm(1, 1, 0.3)*sin(x*20+rnorm(1, 0, 0.1))+rnorm(length(x), 0, 0.1) #+rnorm(length(x), 0, 0.5),
})
tp=x
plot(tp, training[,1], type='l', ylim=c(-1.5, 1.5))
for(i in c(1:10)){
lines(tp, training[,i])
}
plot(x, training[,1])
lines(seq(min(tp), max(tp), 0.01), deBoorWrapper(seq(min(tp), max(tp), 0.01), tp, training[,1], 3))
generated=generatePerturbations(training, tp)
plot(tp, training[,1], type='l', ylim=c(-1.5, 1.5))
for(i in c(1:10)){
lines(tp, training[,i])
}
for(i in c(1:10)){
lines(generated$time, generated$ft[,i], col='red')
}
scoreF1(tp, training[,1], training, 0.11, 0.5, c(3, 5, 6, 9, 11, 12, 15))
}
grab_grob <- function(){
grid.echo()
grid.grab()
}
drawSurveyBasicAnalysisFigures<-function(){
library(gridGraphics)
library(grid)
library(glmnet)
###########draw stats about survey#######################
survey=read.table("surveyData_nov_1_2017.txt", sep="\t", header=T)[,1:50]
pdf(paste("Figure_nitpicker_pieChart_level.pdf", sep=""), width=3.5, height=3,pointsize = 10)
par(mfrow=c(1,1));
par(mar=c(10, 10, 10, 10)+0.1);
par(cex=0.5)
pie(table(survey[,"level"]), col=c("#FF9900", "#996699", "#00AFBB", "#E7B800", "#FC4E07", "#33CC99"))
dev.off()
pdf(paste("Figure_nitpicker_barplot_fieldDistribution.pdf", sep=""), width=7, height=3,pointsize = 10)
par(mfrow=c(1,1));
par(mar=c(5, 5, 5, 5)+0.1);
par(cex=0.5)
listOfFields=lapply(survey[,3], function(i){strsplit(as.character(i), ", ")[[1]]})
barplot(table(unlist(listOfFields))/length(listOfFields)*100, ylab="%")
dev.off()
################what strategies did people *claim* they used?
strategies=colnames(survey)[44:50]
strategyByPerson=survey[44:49]
strategyCount=sapply(strategies, function(i){
table(survey[,i])/length(survey[,1])*100
})
pdf(paste("Figure_nitpicker_barplot_strategiesClaimed.pdf", sep=""), width=8.5, height=3,pointsize = 7)
par(mfrow=c(1,1));
par(xpd=TRUE)
par(mar=c(5, 9, 3, 3)+0.1);
par(cex=1.2)
#barplot(strategyCount, ylab='%')
strategyCount2=strategyCount[,1:6]
colnames(strategyCount2)=c('peak expression', 'far apart', 'evenly spaced', 'largest perturbation', 'greatest slope', 'multiple genes expressed')
barplot(strategyCount2, ylab='%', xlab='time point selection heuristic')
legend(-1.5, 105, paste(c(5:1), c('(always)', '', '','','(never)')), fill=grey.colors(5)[5:1], bty='n')
dev.off()
library("gplots")
#combination of strategies?
# heatmap.2(apply(survey[,strategies[1:6]], c(1,2), function(i){as.numeric(substring(as.character(i),1,1))}), scale="none", col='bluered', trace="none")
a=apply(survey[,strategies[1:6]], c(1,2), function(i){as.numeric(substring(as.character(i),1,1))})
# pdf(paste("Figure_nitpicker_heatmap_strategiesCorrelation.pdf", sep=""), width=5, height=5, pointsize = 10)
# #par(mfrow=c(1,1));
#
# par(mar=c(30, 30, 12, 12)+0.1);
# #par(cex=0.1)
# temp=sapply(strategies[1:6], function(i){sapply(strategies[1:6], function(j){
# cor(a[,i], a[,j])
# })})
# colnames(temp)=c('peak expression', 'far apart', 'evenly spaced', 'largest perturbation', 'greatest slope', 'multiple genes expressed')
# rownames(temp)=c('peak expression', 'far apart', 'evenly spaced', 'largest perturbation', 'greatest slope', 'multiple genes expressed')
#
# heatmap.2(temp, scale="none", col='bluered', trace="none", symm=T, margins=c(16,16), key.title='Key', denscol='black', key.xlab='correlation')
# dev.off()
###############what points did people suggest?
freqTP_withGrant=sapply(c(1:4), function(i){
sapply(c(1:26), function(j){
length(which(survey[,paste("Q", i, "T", c(1:10), sep="")]==j))
})
})
freqTP_withoutGrant=sapply(c(1:4), function(i){
sapply(c(1:26), function(j){
length(which(survey[,paste("Q", i, "T", c(1:5), sep="")]==j))
})
})
#####Read in the values that are part of the survey:
setwd("surveyGraphs")
#read in all the perturbation sets
files=paste("simulateSkewedNormal_", c("mean", "stdev", "skew", "amp"), "_inputs.txt", sep="")
perturbations=lapply(files, function(i){
a=read.table(i, header=T)
a=a[,c(1,2,seq(3, 80, 3))]
a
})
names(perturbations)=c("mean", "stdev", "skew", "amp")
#first 4 genes from mean
meanPlots=lapply(c(1:4), function(geneID){
perturbations[["mean"]][which(perturbations[["mean"]][,"GeneID"]==geneID),c(3:length(perturbations[["mean"]][1,]))]
})
####combine these into a table for doing F1:
y1=sapply(meanPlots, function(i){i[1,]})
meanPlotsTransposed=lapply(meanPlots, function(i){
m=as.matrix(t(i))
print(dim(m))
#control=m[,1]
#div=m[,2]+m[,3]-control
sm=apply(m, 2, function(j){if(sum(j)==0){j}else{
j/sum(abs(j))}})
sm
})
##combine
#meanPlotsCombined=cbind()
y1=sapply(meanPlotsTransposed, function(i){i[,1]})
plot(c(), xlim=c(0,26), ylim=c(-0.3, 0.3))
########generate a lot of perturbations, subtract from control
meanPerturbations=sapply(c(1:4), function(geneID){
pertAll=generatePerturbations(meanPlotsTransposed[[geneID]], c(1:26), numPert=10000, spline=1)
pert=pertAll$ft
tim=pertAll$time
temp=apply(pert, 2, function(i){
ap=approx(tim, i, xout=c(1:26))
ap$y-meanPlotsTransposed[[geneID]][,1]
#plot(c(), xlim=c(0,26), ylim=c(-0.3, 0.3))
lines(c(1:26), ap$y-meanPlotsTransposed[[geneID]][,1], col=rgb(0.1,0.1,0.1,0.03))
ap$y-meanPlotsTransposed[[geneID]][,1]
})
#print(dim(temp))
apply(temp, 1, function(i){ mean(i)})
})
b=findPathF1(c(1:26), meanPerturbations, 5, 1, numPerts=1000, resampleTraining = F)
#b=findPath(c(1:26), t(y1), meanPlotsTransposed, 5, 1, multiple=T, type=1, numPerts=10)
pdf(paste("Figure_nitpicker_samplingWorks.pdf", sep=""), width=4, height=6,pointsize = 14)
par(mfrow=c(4,1));
par(mar=c(4, 4, 1, 1)+0.1);
par(cex=0.5)
perts=generatePerturbations(meanPlotsTransposed[[1]], c(1:26), spline=1)
plot(c(), ylim=c(min(perts$ft), max(perts$ft)), xlim=c(0, 26), xlab='time', ylab='gene expression -sampled')
sapply(c(1:20), function(i){
lines(perts$time, perts$ft[,i])
})
perts=generatePerturbations(meanPlotsTransposed[[2]], c(1:26), spline=1)
plot(c(), ylim=c(min(perts$ft), max(perts$ft)), xlim=c(0, 26), xlab='time', ylab='gene expression -sampled')
sapply(c(1:20), function(i){
lines(perts$time, perts$ft[,i])
})
perts=generatePerturbations(meanPlotsTransposed[[3]], c(1:26), spline=1)
plot(c(), ylim=c(min(perts$ft), max(perts$ft)), xlim=c(0, 26), xlab='time', ylab='gene expression -sampled')
sapply(c(1:20), function(i){
lines(perts$time, perts$ft[,i])
})
perts=generatePerturbations(meanPlotsTransposed[[4]], c(1:26), spline=1)
plot(c(), ylim=c(min(perts$ft), max(perts$ft)), xlim=c(0, 26), xlab='time', ylab='gene expression -sampled')
sapply(c(1:20), function(i){
lines(perts$time, perts$ft[,i])
})
dev.off()
#################
set.seed(123)
meanPlots=lapply(c(1:4), function(geneID){
perturbations[["mean"]][which(perturbations[["mean"]][,"GeneID"]==geneID),c(3:length(perturbations[["mean"]][1,]))]
})
geneIDs=list(1:4, 5:8, 9:12, 13:16)
mat=rbind(perturbations[["mean"]][which(perturbations[["mean"]][,"GeneID"] %in% geneIDs[[1]]),],
perturbations[["stdev"]][which(perturbations[["stdev"]][,"GeneID"] %in% geneIDs[[2]]),],
perturbations[["skew"]][which(perturbations[["skew"]][,"GeneID"] %in% geneIDs[[3]]),],
perturbations[["amp"]][which(perturbations[["amp"]][,"GeneID"] %in% geneIDs[[4]]),]
)
write.table(
mat, quote=F, row.names = F, file='surveyData.txt')
set.seed(123)
####combine these into a table for doing F1:
y1=sapply(meanPlots, function(i){i[1,]})
meanPlotsTransposed=lapply(meanPlots, function(i){
m=as.matrix(t(i))
print(dim(m))
#control=m[,1]
#div=m[,2]+m[,3]-control
sm=apply(m, 2, function(j){if(sum(j)==0){j}else{
j/sum(abs(j))}})
sm
})
meanPertsAll=lapply(c(1:4), function(geneID){
pertAll=generatePerturbations(t(meanPlots[[geneID]]), c(1:26), numPert=1000, spline=1)
a=pertAll$ft
b=pertAll$time
apply(a, 2, function(j){ approx(b, j, c(1:26))$y})
})
meanPerturbations=sapply(c(1:4), function(geneID){
pertAll=generatePerturbations(meanPlotsTransposed[[geneID]], c(1:26), numPert=10000, spline=1)
pert=pertAll$ft
tim=pertAll$time
temp=apply(pert, 2, function(i){
ap=approx(tim, i, xout=c(1:26))
ap$y-meanPlotsTransposed[[geneID]][,1]
#plot(c(), xlim=c(0,26), ylim=c(-0.3, 0.3))
lines(c(1:26), ap$y-meanPlotsTransposed[[geneID]][,1], col=rgb(0.1,0.1,0.1,0.03))
ap$y-meanPlotsTransposed[[geneID]][,1]
})
#print(dim(temp))
apply(temp, 1, function(i){ mean(i)})
})
#y1=matrix(rep(0, 26*4), nrow=26, ncol=4)
#b=findPath(c(1:length(meanPlots[[1]][1,])), t(y1), meanPlotsTransposed, 5, 1, multiple=T, type=1, numPerts=10)
y_together=cbind(meanPlotsTransposed[[1]][,1], meanPlotsTransposed[[2]][,1], meanPlotsTransposed[[3]][,1], meanPlotsTransposed[[4]][,1])
b_mean10=findPathF2(c(1:26), y_together, meanPlotsTransposed, 10, spline=1, numPerts=10000, resampleTraining = F, mult=T)
b_mean5=findPathF2(c(1:26), y_together, meanPlotsTransposed, 5, spline=1, numPerts=10000, resampleTraining = F, mult=T)
#findPathF2(c(1:26), meanPerturbations, 5, spline=1, numPerts=0, resampleTraining = F)
#b=findPath(c(1:26), rep(0,26), meanPerturbations, 10, 1, multiple=F, type=1, numPerts=0, resampleTraining = F)
plot(c(), xlim=c(0,26), ylim=c(-0.15, 0.15))
apply(meanPerturbations, 2, function(i){lines(c(1:26), i)})
abline(v=b)
pertType=1
pdf(paste("Figure_nitpicker_Results_meanForSurvey2.pdf", sep=""), width=4, height=6,pointsize = 10)
par(mfrow=c(5,1));
par(mar=c(4, 4, 1, 1)+0.1);
par(cex=0.5)
geneNames=paste("Gene", c("D", "E", "F", "G"))
counter=1
for(i in meanPlots){
# plot(c(), c(), xlim=c(0,dim(i)[2]), ylim=c(0, max(i)), main=geneNames[counter], xlab="time", ylab="gene expression")
plot(c(), c(), xlim=c(0,26), ylim=c(0, max(i)), main=geneNames[counter], xlab="time", ylab="gene expression")
counter=counter+1
lineTypes=c(1,2,3)
for(j in c(1:dim(i)[1])){
lines(c(1:dim(i)[2]), i[j,], lty=lineTypes[j])
}
abline(v=c(1:dim(i)[2]), col="lightgrey")
abline(v=b_mean10, col='red')
#abline(v=c(1:dim(i)[2])[which(freqTP_withGrant[,pertType]>25)], col="red")
}
# plot(freqTP_withGrant[,pertType], col="blue", type="l", ylim=c(0,50), xlab="time", ylab="# survey participants")
plot(freqTP_withGrant[,pertType], col="blue", type="l",xlim=c(0,26), ylim=c(0,50), xlab="time", ylab="# survey participants")
lines(freqTP_withoutGrant[,pertType])
abline(v=c(1:dim(i)[2]), col="lightgrey")
abline(v=b_mean10, col='red')
#abline(v=c(1:dim(i)[2])[which(freqTP_withGrant[,pertType]>25)], col="red")
dev.off()
#5-8 from stdev
pertType=2
set.seed(123)
stdevPlots=lapply(c(5:8), function(geneID){
perturbations[["stdev"]][which(perturbations[["stdev"]][,"GeneID"]==geneID),c(3:length(perturbations[["stdev"]][1,]))]
})
####combine these into a table for doing F1:
y1=sapply(stdevPlots, function(i){i[1,]})
stdevPlotsTransposed=lapply(stdevPlots, function(i){
m=as.matrix(t(i))
print(dim(m))
#control=m[,1]
#div=m[,2]+m[,3]-control
sm=apply(m, 2, function(j){if(sum(j)==0){j}else{
j/sum(abs(j))}})
sm
})
stdevPerturbations=sapply(c(1:4), function(geneID){
pertAll=generatePerturbations(stdevPlotsTransposed[[geneID]], c(1:26), numPert=10000, spline=1)
pert=pertAll$ft
tim=pertAll$time
temp=apply(pert, 2, function(i){
ap=approx(tim, i, xout=c(1:26))
ap$y-stdevPlotsTransposed[[geneID]][,1]
#plot(c(), xlim=c(0,26), ylim=c(-0.3, 0.3))
lines(c(1:26), ap$y-stdevPlotsTransposed[[geneID]][,1], col=rgb(0.1,0.1,0.1,0.03))
ap$y-stdevPlotsTransposed[[geneID]][,1]
})
#print(dim(temp))
apply(temp, 1, function(i){ mean(i)})
})
#y1=matrix(rep(0, 26*4), nrow=26, ncol=4)
#b=findPath(c(1:length(stdevPlots[[1]][1,])), t(y1), stdevPlotsTransposed, 5, 1, multiple=T, type=1, numPerts=10)
#b=findPath(c(1:length(stdevPlots[[1]][1,])), stdevPlots[[1]][1,], t(stdevPlots[[1]]), 5, 1, multiple=F, type=1, numPerts=10)
y_together=cbind(stdevPlotsTransposed[[1]][,1], stdevPlotsTransposed[[2]][,1], stdevPlotsTransposed[[3]][,1], stdevPlotsTransposed[[4]][,1])
b_stdev10=findPathF2(c(1:26), y_together, stdevPlotsTransposed, 10, spline=1, numPerts=10000, resampleTraining = F, mult=T)
b_stdev5=findPathF2(c(1:26), y_together, stdevPlotsTransposed, 5, spline=1, numPerts=10000, resampleTraining = F, mult=T)
#b_stdev10=findPathF1(c(1:26), stdevPerturbations, 10, spline=1, numPerts=0, resampleTraining = F)
#b_stdev5=findPathF1(c(1:26), stdevPerturbations, 5, spline=1, numPerts=0, resampleTraining = F)
plot(c(), xlim=c(0,26), ylim=c(-0.15, 0.15))
apply(stdevPerturbations, 2, function(i){lines(c(1:26), i)})
abline(v=b_stdev10)
pdf(paste("Figure_nitpicker_Results_stdevForSurvey2.pdf", sep=""), width=4, height=6,pointsize = 10)
par(mfrow=c(5,1));
par(mar=c(4, 4, 1, 1)+0.1);
par(cex=0.5)
stdevPlots=lapply(c(5:8), function(geneID){
perturbations[["stdev"]][which(perturbations[["stdev"]][,"GeneID"]==geneID),c(3:length(perturbations[["stdev"]][1,]))]
})
counter=1
for(i in stdevPlots){
plot(c(), c(), xlim=c(0,dim(i)[2]), ylim=c(0, max(i)),main=geneNames[counter], xlab="time", ylab="gene expression")
counter=counter+1
lineTypes=c(1,2,3)
for(j in c(1:dim(i)[1])){
lines(c(1:dim(i)[2]), i[j,], lty=lineTypes[j])
}
abline(v=c(1:dim(i)[2]), col="lightgrey")
abline(v=b_stdev10)
#abline(v=c(1:dim(i)[2])[which(freqTP_withGrant[,pertType]>25)], col="red")
}
plot(freqTP_withGrant[,pertType], col="blue", type="l", ylim=c(0,50), xlim=c(0, 26), xlab="time", ylab="# survey participants")
lines(freqTP_withoutGrant[,pertType])
abline(v=c(1:dim(i)[2]), col="lightgrey")
abline(v=b_stdev10)
#abline(v=c(1:dim(i)[2])[which(freqTP_withGrant[,pertType]>25)], col="red")
dev.off()
#9-12 from skew
pertType=3
set.seed(123)
skewPlots=lapply(c(9:12), function(geneID){
perturbations[["skew"]][which(perturbations[["skew"]][,"GeneID"]==geneID),c(3:length(perturbations[["skew"]][1,]))]
})
####combine these into a table for doing F1:
y1=sapply(skewPlots, function(i){i[1,]})
skewPlotsTransposed=lapply(skewPlots, function(i){
m=as.matrix(t(i))
print(dim(m))
#control=m[,1]
#div=m[,2]+m[,3]-control
sm=apply(m, 2, function(j){if(sum(j)==0){j}else{
j/sum(abs(j))}})
sm
})
skewPerturbations=sapply(c(1:4), function(geneID){
pertAll=generatePerturbations(skewPlotsTransposed[[geneID]], c(1:26), numPert=10000, spline=1)
pert=pertAll$ft
tim=pertAll$time
temp=apply(pert, 2, function(i){
ap=approx(tim, i, xout=c(1:26))
ap$y-skewPlotsTransposed[[geneID]][,1]
#plot(c(), xlim=c(0,26), ylim=c(-0.3, 0.3))
lines(c(1:26), ap$y-skewPlotsTransposed[[geneID]][,1], col=rgb(0.1,0.1,0.1,0.03))
ap$y-skewPlotsTransposed[[geneID]][,1]
})
#print(dim(temp))
apply(temp, 1, function(i){ mean(i)})
})
y1=matrix(rep(0, 26*4), nrow=26, ncol=4)
#b=findPath(c(1:length(skewPlots[[1]][1,])), t(y1), skewPlotsTransposed, 5, 1, multiple=T, type=1, numPerts=10)
#b=findPath(c(1:length(skewPlots[[1]][1,])), skewPlots[[1]][1,], t(skewPlots[[1]]), 5, 1, multiple=F, type=1, numPerts=10)
y_together=cbind(skewPlotsTransposed[[1]][,1], skewPlotsTransposed[[2]][,1], skewPlotsTransposed[[3]][,1], skewPlotsTransposed[[4]][,1])
b_skew10=findPathF2(c(1:26), y_together, skewPlotsTransposed, 10, spline=1, numPerts=10000, resampleTraining = F, mult=T)
b_skew5=findPathF2(c(1:26), y_together, skewPlotsTransposed, 5, spline=1, numPerts=10000, resampleTraining = F, mult=T)
# b_skew10=findPathF1(c(1:26), skewPerturbations, 10, spline=1, numPerts=0, resampleTraining = F)
# b_skew5=findPathF1(c(1:26), skewPerturbations, 5, spline=1, numPerts=0, resampleTraining = F)
plot(c(), xlim=c(0,26), ylim=c(-0.15, 0.15))
apply(skewPerturbations, 2, function(i){lines(c(1:26), i)})
abline(v=b_skew10)
pdf(paste("Figure_nitpicker_Results_skewForSurvey2.pdf", sep=""), width=4, height=6,pointsize = 10)
par(mfrow=c(5,1));
par(mar=c(4, 4, 1, 1)+0.1);
par(cex=0.5)
counter=1
for(i in skewPlots){
plot(c(), c(), xlim=c(0,dim(i)[2]), ylim=c(0, max(i)),main=geneNames[counter], xlab="time", ylab="gene expression")
counter=counter+1
lineTypes=c(1,2,3)
for(j in c(1:dim(i)[1])){
lines(c(1:dim(i)[2]), i[j,], lty=lineTypes[j])
}
abline(v=c(1:dim(i)[2]), col="lightgrey")
abline(v=b_skew10, col='red')
#abline(v=c(1:dim(i)[2])[which(freqTP_withGrant[,pertType]>25)], col="red")
}
plot(freqTP_withGrant[,pertType], col="blue", type="l", ylim=c(0,50), xlim=c(0, 26), xlab="time", ylab="# survey participants")
lines(freqTP_withoutGrant[,pertType])
abline(v=c(1:dim(i)[2]), col="lightgrey")
abline(v=b_skew10, col='red')
# abline(v=c(1:dim(i)[2])[which(freqTP_withGrant[,pertType]>25)], col="red")
dev.off()
#9-12 from amp
pertType=4
set.seed(123)
ampPlots=lapply(c(13:16), function(geneID){
perturbations[["amp"]][which(perturbations[["amp"]][,"GeneID"]==geneID),c(3:length(perturbations[["amp"]][1,]))]
})
####combine these into a table for doing F1:
y1=sapply(ampPlots, function(i){i[1,]})
ampPlotsTransposed=lapply(ampPlots, function(i){
m=as.matrix(t(i))
print(dim(m))
control=m[,1]
# div=m[,2]+m[,3]-control
sm=apply(m, 2, function(j){if(sum(j)==0){j}else{
rnorm(length(j), 0, 0.001)+j/sum(abs(control))}})
sm
})
ampPerturbations=sapply(c(1:4), function(geneID){
pertAll=generatePerturbations(ampPlotsTransposed[[geneID]], c(1:26), numPert=10000, spline=1)
pert=pertAll$ft
print(dim(pert))
tim=pertAll$time
print(tim)
temp=apply(pert, 2, function(i){
ap=approx(tim, i, xout=c(1:26))
ap$y-ampPlotsTransposed[[geneID]][,1]
#plot(c(), xlim=c(0,26), ylim=c(-0.3, 0.3))
lines(c(1:26), ap$y-ampPlotsTransposed[[geneID]][,1], col=rgb(0.1,0.1,0.1,0.03))
ap$y-ampPlotsTransposed[[geneID]][,1]
})
#print(dim(temp))
apply(temp, 1, function(i){ mean(i)})
})
y1=matrix(rep(0, 26*4), nrow=26, ncol=4)
#b=findPath(c(1:length(ampPlots[[1]][1,])), t(y1), ampPlotsTransposed, 5, 1, multiple=T, type=1, numPerts=10)
#b=findPath(c(1:length(ampPlots[[1]][1,])), ampPlots[[1]][1,], t(ampPlots[[1]]), 5, 1, multiple=F, type=1, numPerts=10)
#b_amp10=findPathF1(c(1:26), ampPerturbations, 10, spline=1, numPerts=0, resampleTraining = F)
#b_amp5=findPathF1(c(1:26), ampPerturbations, 5, spline=1, numPerts=0, resampleTraining = F)
y_together=cbind(ampPlotsTransposed[[1]][,1], ampPlotsTransposed[[2]][,1], ampPlotsTransposed[[3]][,1], ampPlotsTransposed[[4]][,1])
b_amp10=findPathF2(c(1:26), y_together, ampPlotsTransposed, 10, spline=1, numPerts=10000, resampleTraining = F, mult=T)
b_amp5=findPathF2(c(1:26), y_together, ampPlotsTransposed, 5, spline=1, numPerts=10000, resampleTraining = F, mult=T)
plot(c(), xlim=c(0,26), ylim=c(-0.15, 0.15))
apply(ampPerturbations, 2, function(i){lines(c(1:26), i)})
abline(v=b_amp10)
#13-16 from amp
pertType=4
pdf(paste("Figure_nitpicker_Results_ampForSurvey2.pdf", sep=""), width=4, height=6,pointsize = 10)
par(mfrow=c(5,1));
par(mar=c(4, 4, 1, 1)+0.1);
par(cex=0.5)
ampPlots=lapply(c(13:16), function(geneID){
perturbations[["amp"]][which(perturbations[["amp"]][,"GeneID"]==geneID),c(3:length(perturbations[["amp"]][1,]))]
})
counter=1
for(i in ampPlots){
plot(c(), c(), xlim=c(0,dim(i)[2]), ylim=c(0, max(i)),main=geneNames[counter], xlab="time", ylab="gene expression")
counter=counter+1
lineTypes=c(1,2,3)
for(j in c(1:dim(i)[1])){
lines(c(1:dim(i)[2]), i[j,], lty=lineTypes[j])
}
abline(v=c(1:dim(i)[2]), col="lightgrey")
# abline(v=c(1:dim(i)[2])[which(freqTP_withGrant[,pertType]>25)], col="red")
abline(v=b_amp10, col='red')
}
plot(freqTP_withGrant[,pertType], col="blue", type="l", ylim=c(0,50), xlim=c(0, 26), xlab="time", ylab="# survey participants")
lines(freqTP_withoutGrant[,pertType])
abline(v=c(1:dim(i)[2]), col="lightgrey")
#abline(v=c(1:dim(i)[2])[which(freqTP_withGrant[,pertType]>25)], col="red")
abline(v=b_amp10, col='red')
dev.off()
#17 from mean, 18 from stdev, 19 from skew, 20 from amp
####Draw average perturbations and lines
pdf(paste("Figure_nitpicker_Results_avgPerturbationSize.pdf", sep=""), width=7, height=6,pointsize = 12)
par(mfrow=c(2,2));
par(mar=c(4, 4, 1, 1)+0.1);
par(cex=1)
plot(c(), xlim=c(0,26), ylim=c(-0.15, 0.15), xlab='time', ylab='avg. change in gene expression', main='mean')
apply(meanPerturbations, 2, function(i){lines(c(1:26), i)})
abline(v=b_mean5)
plot(c(), xlim=c(0,26), ylim=c(-0.15, 0.15), xlab='time', ylab='avg. change in gene expression', main='standard deviation')
apply(stdevPerturbations, 2, function(i){lines(c(1:26), i)})
abline(v=b_stdev5)
plot(c(), xlim=c(0,26), ylim=c(-0.15, 0.15), xlab='time', ylab='avg. change in gene expression', main='skew')
apply(skewPerturbations, 2, function(i){lines(c(1:26), i)})
abline(v=b_skew5)
plot(c(), xlim=c(0,26), ylim=c(-0.15, 0.15), xlab='time', ylab='avg. change in gene expression', main='amplitude')
apply(ampPerturbations, 2, function(i){lines(c(1:26), i)})
abline(v=b_amp5)
dev.off()
pdf(paste("Figure_nitpicker_Results_avgPerturbationSize_10pts.pdf", sep=""), width=7, height=6,pointsize = 12)
par(mfrow=c(2,2));
par(mar=c(4, 4, 1, 1)+0.1);
par(cex=1)
plot(c(), xlim=c(0,26), ylim=c(-0.15, 0.15), xlab='time', ylab='avg. change in gene expression', main='mean')
apply(meanPerturbations, 2, function(i){lines(c(1:26), i)})
abline(v=b_mean10)
plot(c(), xlim=c(0,26), ylim=c(-0.15, 0.15), xlab='time', ylab='avg. change in gene expression', main='standard deviation')
apply(stdevPerturbations, 2, function(i){lines(c(1:26), i)})
abline(v=b_stdev10)
plot(c(), xlim=c(0,26), ylim=c(-0.15, 0.15), xlab='time', ylab='avg. change in gene expression', main='skew')
apply(skewPerturbations, 2, function(i){lines(c(1:26), i)})
abline(v=b_skew10)
plot(c(), xlim=c(0,26), ylim=c(-0.15, 0.15), xlab='time', ylab='avg. change in gene expression', main='amplitude')
apply(ampPerturbations, 2, function(i){lines(c(1:26), i)})
abline(v=b_amp10)
dev.off()
#########How much diversity is there in terms of responses by person?
TP_withGrant=lapply(c(1:4), function(i){
sapply(c(1:26), function(j){
apply(survey[,paste("Q", i, "T", c(1:10), sep="")], 1, function(k){if(j %in% k){1}else{0}})
})
})
TP_withoutGrant=lapply(c(1:4), function(i){
sapply(c(1:26), function(j){
apply(survey[,paste("Q", i, "T", c(1:5), sep="")], 1, function(k){if(j %in% k){1}else{0}})
})
})
pdf(paste("Figure_nitpicker_Results_meanTimePointsSelected3.pdf", sep=""), width=5, height=5,pointsize = 12)
par(mfrow=c(1,1));
par(mar=c(4, 4, 1, 1)+0.1);
par(cex=1)
greyscale <- c("darkgrey", 'white')
pal <- colorRampPalette(greyscale)(100)
type=c('Mean', 'Standard Dev', 'Skew', 'Amplitude')
lapply(c(1:4), function(i){
heatmap(-TP_withoutGrant[[i]], scale="none", main=type[i], Colv=NA, ylab='survey participant', xlab='time point selected', col=pal, labRow=NA)
})
sapply(c(1:4), function(i){
heatmap(-TP_withGrant[[i]], scale="none", Colv=NA, main=type[i], ylab='survey participant', xlab='time point selected', col=pal, labRow=NA)
grab_grob()
})
# heatmap(-TP_withoutGrant[[4]], scale="none", Colv=NA, ylab='survey participant', xlab='time point selected', col=pal, labRow=NA)
dev.off()
################Check how well everyone performs on data sampled from the same distributions
files=paste("simulateSkewedNormal_", c("mean", "stdev", "skew", "amp"), "_test_inputs.txt", sep="")
perturbationsTest=lapply(files, function(i){
a=read.table(i, header=T)
a=a[,c(1,2,seq(3, 80, 3))]
a
})
########compare the perturbations generated by NITPicker with the real perturbations
#meanPertsAll
pdf(paste("Figure_nitpicker_compareExtraPerts_and_NITPickDist_mean.pdf", sep=""), width=8, height=15,pointsize = 12)
par(mfrow=c(4,2));
par(mar=c(4, 4, 1, 1));
par(cex=1)
starts=c(1,5,9,13)
ends=c(4,8,12,16)
i=1
lapply(1:4, function(j){
plot(c(), ylim=c(0, 0.15), xlim=c(0,26), ylab='gene expression', xlab='time')
submat=perturbationsTest[[i]][which(as.numeric(perturbationsTest[[i]][,2])==j),3:28]
apply(submat, 1, function(k){
print(length(as.numeric(k)))
lines(c(1:26), as.numeric(k), col=rgb(0.1,0.1,0.1,0.01))
})
plot(c(), ylim=c(0, 0.15), xlim=c(0,26), ylab='gene expression', xlab='time')
submat=t(meanPertsAll[[j]])
apply(submat, 1, function(k){
print(length(as.numeric(k)))
lines(c(1:26), as.numeric(k), col=rgb(0.1,0.1,0.1,0.01))
})
})
dev.off()
##Check that the results seem reasonable-- are these actually extra perturbations of the above?
pdf(paste("Figure_nitpicker_testExtra_perturbations.pdf", sep=""), width=5, height=15,pointsize = 12)
par(mfrow=c(4,1));
par(mar=c(4, 4, 1, 1)+0.1);
par(cex=1)
starts=c(1,5,9,13)
ends=c(4,8,12,16)
lapply(c(1:4), function(i){
lapply(c(starts[i]:ends[i]), function(j){
plot(c(), ylim=c(0, 0.15), xlim=c(0,26))
submat=perturbationsTest[[i]][which(as.numeric(perturbationsTest[[i]][,2])==j),3:28]
apply(submat, 1, function(k){
lines(c(1:26), as.numeric(k), col=rgb(0.1,0.1,0.1,0.01))
})
})
})
dev.off()
diffFromControl= lapply(c(1:4), function(i){
sapply(c(starts[i]:ends[i]), function(j){
plot(c(), ylim=c(-0.1, 0.1), xlim=c(0,26))
submat=perturbationsTest[[i]][which(as.numeric(perturbationsTest[[i]][,2])==j),3:28]
control=(perturbations[[i]][which(as.numeric(perturbations[[i]][,2])==j),3:28])[1,]
apply(submat, 1, function(k){
lines(c(1:26), as.numeric(k)-as.numeric(control), col=rgb(0.1,0.1,0.1,0.01))
as.numeric(k)-as.numeric(control)
})
})
})
scoreBiologists = lapply(c(1:4), function(i){
sapply(c(starts[i]:ends[i]), function(j){
# plot(c(), ylim=c(-0.1, 0.1), xlim=c(0,26))
submat=perturbationsTest[[i]][which(as.numeric(perturbationsTest[[i]][,2])==j),3:28]
control=(perturbations[[i]][which(as.numeric(perturbations[[i]][,2])==j),3:28])[1,]
apply(TP_withoutGrant[[i]], 1, function(indexBin){
index=which(indexBin==1)
sum(apply(submat, 1, function(k){
#lines(c(1:26), as.numeric(k)-as.numeric(control), col=rgb(0.1,0.1,0.1,0.01))
# print(k)
# print(control)
# print(paste('index', index))
L2(c(1:26), k,
as.numeric(control), 1, 26, index)
}))
})
})
})
scoreRandom = lapply(c(1:4), function(i){
sapply(c(starts[i]:ends[i]), function(j){
# plot(c(), ylim=c(-0.1, 0.1), xlim=c(0,26))
submat=perturbationsTest[[i]][which(as.numeric(perturbationsTest[[i]][,2])==j),3:28]
control=(perturbations[[i]][which(as.numeric(perturbations[[i]][,2])==j),3:28])[1,]
apply(TP_withoutGrant[[i]], 1, function(indexBin){
print(paste(i, j))
index=sample(length(indexBin), length(which(indexBin==1)))
print(length(index))
sum(apply(submat, 1, function(k){
#lines(c(1:26), as.numeric(k)-as.numeric(control), col=rgb(0.1,0.1,0.1,0.01))
# print(k)
# print(control)
# print(paste('index', index))
L2(c(1:26), k,
as.numeric(control), 1, 26, index)
}))
})
})
})
#score NITPicker
bestVals=list(b_mean5, b_stdev5, b_skew5, b_amp5)
scoreNITPick= lapply(c(1:4), function(i){
sapply(c(starts[i]:ends[i]), function(j){
# plot(c(), ylim=c(-0.1, 0.1), xlim=c(0,26))
submat=perturbationsTest[[i]][which(as.numeric(perturbationsTest[[i]][,2])==j),3:28]
control=(perturbations[[i]][which(as.numeric(perturbations[[i]][,2])==j),3:28])[1,]
index=bestVals[[i]]
sum(apply(submat, 1, function(k){
#lines(c(1:26), as.numeric(k)-as.numeric(control), col=rgb(0.1,0.1,0.1,0.01))
# print(k)
# print(control)
# print(paste('index', index))
L2(c(1:26), k,
as.numeric(control), 1, 26, index)
}))
})
})
pval_grantless=sapply(c(1:4), function(i){
sapply(c(1:4), function(j){
nit=scoreNITPick[[i]][j]
bio=scoreBiologists[[i]][,j]
length(which(nit>bio))
})
})
scoreBiologistsWithGrant = lapply(c(1:4), function(i){
sapply(c(starts[i]:ends[i]), function(j){
print(paste(i,j))
# plot(c(), ylim=c(-0.1, 0.1), xlim=c(0,26))
submat=perturbationsTest[[i]][which(as.numeric(perturbationsTest[[i]][,2])==j),3:28]
control=(perturbations[[i]][which(as.numeric(perturbations[[i]][,2])==j),3:28])[1,]
apply(TP_withGrant[[i]], 1, function(indexBin){
index=which(indexBin==1)
sum(apply(submat, 1, function(k){
#lines(c(1:26), as.numeric(k)-as.numeric(control), col=rgb(0.1,0.1,0.1,0.01))
# print(k)
# print(control)
# print(paste('index', index))
L2(c(1:26), k,
as.numeric(control), 1, 26, index)
}))
})
})
})
scoreRandomWithGrant = lapply(c(1:4), function(i){
sapply(c(starts[i]:ends[i]), function(j){
print(paste(i,j))
# plot(c(), ylim=c(-0.1, 0.1), xlim=c(0,26))
submat=perturbationsTest[[i]][which(as.numeric(perturbationsTest[[i]][,2])==j),3:28]
control=(perturbations[[i]][which(as.numeric(perturbations[[i]][,2])==j),3:28])[1,]
apply(TP_withGrant[[i]], 1, function(indexBin){
#index=which(indexBin==1)
index=sample(length(indexBin), length(which(indexBin==1)))
sum(apply(submat, 1, function(k){
#lines(c(1:26), as.numeric(k)-as.numeric(control), col=rgb(0.1,0.1,0.1,0.01))
# print(k)
# print(control)
# print(paste('index', index))
L2(c(1:26), k,
as.numeric(control), 1, 26, index)
}))
})
})
})
save(scoreRandomWithGrant, file='scoreRandomWithGrant.RData')
bestVals=list(b_mean10, b_stdev10, b_skew10, b_amp10)
scoreNITPickWithGrant= lapply(c(1:4), function(i){
sapply(c(starts[i]:ends[i]), function(j){
# plot(c(), ylim=c(-0.1, 0.1), xlim=c(0,26))
submat=perturbationsTest[[i]][which(as.numeric(perturbationsTest[[i]][,2])==j),3:28]
control=(perturbations[[i]][which(as.numeric(perturbations[[i]][,2])==j),3:28])[1,]
index=bestVals[[i]]
sum(apply(submat, 1, function(k){
#lines(c(1:26), as.numeric(k)-as.numeric(control), col=rgb(0.1,0.1,0.1,0.01))
# print(k)
# print(control)
# print(paste('index', index))
L2(c(1:26), k,
as.numeric(control), 1, 26, index)
}))
})
})
pval_granted=sapply(c(1:4), function(i){
sapply(c(1:4), function(j){
nit=scoreNITPickWithGrant[[i]][j]
bio=scoreBiologistsWithGrant[[i]][,j]
length(which(nit>bio))/50
})
})
pval_granted_toRandom=sapply(c(1:4), function(i){
sapply(c(1:4), function(j){
nit=scoreNITPickWithGrant[[i]][j]
bio=scoreRandomWithGrant[[i]][,j]
length(which(nit>bio))/50
})
})
####get rank for each biologist
avgRankWithGrant=sapply(c(1:50), function(individ){
ranks=sapply(c(1:4), function(i){
sapply(c(1:4), function(j){
b=scoreBiologistsWithGrant[[i]][individ, j]
bio=scoreBiologistsWithGrant[[i]][,j]
length(which(b>bio))
})
})
mean(ranks)
})
avgRankWithGrantless=sapply(c(1:50), function(individ){
ranks=sapply(c(1:4), function(i){
sapply(c(1:4), function(j){
b=scoreBiologists[[i]][individ, j]
bio=scoreBiologists[[i]][,j]
length(which(b>bio))
})
})
mean(ranks)
})
plot(avgRankWithGrant, avgRankWithGrantless)
cor(avgRankWithGrant, avgRankWithGrantless)
cor.test(avgRankWithGrant, avgRankWithGrantless)
####Now, let us determine whether certain heuristics improve score
strategyByPersonNumeric=apply(strategyByPerson, c(1,2), function(i){
as.numeric(substring(as.character(i), 1, 1)[[1]])
})
set.seed(123)
lr=cv.glmnet(strategyByPersonNumeric, (avgRankWithGrantless+avgRankWithGrant)/2)
plot(lr)
lambda=lr$lambda.min
coefLR=coef.cv.glmnet(lr, s='lambda.min')
yPred=predict(lr, strategyByPersonNumeric, s='lambda.min')
plot((avgRankWithGrantless+avgRankWithGrant)/2, yPred, xlab="average rank", ylab="predicted average rank")
cor.test(avgRankWithGrantless, yPred)
#who won?
which((avgRankWithGrantless+avgRankWithGrant)/2==min((avgRankWithGrantless+avgRankWithGrant)/2))
######How does it compare to random?
avgRankWithGrantComparedToRandom=sapply(c(1:50), function(individ){
ranks=sapply(c(1:4), function(i){
sapply(c(1:4), function(j){
b=scoreBiologistsWithGrant[[i]][individ, j]
bio=scoreRandomWithGrant[[i]][,j]
length(which(b>bio))
})
})
mean(ranks)
})
avgRankWithGrantlessComparedToRandom=sapply(c(1:50), function(individ){
ranks=sapply(c(1:4), function(i){
sapply(c(1:4), function(j){
b=scoreBiologists[[i]][individ, j]
bio=scoreRandom[[i]][,j]
length(which(b>bio))
})
})
mean(ranks)
})
avgRankRandWithGrantComparedToRandom=sapply(c(1:50), function(individ){
ranks=sapply(c(1:4), function(i){
sapply(c(1:4), function(j){
b=scoreRandomWithGrant[[i]][individ, j]
bio=scoreRandomWithGrant[[i]][,j]
length(which(b>bio))
})
})
mean(ranks)
})
avgRankRandWithGrantlessComparedToRandom=sapply(c(1:50), function(individ){
ranks=sapply(c(1:4), function(i){
sapply(c(1:4), function(j){
b=scoreRandom[[i]][individ, j]
bio=scoreRandom[[i]][,j]
length(which(b>bio))
})
})
mean(ranks)
})
pdf(paste("Figure_nitpicker_NITPickerVersusHuman.pdf", sep=""), width=4, height=3,pointsize = 12)
par(mfrow=c(1,1));
par(mar=c(4, 4, 1, 1)+0.1);
par(cex=1)
boxplot(as.numeric(pval_granted), as.numeric(pval_grantless), ylim=c(0, 50), ylab='rank', names=c('10 time points', '5 time points'))
dev.off()
##############Turn this into a figure:
set.seed(123)
lr=cv.glmnet(strategyByPersonNumeric, (avgRankWithGrantless+avgRankWithGrant)/2)
plot(lr)
lambda=lr$lambda.min
coefLR=coef.cv.glmnet(lr, s='lambda.min')
yPred=predict(lr, strategyByPersonNumeric, s='lambda.min')
pdf(paste("Figure_BiologistSuccess.pdf", sep=""), width=20, height=6.6,pointsize = 14)
par(mfrow=c(2,2));
par(mar=c(4, 4, 1, 1)+0.1);
par(cex=0.9)
plot(avgRankWithGrantless, avgRankWithGrant, xlab="average rank - 5 points", ylab="average rank - 10 points", pch=19, col='grey')
a=cor.test(avgRankWithGrantless, avgRankWithGrant)
text(3, 40, paste("p <",round(a$p.value, 7)))
plot((avgRankWithGrantless+avgRankWithGrant)/2, yPred, xlab="average rank", ylab="predicted average rank", pch=19, col='grey')
a=cor.test((avgRankWithGrantless+avgRankWithGrant)/2, yPred)
text(7, 27, paste("p <",round(a$p.value, 6)))
barplot2(as.numeric(coefLR[2:length(coefLR)]), names=c('peak expression', 'far apart', 'evenly spaced', 'largest perturb', 'greatest slope', 'multiple genes'), ylab="coefficient")
boxplot(avgRankWithGrantComparedToRandom, avgRankRandWithGrantComparedToRandom, avgRankWithGrantlessComparedToRandom, avgRankRandWithGrantlessComparedToRandom, names=c('biologists, 10 points', 'random, 10 points', 'biologists, 5 points', 'random, 5 points'), ylab='average score (lower is better)')
scoresByType=list(avgRankWithGrantComparedToRandom, avgRankRandWithGrantComparedToRandom, avgRankWithGrantlessComparedToRandom, avgRankRandWithGrantlessComparedToRandom)
sapply(c(1:4), function(i){
points(jitter(rep(0, length(scoresByType[[i]])), amount=0.1)+i, scoresByType[[i]], pch=20, col='orange')
})
dev.off()
t.test(avgRankWithGrantComparedToRandom, avgRankRandWithGrantComparedToRandom, alternative="less")
t.test(avgRankWithGrantlessComparedToRandom, avgRankRandWithGrantlessComparedToRandom, alternative="less")
#I can also evaluate whether scores correlate with feilds or level of profession
topics=strsplit(as.character(survey[,'field']),', ')
uniqueTopics=unique(unlist(topics))
topicDist=sapply(uniqueTopics, function(i){
sapply(topics, function(j){
if(i %in% j){
1
}else{0}
})
})
set.seed(123)
lr=randomForest(topicDist, (avgRankWithGrantless+avgRankWithGrant)/2)
plot(lr)
plot((avgRankWithGrantless+avgRankWithGrant)/2, lr$predicted)
cor.test((avgRankWithGrantless+avgRankWithGrant)/2, lr$predicted)
pdf(paste("Figure_BiologistSuccessByCareer.pdf", sep=""), width=6, height=4,pointsize = 14)
par(mfrow=c(1,1));
par(mar=c(4, 4, 1, 1)+0.1);
par(cex=0.9)
avgRankOverall=(avgRankWithGrantless+avgRankWithGrant)/2
boxplot(avgRankOverall[which(survey[,'level']=='Undergraduate or masters students')],
avgRankOverall[which(survey[,'level']=='PhD student')],
avgRankOverall[which(survey[,'level']=='Postdoctorate research associate')],
avgRankOverall[which(survey[,'level']=='Principle Investigator')], names=c('Undergrad/MA', 'PhD', 'Postdoc', 'PI'), xlab='career stage', ylab='avg. rank')
#jitter
byCarreerStage=list(avgRankOverall[which(survey[,'level']=='Undergraduate or masters students')],
avgRankOverall[which(survey[,'level']=='PhD student')],
avgRankOverall[which(survey[,'level']=='Postdoctorate research associate')],
avgRankOverall[which(survey[,'level']=='Principle Investigator')])
sapply(c(1:4), function(i){
points(jitter(rep(i, length(byCarreerStage[[i]]))), byCarreerStage[[i]], pch=20, col='orange')
})
dev.off()
setwd("~/Documents/NITPicker/SurveyResults")
#####Now we want to load up additional perturbations and graph the error functions over time
perturbs=c("mean", "sd", "skew", "amp")
bigPerturbations=lapply(paste("pert1000_", perturbs, ".txt", sep=""), function(i){
read.table(i, header=T, sep="\t")
})
pertType=1
original=bigPerturbations[[pertType]][1:4,3:28]
diff=lapply(c(0:3), function(i){
geneA=which(bigPerturbations[[pertType]][,2]==i)
mat=bigPerturbations[[pertType]][geneA[2:length(geneA)],3:28]
temp=apply(mat, 1, function(j){
a=as.numeric(j-original[(i+1),])
})
apply(temp, 1, function(j){
sort(j)[c(seq(1, 1000, 125), 1000)]
})
})
heatmap(diff[[1]])
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.