Nothing
### R code from vignette source 'variableDensity.Rnw'
###################################################
### code chunk number 1: setup
###################################################
lambda.hat <- function(lambda, error){
l <- which(error == min(error))
chosen <- l[ceiling(length(l)/2)]
lambda[chosen]
}
library(breakpointError)
kmax <- 8
sample.signal <- function(d){
locations <- round(seq(1,length(mu),l=d))
last <- as.integer(locations[length(locations)])
this.signal <- signal[locations]
result <- run.cghseg(this.signal,locations,kmax)
result$bases.per.probe <- factor(round(last/d))
result$locations <- locations
result$signal <- this.signal
result$error <- sapply(result$breaks,function(breaks){
breakpointError(mu.break.after, breaks, last)
})
result$crit <- function(lambda,alpha){
stopifnot(length(alpha)==1)
J <- result$J.est
Kseq <- seq_along(J)
lambda * Kseq * d^alpha + J
}
result$lambda <- 10^seq(-9,9,l=200)
result$kstar <- function(lambda,a){
which.min(result$crit(lambda,a))
}
result$lambda.error <- function(a){
k <- sapply(result$lambda, result$kstar, a)
result$error[k]
}
result$lambda.hat <- function(a){
e <- result$lambda.error(a)
lambda.hat(result$lambda, e)
}
result
}
set.seed(1)
seg.size <- c(1,2,1,2)*1e5
means <- c(-1,0,1,0)
seg.df <- data.frame()
mu <- c()
first.base <- 1
for(i in seq_along(means)){
N <- seg.size[i]
seg.mu <- means[i]
mu <- c(mu,rep(seg.mu,N))
last.base <- first.base+N-1
seg.df <- rbind(seg.df,data.frame(first.base,last.base,seg.mu))
first.base <- last.base+1
}
mu.break.after <- which(diff(mu)!=0)
norm.sd <- 1
signal <- rnorm(length(mu), mu, norm.sd)
## here we define the size of the signals.
variable.density.signals <- list()
signal.size <- c(2000,1000,200,50)*length(means)
n.signals <- length(signal.size)
for(sig.i in 1:n.signals){
cat(sprintf("simulating signal %4d / %4d\n",sig.i,n.signals))
d <- signal.size[sig.i]
variable.density.signals[[sig.i]] <- sample.signal(d)
}
###################################################
### code chunk number 2: variable-density-signals
###################################################
## now make the signals for plotting.
variable.density.show <-
variable.density.signals[c(1,length(variable.density.signals))]
signal.df <- do.call(rbind,lapply(variable.density.show,function(sig){
with(sig,data.frame(locations, signal, bases.per.probe))
}))
library(ggplot2)
p <- ggplot(signal.df,aes(locations,signal))+
geom_point(pch=21)+
geom_segment(aes(first.base-1/2,seg.mu,xend=last.base+1/2,yend=seg.mu),
data=seg.df,colour=signal.colors["latent"])+
geom_vline(aes(xintercept=first.base-1/2),linetype="dashed",
data=seg.df[-1,],colour=signal.colors["latent"])+
facet_grid(bases.per.probe~., labeller=label_both)+
xlab("position in base pairs")
print(p)
###################################################
### code chunk number 3: variable-density-berr-k
###################################################
err.df <- do.call(rbind,lapply(variable.density.show,function(sig){
bases.per.probe <- sig$bases.per.probe
error <- sig$error
data.frame(bases.per.probe,segments=seq_along(error),error)
}))
library(ggplot2)
leg <- "bases/probe"
bpp.colors <- c("#00bfc4","#f8766d")
kplot <- ggplot(err.df,aes(segments,error))+
geom_line(aes(colour=bases.per.probe))+
geom_point()+
facet_grid(.~bases.per.probe, labeller=label_both)+
scale_colour_manual(leg,values=bpp.colors,guide="none")+
scale_x_continuous(minor_breaks=NULL)
print(kplot)
###################################################
### code chunk number 4: breakpointError
###################################################
lerr.df <- do.call(rbind,lapply(variable.density.show,function(sig){
bases.per.probe <- round(max(sig$locations)/length(sig$signal))
do.call(rbind,lapply(c(1,1/2,0),function(a){
error <- sig$lambda.error(a)
lambda <- sig$lambda
l.hat <- lambda.hat(lambda, error)
optimal <- l.hat == lambda
data.frame(error,lambda,alpha=a,bases.per.probe,optimal)
}))
}))
bpp <- lerr.df$bases.per.probe
lerr.df$bases.per.probe <- factor(bpp,sort(unique(bpp)))
library(ggplot2)
sizes <- c(1,0.5)
names(sizes) <- names(bpp.colors) <- levels(lerr.df$bases.per.probe)
p <- ggplot(lerr.df,aes(log10(lambda),error))+
geom_line(aes(size=bases.per.probe,colour=bases.per.probe))+
geom_point(aes(fill=bases.per.probe),pch=21,data=subset(lerr.df,optimal))+
facet_grid(.~alpha, labeller=label_both)+
scale_size_manual(leg,values=sizes)+
scale_fill_manual(leg,values=bpp.colors)+
scale_color_manual(leg,values=bpp.colors)
print(p)
###################################################
### code chunk number 5: variable-density-error-train
###################################################
a.grid <- c(0,1/2,1)
lambda <- variable.density.signals[[1]]$lambda
err.list <- lapply(a.grid,function(a){
err.mat <- sapply(variable.density.signals,function(sig){
sig$lambda.error(a)
})## matrix[lambda,signal]
error <- rowSums(err.mat)
data.frame(alpha=a,error,lambda,optimal=error==min(error))
})
err.curves <- do.call(rbind,err.list)
dots <- subset(err.curves,optimal)
opt.err <- do.call(rbind,lapply(err.list,function(df)subset(df,optimal)[1,]))
library(ggplot2)
opt.err$text.at <- -8
opt.err$hjust <- 0
opt.err$vjust <- -0.5
on.right <- nrow(opt.err)
opt.err$text.at[on.right] <- 8
opt.err$hjust[on.right] <- 1
opt.err$vjust[on.right] <- -0.5#1.5
opt.color <- "red"
p <- ggplot(,aes(log10(lambda),error))+
geom_line(lwd=1.1,data=err.curves)+
##geom_point(pch=1,size=4,data=dots)+
geom_segment(aes(yend=error,xend=text.at),data=opt.err,
colour=opt.color)+
geom_text(aes(text.at,label=round(error,1),hjust=hjust,vjust=vjust),
data=opt.err,colour=opt.color)+
facet_grid(.~alpha,labeller=label_both)
print(p)
###################################################
### code chunk number 6: variable-density-error-alpha
###################################################
estimate.error <- function(a,train,test){
l.hat <- train$lambda.hat(a)
test.k <- test$kstar(l.hat,a)
test$error[test.k]
}
test.all.pairs <- function(a,signal.list){
n.signals <- length(signal.list)
test.error <- c()
for(i in 1:(n.signals-1)){
cat(sprintf("alpha=%10f signal %5d / %5d\n",a,i,n.signals-1))
for(j in (i+1):n.signals){
err <- estimate.error(a,signal.list[[i]],signal.list[[j]])
err2 <- estimate.error(a,signal.list[[j]],signal.list[[i]])
test.error <- c(test.error,err,err2)
}
}
test.error
}
a.df <- data.frame()
a.grid <- c(2,seq(0,1.5,by=0.1),-0.5,-1,0.45,0.55)
for(a in a.grid){
test.error <- test.all.pairs(a,variable.density.signals)
err.df <- data.frame(test.error,pair=seq_along(test.error),alpha=a)
a.df <- rbind(a.df,err.df)
}
train.df <- do.call(rbind,lapply(a.grid,function(a){
err.mat <- sapply(variable.density.signals,function(sig){
sig$lambda.error(a)
})## matrix[lambda,signal]
error <- rowSums(err.mat)
data.frame(alpha=a,mean=min(error),sd=NA,what="train")
}))
test.df <- do.call(rbind,lapply(a.grid,function(a){
test <- subset(a.df,alpha == a)
data.frame(alpha=a,mean=mean(test$test.error),
sd=sd(test$test.error),what="test")
}))
stat.df <- rbind(train.df,test.df)
library(ggplot2)
p <- ggplot(stat.df,aes(alpha,mean))+
geom_ribbon(aes(ymin=ifelse(mean-sd<0,0,mean-sd),ymax=mean+sd),alpha=1/2)+
geom_line()+
facet_grid(what~.,scales="free")
print(p)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.