inst/doc/breakpointError.R

### R code from vignette source 'breakpointError.Rnw'

###################################################
### code chunk number 1: setup
###################################################
library(breakpointError)
latent <- c()
latent.segs <- data.frame()
first <- 1
for(mu in c(-2,0,2)){
  size <- 30
  last <- first + size -1
  latent.segs <- rbind(latent.segs,data.frame(first,last,mean=mu))
  latent <- c(latent,rep(mu,size))
  first <- last+1
}
latent.breaks <- estimateBreaks(latent)
atcg <- sample(c("A","T","C","G"),length(latent),replace=TRUE)
plotsig <- function(main){
  plot(0,0,xlab="base",ylab="signal",type="n",
       las=1,xaxt="n",xlim=c(1,length(latent)),ylim=c(-5,3),main=main)
  axis(1,c(1,latent.breaks,length(latent)))
  text(seq_along(latent),-5,atcg,cex=0.5)
}


###################################################
### code chunk number 2: latent
###################################################
plotsig("Latent signal (horizontal lines) and breakpoints (vertical dashed lines)")
print(latent.segs)
## The segment from base i to base j is drawn from i-1/2 to j+1/2.
with(latent.segs,segments(first-1/2,mean,last+1/2,mean,col=signal.colors["latent"]))
## And if there is a break after base i, it should be drawn at i+1/2.
abline(v=latent.breaks+1/2,col=signal.colors["latent"],lty="dashed")


###################################################
### code chunk number 3: latent-noisy
###################################################
set.seed(1)
d <- 18
base <- sort(sample(seq_along(latent), d))
signal <- rnorm(d, latent[base])
plotsig("Noisy observations (black points) and latent signal (blue lines)")
points(base, signal)
with(latent.segs,segments(first-1/2,mean,last+1/2,mean,col=signal.colors["latent"]))
abline(v=latent.breaks+1/2,col=signal.colors["latent"],lty="dashed")


###################################################
### code chunk number 4: estimated
###################################################
plotsig("Noisy observations (black points) and estimated signal (green lines)")
points(base, signal)
kmax <- 6
L <- run.cghseg(signal, base, kmax)
k <- 3
yhat <- subset(L$segments,segments==k)
print(yhat)
with(yhat,segments(first.base,mean,last.base,mean,col=signal.colors["estimate"]))
abline(v=yhat$first.base[-1],col=signal.colors["estimate"],lty="dashed")


###################################################
### code chunk number 5: shown-break
###################################################
print(L$breaks[[k]])


###################################################
### code chunk number 6: breaks
###################################################
str(L$breaks)


###################################################
### code chunk number 7: latent-breaks
###################################################
print(latent.breaks)


###################################################
### code chunk number 8: breakpoint-error-pieces
###################################################
make.fun <- function(L,x,R){
  force(L)
  force(x)
  force(R)
  function(g){
    ifelse(g<L,1,{
      ifelse(g<x,(x-g)/(x-L),{
        ifelse(g<R,(g-x)/(R-x),1)
      })
    })
  }
}

pieces <- make.fun(1,5,7)

make.args <- function(break.vec,signal.size){
  stopifnot(is.vector(break.vec))
  stopifnot(length(break.vec)>0)
  args.list <- list()
  for(i in seq_along(break.vec)){
    x <- break.vec[i]
    left <- if(i == 1){
      1
    }else{
      right+1
    }
    right <- if(i == length(break.vec)){
      signal.size-1
    }else{
      floor((x+break.vec[i+1])/2)
    }
    args.list[[i]] <- list(left,x,right)
  }
  args.list
}

set.seed(1)
reduce.by <- 3
offset.by <- 100
seg.mean <- c(-1,0,0.5)/reduce.by+offset.by
seg.size <- c(4,10,8)
ends <- cumsum(seg.size)+1 # up to not including this base
starts <- ends-seg.size
seg.df <- data.frame(starts,ends,seg.mean,what="signal")
breaks <- ends[-length(ends)]-1
size <- ends[length(ends)]-1
base.df <- data.frame(base=sample(c("A","T","C","G"),size,replace=TRUE),
                      position=1:size,what="signal",signal=-0.5/reduce.by+offset.by)
last.break <- size-1
piece.args <- make.args(breaks,size)
## label the region definition.
regions.df <- do.call(rbind,lapply(seq_along(piece.args),function(i){
  L <- piece.args[[i]]
  base <- unlist(L)
  symbol <- sprintf(c("$\\underline r_%d$","$B_%d$","$\\overline r_%d$"),i)
  hjust <- c(0,0,1)
  data.frame(base,symbol,what="error",cost=1.3,hjust,i)
}))
piece.funs <- lapply(piece.args,function(L)do.call(make.fun,L))
base <- 1:last.break
midpoints <- breaks[-1]-diff(breaks)/2
knots <- sort(c(-Inf,regions.df$base,Inf))
point.df <- do.call(rbind,lapply(seq_along(breaks),function(i){
  fun <- piece.funs[[i]]
  cost <- fun(base)
  data.frame(cost,base,i,what="error")
}))
text.df <- do.call(rbind,lapply(seq_along(piece.args),function(i){
  L <- piece.args[[i]]
  this.curve <- point.df[point.df$i==i,]
  min.point <- this.curve[which.min(this.curve$cost),]
  label <- sprintf("$\\ell_%d = C_{%d,%d,%d}$",i,L[[1]],L[[2]],L[[3]])
  data.frame(min.point,label)
}))
text.df$base <- text.df$base+c(1.5,2)
fun.df <- do.call(rbind,lapply(seq_along(breaks),function(i){
  fun <- piece.funs[[i]]
  cost <- fun(knots)
  data.frame(cost,base=knots,i,what="error")
}))
break.df <- do.call(rbind,lapply(seq_along(breaks),function(i){
  data.frame(base=breaks[i],i,what="error")
}))

library(ggplot2)
library(breakpointError)
x.breaks <- c(regions.df$base,
              max(point.df$base)+1)
p <- ggplot()+
  geom_text(aes(base,cost,colour=factor(i),
                label=symbol,hjust=hjust),
            data=regions.df,vjust=1)+
  geom_point(aes(base,cost,group=i,colour=factor(i)),
             data=point.df,pch=1,size=3)+
  geom_line(aes(base,cost,group=i,colour=factor(i),size=factor(i)),
            data=fun.df)+
  geom_rug(data=break.df)+
  scale_size_manual(values=c("1"=1.5,"2"=0.8)*1.5)+
  ##theme(title="Exact breakpoint error functions")+
  geom_segment(aes(starts-0.5,seg.mean,xend=ends-0.5,yend=seg.mean),
               data=seg.df,lwd=2,colour=signal.colors["latent"])+
  facet_grid(what~.,scales="free",space="free")+
  geom_text(aes(position,signal,label=base),data=base.df,size=3)+
  geom_text(aes(base,cost,group=i,colour=factor(i),label=label),
            data=text.df,hjust=0,vjust=-0.5)+
  guides(colour="none",size="none")+
  scale_y_continuous("",breaks=c(1,0),minor_breaks=NULL)+
  scale_x_continuous(breaks=x.breaks,minor_breaks=NULL)

library(tikzDevice)
options(tikzDocumentDeclaration=("\\documentclass[11pt]{article}"),
        tikzMetricsDictionary="tikzMetrics")
tikz("breakpoint-error-pieces.tex",w=5,h=2.5)
print(p)
dev.off()


###################################################
### code chunk number 9: breakpointError
###################################################
breakpointError(L$breaks[[3]], latent.breaks, length(latent))
sapply(L$breaks, breakpointError, latent.breaks, length(latent))


###################################################
### code chunk number 10: components
###################################################
e <- errorComponents(L$breaks, latent.breaks, length(latent))
library(reshape2)
dcast(e,segments~type,value.var="error")
library(ggplot2)
p <- ggplot(e,aes(segments,error))+
  geom_line(aes(size=type,colour=type,linetype=type))+
  scale_linetype_manual(values=fp.fn.linetypes)+
  scale_colour_manual(values=fp.fn.colors)+
  scale_size_manual(values=fp.fn.sizes)
library(directlabels)
dl <- direct.label(p+guides(linetype="none",colour="none",size="none"),
                   dl.combine("first.qp","last.qp"))
print(dl)


###################################################
### code chunk number 11: details
###################################################
str(errorDetails(L$breaks[[2]], latent.breaks, length(latent)))
str(errorDetails(L$breaks[[4]], latent.breaks, length(latent)))


###################################################
### code chunk number 12: guess.unidentified
###################################################
str(errorDetails(c(2,6,7), c(), 10))

Try the breakpointError package in your browser

Any scripts or data that you put into this service are public.

breakpointError documentation built on May 2, 2019, 5:22 p.m.