knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(data.table) data(profile614chr2, package="gfpop") profile614chr2$probes[, change.after := floor( c(diff(position)/2+position[-.N], NA)) ] sngraph <- function(n.segs, type, gap){ stopifnot(is.integer(n.segs), length(n.segs)==1, n.segs >= 1) s <- n.segs-1 seg.vec <- 1:s gfpop::graph( gfpop::StartEnd(start=0, end=s), gfpop::Edge(seg.vec-1, seg.vec, type, gap=gap), all.null.edges = TRUE) } select.dt <- rbind( data.table(n.segments = c(3, 7, 13), graph.name = "std", gap = 0)) #data.table(n.segments = 13, graph.name = "abs", gap = 1)) seg.dt.list <- list() for(model.i in 1:nrow(select.dt)){ model.info <- select.dt[model.i] cat(sprintf("%4d / %4d models\n", model.i, nrow(select.dt))) g <- model.info[, sngraph(as.integer(n.segments), graph.name, gap=gap)] fit <- gfpop::gfpop( profile614chr2$probes$logratio, mygraph = g, type = "mean") end.i <- fit$changepoints change.i <- end.i[-length(end.i)] change.pos <- profile614chr2$probes$change.after[change.i] seg.dt.list[[model.i]] <- data.table( model.info, segStart=c(profile614chr2$probes[1, position], change.pos), segEnd=c(change.pos, profile614chr2$probes[.N, position]), mean=fit$parameters) } seg.dt <- do.call(rbind, seg.dt.list) selData <- as.data.table(profile614chr2$probes) labels_lp <- as.data.table(profile614chr2$labels) modifiedLabels <- data.frame("start" = c(), "end" = c(), "breaks" = c()) for(i in 1:nrow(labels_lp)){ min <- labels_lp[[i,1]] max <- labels_lp[[i,2]] annotation <- labels_lp[[i,3]] positions <- selData[position >= min & position <= max][,1] change <- 0 if(annotation == "1breakpoint"){ change <- 1 } else{ change <- 0 } modifiedLabels <- rbind(modifiedLabels, data.frame(start=min(positions), end=max(positions), breaks=change)) } labels_lp <- data.frame("start" = c(), "end" = c(), "breaks" = c()) for(i in 1:nrow(modifiedLabels)){ start <- modifiedLabels[i, 1] end <- modifiedLabels[i, 2] change <- modifiedLabels[i, 3] index1 <- which(selData[,position] == start) index2 <- which(selData[,position] == end) labels_lp <- rbind(labels_lp, data.frame(start = index1, end = index2, breaks = change)) } labels_lp <- labels_lp[order(labels_lp$start),] labelled_fit <- LabelledOpart::labelled_opart_gaussian(selData$logratio, labels_lp, 15) positions <- selData[labelled_fit$end.vec] segments <- data.frame("n.segments" <- c(), "graph.name"<- c(), "gap"<- c(), "segStart" = c(), "segEnd" = c(), "mean" = c()) prev <- 1 for(pos in labelled_fit$end.vec){ avg <- mean(selData[prev:pos,]$logratio) segments <- rbind(segments, data.frame(n.segments=13,graph.name="lopart",gap=1,segStart=selData[prev]$position, segEnd=selData[pos]$position, mean=avg)) prev <- pos } seg.dt <- rbind(seg.dt, segments) select.dt <- rbind( data.table(n.segments = c(3, 7, 13), graph.name = "std", gap = 0), data.table(n.segments = 13, graph.name = "lopart", gap = 1)) some.segs <- seg.dt[select.dt, on=list(n.segments, graph.name), allow.cartesian=TRUE] some.change <- some.segs[min(segStart) < segStart] some.change[, change := segStart] err.dt <- some.segs[, { change.dt <- .SD[min(segStart) < segStart] change.dt[, change := segStart] change.dt[, prob := 1] change.dt[, nseg := n.segments] model.dt <- data.table(nseg=n.segments, prob=1) penaltyLearning::labelError( model.dt, data.table(prob=1, profile614chr2$labels), change.dt, change.var="change", model.vars="nseg", label.vars=c("labelStart", "labelEnd"), problem.vars="prob")$label.errors }, by=list(graph.name, n.segments)] err.dt[, list( fp=sum(fp), fn=sum(fn), errors=sum(fp+fn) ), by=list(graph.name, n.segments)] win <- function(min,max)data.table(windowStart=min*1e5,windowEnd=max*1e5) windows <- rbind( win( 65, 71), win( 148, 171), win( 354, 361), win(1059,1065)) mb.fac <- 1e6 wfac <- function(x){ factor(x, c("6.5-7.1", "14.8-17.1", "35.4-36.1", "105.9-106.5")) } windows[, window := wfac(sprintf( "%.1f-%.1f", windowStart/mb.fac, windowEnd/mb.fac))] setkey(windows, windowStart, windowEnd) f <- function(dt, key.vec){ setkeyv(dt, key.vec) dt } profile614chr2$probes[, pos0 := position] over.list <- list( changes=f(some.change, c("change", "segStart")), segments=f(some.segs, c("segStart", "segEnd")), labels=f(profile614chr2$labels, c("labelStart", "labelEnd")), errors=f(err.dt, c("labelStart", "labelEnd")), probes=f(profile614chr2$probes, c("position", "pos0"))) join.list <- lapply(over.list, foverlaps, windows, nomatch=0L) show.err <- join.list$errors[, list( fp=sum(fp), fn=sum(fn), errors=sum(fp+fn) ), by=list(graph.name, n.segments)] library(ggplot2) br <- c(6.5,7.0,seq(15,17,by=0.5),35.5,36,106,106.5) names(br) <- as.character(br) gg.out <- ggplot()+ theme_bw()+ theme(panel.spacing=grid::unit(0, "lines"))+ coord_cartesian(expand=FALSE)+ facet_grid( graph.name + n.segments ~ window, ##labeller=label_both, scales="free",space="free")+ penaltyLearning::geom_tallrect(aes( xmin=labelStart/mb.fac, xmax=labelEnd/mb.fac, fill=annotation), color="grey", data=join.list$labels)+ scale_fill_manual( "label", values=penaltyLearning::change.colors)+ penaltyLearning::geom_tallrect(aes( xmin=labelStart/mb.fac, xmax=labelEnd/mb.fac, linetype=status), fill=NA, size=1, color="black", data=join.list$errors)+ scale_linetype_manual( "error type", limits=c("correct", "false negative", "false positive"), values=c(correct=0, "false negative"=3, "false positive"=1))+ geom_point(aes( position/mb.fac, logratio), shape=1, color="grey50", data=join.list$probes)+ scale_y_continuous( "Logratio (Measure of DNA copy number)", breaks=seq(-2, 2, by=2) )+ scale_x_continuous("Position on chr2 (mega bases)", breaks=br)+ scale_color_manual(values=c( std="green", lopart="deepskyblue"))+ geom_segment(aes( ifelse(segStart < windowStart, windowStart, segStart)/mb.fac, mean, color=graph.name, xend=ifelse(windowEnd < segEnd, windowEnd, segEnd)/mb.fac, yend=mean), data=join.list$segments, size=1)+ geom_vline(aes( xintercept=change/mb.fac, color=graph.name), linetype="dashed", size=0.75, data=join.list$changes)+ geom_text(aes( 14.8, -3, label=sprintf( " %d label error%s for %d segment %s model", errors, ifelse(errors==1, "", "s"), n.segments, graph.name)), hjust=0, vjust=0, data=data.table(show.err, window=wfac("14.8-17.1"))) ##png("figure-relevant-changes-copy-number.png", 8, 4, units="in", res=300, type="cairo") print(gg.out) ##dev.off()
selData <- as.data.table(profile614chr2$probes) labels_lp <- as.data.table(profile614chr2$labels) modifiedLabels <- data.frame("start" = c(), "end" = c(), "breaks" = c()) for(i in 1:nrow(labels_lp)){ min <- labels_lp[[i,1]] max <- labels_lp[[i,2]] annotation <- labels_lp[[i,3]] positions <- selData[position >= min & position <= max][,1] change <- 0 if(annotation == "1breakpoint"){ change <- 1 } else{ change <- 0 } modifiedLabels <- rbind(modifiedLabels, data.frame(start=min(positions), end=max(positions), breaks=change)) } labels_lp <- data.frame("start" = c(), "end" = c(), "breaks" = c()) for(i in 1:nrow(modifiedLabels)){ start <- modifiedLabels[i, 1] end <- modifiedLabels[i, 2] change <- modifiedLabels[i, 3] index1 <- which(selData[,position] == start) index2 <- which(selData[,position] == end) labels_lp <- rbind(labels_lp, data.frame(start = index1, end = index2, breaks = change)) } labels_lp <- labels_lp[order(labels_lp$start),] labelled_fit <- LabelledOpart::labelled_opart_gaussian(selData$logratio, labels_lp, 15) positions <- selData[labelled_fit$end.vec] segments <- data.frame("n.segments" <- c(), "graph.name"<- c(), "gap"<- c(), "segStart" = c(), "segEnd" = c(), "mean" = c()) prev <- 1 for(pos in labelled_fit$end.vec){ avg <- mean(selData[prev:pos,]$logratio) segments <- rbind(segments, data.frame(n.segments=13,graph.name="lopart",gap=2,segStart=selData[prev]$position, segEnd=selData[pos]$position, mean=avg)) prev <- pos }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.