library("PSCBS")
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Simulating copy-number data
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
set.seed(0xBEEF)
# Number of loci
J <- 1000
x <- sort(runif(J, max=J)) * 1e5
mu <- double(J)
mu[200:300] <- mu[200:300] + 1
mu[350:400] <- NA # centromere
mu[650:800] <- mu[650:800] - 1
eps <- rnorm(J, sd=1/2)
y <- mu + eps
outliers <- seq(from=1L, to=J, length.out=0.2*J)
y[outliers] <- y[outliers] + 1.5
w <- rep(1.0, times=J)
w[outliers] <- 0.01
data <- data.frame(chromosome=1L, x=x, y=y)
dataW <- cbind(data, w=w)
par(mar=c(2,3,0.2,1)+0.1)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Single-chromosome segmentation
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Segment without weights
fit <- segmentByCBS(data)
sampleName(fit) <- "CBS_Example"
print(fit)
plotTracks(fit)
## Highlight outliers (they pull up the mean levels)
points(x[outliers]/1e6, y[outliers], col="purple")
# Segment with weights
fitW <- segmentByCBS(dataW)
sampleName(fitW) <- "CBS_Example (weighted)"
print(fitW)
drawLevels(fitW, col="red")
legend("topright", bg="white", legend=c("outliers", "non-weighted CBS", "weighted CBS"), col=c("purple", "purple", "red"), lwd=c(NA,3,3), pch=c(1,NA,NA))
## Assert that weighted segment means are less biased
dmean <- getSegments(fit)$mean - getSegments(fitW)$mean
cat("Segment mean differences:\n")
print(dmean)
stopifnot(all(dmean > 0, na.rm=TRUE))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Segmentation with some known change points
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
knownSegments <- data.frame(
chromosome=c( 1, 1),
start =x[c( 1, 401)],
end =x[c(349, J)]
)
fit2 <- segmentByCBS(dataW, knownSegments=knownSegments, verbose=TRUE)
sampleName(fit2) <- "CBS_Example_2 (weighted)"
print(fit2)
plotTracks(fit2)
abline(v=c(knownSegments$start, knownSegments$end)/1e6, lty=3)
# Chromosome boundaries can be specified as -Inf and +Inf
knownSegments <- data.frame(
chromosome=c( 1, 1),
start =c( -Inf, x[401]),
end =c(x[349], +Inf)
)
fit2b <- segmentByCBS(dataW, knownSegments=knownSegments, verbose=TRUE)
sampleName(fit2b) <- "CBS_Example_2b (weighted)"
print(fit2b)
plotTracks(fit2b)
abline(v=c(knownSegments$start, knownSegments$end)/1e6, lty=3)
# As a proof of concept, it is possible to segment just the centromere,
# which contains no data. All statistics will be NAs.
knownSegments <- data.frame(
chromosome=c( 1),
start =x[c(350)],
end =x[c(400)]
)
fit3 <- segmentByCBS(dataW, knownSegments=knownSegments, verbose=TRUE)
sampleName(fit3) <- "CBS_Example_3"
print(fit3)
plotTracks(fit3, Clim=c(0,5), xlim=c(0,100))
abline(v=c(knownSegments$start, knownSegments$end)/1e6, lty=3)
# If one specify the (empty) centromere as a segment, then its
# estimated statistics will be NAs, which becomes a natural
# separator between the two "independent" arms.
knownSegments <- data.frame(
chromosome=c( 1, 1, 1),
start =x[c( 1, 350, 401)],
end =x[c(349, 400, J)]
)
fit4 <- segmentByCBS(dataW, knownSegments=knownSegments, verbose=TRUE)
sampleName(fit4) <- "CBS_Example_4"
print(fit4)
plotTracks(fit4)
abline(v=c(knownSegments$start, knownSegments$end)/1e6, lty=3)
fit5 <- segmentByCBS(dataW, knownSegments=knownSegments, undo=Inf, verbose=TRUE)
sampleName(fit5) <- "CBS_Example_5"
print(fit5)
plotTracks(fit5)
abline(v=c(knownSegments$start, knownSegments$end)/1e6, lty=3)
stopifnot(nbrOfSegments(fit5) == nrow(knownSegments))
# One can also force a separator between two segments by setting
# 'start' and 'end' to NAs ('chromosome' has to be given)
knownSegments <- data.frame(
chromosome=c( 1, 1, 1),
start =x[c( 1, NA, 401)],
end =x[c(349, NA, J)]
)
fit6 <- segmentByCBS(dataW, knownSegments=knownSegments, verbose=TRUE)
sampleName(fit6) <- "CBS_Example_6"
print(fit6)
plotTracks(fit6)
abline(v=c(knownSegments$start, knownSegments$end)/1e6, lty=3)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Multi-chromosome segmentation
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
data2 <- data
data2$chromosome <- 2L
data <- rbind(data, data2)
dataW <- cbind(data, w=w)
par(mar=c(2,3,0.2,1)+0.1)
# Segment without weights
fit <- segmentByCBS(data)
sampleName(fit) <- "CBS_Example"
print(fit)
plotTracks(fit, Clim=c(-3,3))
# Segment with weights
fitW <- segmentByCBS(dataW)
sampleName(fitW) <- "CBS_Example (weighted)"
print(fitW)
drawLevels(fitW, col="red")
legend("topright", bg="white", legend=c("outliers", "non-weighted CBS", "weighted CBS"), col=c("purple", "purple", "red"), lwd=c(NA,3,3), pch=c(1,NA,NA))
## Assert that weighted segment means are less biased
dmean <- getSegments(fit)$mean - getSegments(fitW)$mean
cat("Segment mean differences:\n")
print(dmean)
stopifnot(all(dmean > 0, na.rm=TRUE))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.