Nothing
## rdafiles <- list.files(".", pattern = "locs.*rda")
## for (file in rdafiles)
## {
## local({
## cat(file, fill = TRUE)
## load(file)
## assign(gsub(".rda", "", file, fixed = TRUE),
## locs,
## envir = .GlobalEnv)
## })
## }
if (FALSE)
{
library("org.Mm.eg.db")
eg.sym <-
toTable(org.Mm.egSYMBOL2EG[c("Tnnc2", "Des", "Myh3", "Myog", "Mylpf",
"Myl1", "Acta1", "Ckm", "Cdh15", "Myl4",
"Itga7")])
eg.chrloc <-
toTable(org.Mm.egCHRLOC[ eg.sym$gene_id ])
merge(eg.sym, eg.chrloc)
}
library(lattice)
library(simplehmm)
allLocs <- function(chr, lane, jitter = TRUE)
## jitter to make the locations more or less unique (otherwise we
## have problems downstream)
{
load(sprintf("locs_%s_%s.rda", lane, chr))
locs <- with(locs, c(forward, reverse))
if (jitter) locs + runif(length(locs))
else locs
}
base.lane <- 8
data.lane <- 2
target.hits <- 30
initial.states <- c(5, 20, 40)
## create list suitable for subsequent work. We use sapply instead of
## lapply because it uses the first argument as names.
chrs.to.use <-
c("chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", ## "chr8",
"chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15",
"chr16", "chr17", "chr18", "chr19")
base.locs <-
sapply(chrs.to.use,
allLocs, lane = base.lane,
simplify = FALSE)
data.locs <-
sapply(chrs.to.use,
allLocs, lane = data.lane,
simplify = FALSE)
names(data.locs)
chromosomes <- names(data.locs)
if (FALSE)
{
histogram(~ data/1e6 | which,
data =
make.groups(base = base.locs$chr6,
data = data.locs$chr6),
nint = 1000,
layout = c(1, 2), type = "count",
col = "magenta", border = "transparent")
QAhist <- function(file = "locs_3_chr6.rda", xlab = "Location (mb)", ...)
{
load(file)
foo <-
histogram(~ I(data/1e6) | which,
data = do.call(make.groups, locs),
nint = 1000,
main = file,
xlab = xlab,
layout = c(1, 2), type = "count",
col = "magenta", border = "transparent", ...)
plot(foo)
}
pdf("QA-hist.pdf")
QAhist("locs_1_chr6.rda")
QAhist("locs_2_chr6.rda")
QAhist("locs_3_chr6.rda")
QAhist("locs_4_chr6.rda")
QAhist("locs_5_chr6.rda")
QAhist("locs_6_chr6.rda")
QAhist("locs_7_chr6.rda")
QAhist("locs_8_chr6.rda")
dev.off()
}
base.hits <- sapply(base.locs, length)
data.hits <- sapply(data.locs, length)
rel <- data.hits / base.hits
getIntervals <-
function(reflocs, obslocs,
target = 15,
prop = length(obslocs) / length(reflocs))
{
nbins <- ceiling(length(reflocs) * prop / target)
quantile(reflocs, prob = ppoints(nbins, a = 1), names = FALSE)
}
bin.list <-
sapply(chromosomes,
function(chr, ...)
getIntervals(base.locs[[chr]], data.locs[[chr]], ...),
target = target.hits,
prop = median(rel),
simplify = FALSE)
## str(bin.list)
datahits.list <-
sapply(chromosomes,
function(chr)
as.numeric(table(cut(x = data.locs[[chr]],
breaks = bin.list[[chr]], labels = NULL))),
simplify = FALSE)
str(datahits.list)
if (FALSE)
{
## just a histogram of the raw counts, with no smoothing
countList <-
mapply(FUN =
function(counts, bins) {
stopifnot(length(bins) == length(counts) + 1)
data.frame(bin.center = 0.5 * (bins[-length(bins)] + bins[-1]) / 1e6,
counts = counts)
}, datahits.list, bin.list, SIMPLIFY = FALSE)
countDF <- do.call(make.groups, countList)
pdf("spikes.pdf", width = 20, height = 24)
## xyplot(counts ~ bin.center | which, countDF, type = c("l", "g"),
## layout = c(1, length(chromosomes)), lwd = 0.5,
## main = sprintf("Lane %g normalized by lane %g", data.lane, base.lane),
## scales =
## list(x = list(tick.number = 20),
## y = list(relation = "free", rot = 0)),
## strip = FALSE, strip.left = TRUE)
xyplot(counts ~ bin.center | cut(bin.center, 10),
countDF, subset = (which == "chr2"),
type = c("l", "g"),
layout = c(1, 10),
lwd = 0.5, col = "black",
main = sprintf("Chr 2, Lane %g normalized by lane %g", data.lane, base.lane),
scales =
list(x = list(relation = "sliced", tick.number = 20),
y = list(relation = "same", rot = 0)),
strip = FALSE)
dev.off()
}
fm3 <-
coverageHmm(datahits.list,
family =
hmm.family("nbinom",
mu = 1,
states.scale = initial.states,
size = 20,
states.free = TRUE))
fm3 <- update(fm3, iterations = 150, verbose = TRUE)
fm3 <- update(fm3, iterations = 2, verbose = TRUE)
summary(fm3)
pdf(sprintf("comparison_%g_%g.pdf", data.lane, base.lane),
width = 10, height = 14)
dotplot(sort(rel), xlab = "Relative coverage per chromosome")
xyplot(fm3, decode = "viterbi",
abscissa =
lapply(bin.list,
function(x) (x[-1] + x[-length(x)]) / 2e6 ),
col = c('darkgrey', 'black'),
ylim = c(0, 100),
scales = list(x = list(tick.number = 20, axs = "r")),
xlab = "Location (Mb)", main = "Decoded path (Viterbi)",
lty = 1,
strip.left = TRUE)
dev.off()
## print(rootogram(fm3, xlim = c(0, 50), strip = TRUE,
## main = "Marginal rootogram"))
## print(rootogram(fm3, marginal = FALSE,
## main = "Conditional rootogram",
## xlim = c(0, 50), strip = TRUE))
## long.output <- paste(prefix, "hmm-results-long.csv", sep = "-")
## short.output <- paste(prefix, "hmm-results-short.csv", sep = "-")
## write.hmm(fm3,
## bins = bin.list,
## decode = "viterbi",
## file = long.output)
## write.hmm(fm3,
## bins = bin.list,
## decode = "viterbi",
## combine = TRUE,
## file = short.output)
## densityplot(~data, do.call(make.groups, locs),
## groups = which,
## n = 1000, bw = 5000,
## plot.points = FALSE)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.