getReportOptions <- function(){list(
list(arg="--segments", type="character", required=TRUE, vectorial=TRUE,
help="Path to the bed file containing the segmentation. The name field
in the bed file must be a number from 1 to the maximum number of states.
More than one path can be provided if the bed files describe alternative
segmentations of the same genomic regions and if the paths are labelled,
i.e. of the form `LABEL:PATH`. "),
list(arg="--model", type="character", required=TRUE, parser=readModel,
help="Path to the file with the parameters of the HMM.
All fields must be present."),
list(arg="--outdir", type="character", parser=validateOutdir, default=".",
help="Path to the folder where all results will be saved. If
the folder doesn't exist it will be created."),
list(arg="--prefix", type="character", parser=sanitizeFilename, default="",
help="Prefix to append to all produced files. The prefix should
not be a path (use the -o option for that)."),
list(arg="--colors", type="character", parser=readColors,
help="Path to a text file containing one color per chromatin state.
Each color must be on a separate line, and they can be specified
either as RGB colors `R,G,B` where 0 <= R,G,B < 256, or as R colors
(like `red`, `gold`, `green4`...)."),
list(arg="--labels", type="character", parser=readLines,
help="Path to a text file containing one short text label
per chromatin state. Each label must be on a separate line."),
list(arg="--annot", type="character", vectorial=TRUE, parser=readAnnotations, meta="label:path",
help="Genomic annotation to compare with the segmentation. It must
be specified with a short title (no spaces) and a path to the bed
file containing the annotation (the strand matters), separated by `:`.
This option can be repeated, for example:
`--annot genes:/path1/genes.bed --annot cpg:/path2/cpgislands.bed`."))}
reportCLI <- function(args, prog){
opt <- parseArgs(getReportOptions(), args, prog)
#deal with the two scenarios for the segmentation specification
#unlabelled path to a bed file, acceptable if there is only 1
if (length(opt$segments) == 1 && !grepl(":", opt$segments)){
opt$segments <- readRegions(opt$segments)
} else {#labelled paths to bed files
lp <- label_sc_path(opt$segments, unique.labels=TRUE)
opt$segments <-
GRangesList(lapply(setNames(lp$path, lp$label), readRegions))
}
#call report
htmlpath <- do.call(report, opt)
cat("results written to the file: '", htmlpath, "'\n", sep="")
}
#' Produce an html report of a segmentation
#'
#' @param segments GRanges or GRangesList object containing the segmentation.
#' If GRanges, the \code{names} slot must be a number from 1 to the maximum
#' number of states. Same is true for the unlisted data in a GRangesList.
#' If a GRangesList, the paths to of each bed file will depend on the names
#' given by \code{names(segments)}.
#' @param model A list with the complete set of the parameters that describe
#' the HMM, such as that returned by the \code{segment} function.
#' @param outdir Output directory where the report will be created.
#' @param prefix Prefix prepended to all output files.
#' @param colors A character vector assigning one color per state.
#' each color must be a valid R color.
#' @param labels A character vector assigning one name per state
#' @param annots Named list where each item is a GRanges object to be
#' compared with the segmentation.
#' @param rdata R objects that will be saved as Rdata archives and will be
#' part of the report.
#' @param autoColors a list with two fields that controls how
#' the colors are automatically assigned to each state. The value \code{NULL}
#' disables automatic coloring.
#' @details A web page will be created with plots linked to
#' tables in text format.
#' @return The path to the newly created webpage
#' @export
report <- function(segments, model, outdir=".",
prefix="", colors=NULL, labels=NULL, annots=list(),
rdata=NULL, autoColors=NULL){
#CHECKING ARGUMENTS
#you should make sure that when report is called from 'segmentCLI'
#no error causes the program to abort (all assertions done here
#should always pass)
validateModel(model, strict=FALSE)
nstates <- model$nstates
# 'segmlist' will store the GRangesList object, 'segments' the unlisted data
if (inherits(segments, "GRanges")){
segmlist <- GRangesList(segments)
} else if (inherits(segments, "GRangesList")){
segmlist <- segments
segments <- unlist(segments, use.names=FALSE)
# Checking that the segments in each dataset sum up to the same
# total width. Not a complete check, but should be enough for most purposes
ws <- sapply(segmlist, function(segm) {sum(as.numeric(width(segm)))})
if (any(ws != ws[1])) stop("GRanges not on the same genomic regions")
} else if (is.null(segments)) { stop("'segments' cannot be NULL")
} else stop("'segments' can be a GRangesList or a GRanges object")
snames <- names(segments)
if (is.null(snames)) stop("segments must be annotated with the state number")
inames <- as.integer(snames)
if (any(is.na(inames) | inames <= 0 | inames > nstates)) stop("invalid segment names")
if (!all(sapply(annots, inherits, what="GRanges"))) stop("'annots' must be a list of GenomicRanges objects")
if (length(annots) > 0 && is.null(names(annots))) stop("'annots' elements must be named")
validateOutdir(outdir)
#SANITIZING INPUT
#no errors allowed from here on, at most warnings
prefix <- sanitizeFilename(prefix)
if (!is.null(colors) && !is.null(autoColors)) {
colors <- automaticColoring(getMeanMatrix(model$emisP), autoColors)
}
colors <- pickColors(nstates, colors)
labels <- pickLabels(nstates, labels)
#DOING REPORT
doc <- c(
htmlHeader("EpiCSeg report"),
"<center>",
reportRdata(rdata, outdir, prefix),
reportModel(model, labels, colors, outdir, prefix),
reportSegmList(segmlist, labels, colors, outdir, prefix),
reportAnnots(annots, segments, labels, colors, outdir, prefix),
"</center>",
htmlFooter())
htmlpath <- makePath(outdir, prefix, "report.html")
writeLines(doc, htmlpath)
htmlpath
}
makePath <- function(outdir, prefix, last){
file.path(outdir, paste0(prefix, last))
}
reportRdata <- function(rdata, outdir, prefix){
if (is.null(rdata)) return(NULL)
path <- makePath(outdir, prefix, "rdata.Rdata")
save(rdata, file=path)
htmlLink("additional segmentation data as an R archive", path)
}
reportNBs <- function(nbs, modelPath, outdir, prefix){
path <- makePath(outdir, prefix, "nbs.png")
png(path)
plotNBs(nbs, main="total count across marks", xlab="mean +- standard deviation", ylab=NA)
dev.off()
#return html
htmlImgLink(path, modelPath)
}
reportMeans <- function(means, modelPath, statecolors, outdir, prefix){
path <- makePath(outdir, prefix, "means.png")
#plot mean matrix
myheat(t(means), xlab="mark", ylab="state", zlab="mean count",
main="mean counts", dev=path, col=heatpal, rowColors=statecolors)
#return html
htmlImgLink(path, modelPath)
}
reportLMeans <- function(lmeans, statecolors, outdir, prefix){
paths <- makePath(outdir, prefix, c("lmeans.txt", "lmeans.png"))
#write table
write.table(lmeans, col.names=T, row.names=T, file=paths[1], quote=F, sep="\t")
#make plot
myheat(t(lmeans), xlab="mark", ylab="state", zlab="log(mean count + 1)",
main="log of mean counts", dev=paths[2], col=heatpal,rowColors=statecolors)
#return html
htmlImgLink(paths[2], paths[1])
}
reportTrans <- function(transP, modelPath, statecolors, outdir, prefix){
path <- makePath(outdir, prefix, "transP.png")
#plot transition probabilities
myheat(transP, xlab="state to", ylab="state from", zlab="probability",
main="transition\nmatrix", dev=path, col=heatpal, rowColors=statecolors,
colColors=statecolors)
#return html
htmlImgLink(path, modelPath)
}
reportModel <- function(model, labels, statecolors, outdir, prefix){
#setting the right labels
names(model$emisP) <- labels
rownames(model$transP) <- labels
colnames(model$transP) <- labels
#convert parameters from list to matrix format
nbs <- getNBs(model$emisP)
means <- getMeanMatrix(model$emisP)
rownames(means) <- model$marks
#find a good permutation of the marks for plotting
#determine a distance matrix between marks
lmeans <- log(means + 1)
dmat <- as.matrix(dist(lmeans, method="euclidean"))
#get a small weight path through all the marks
marksOrder <- smallWeightHamiltonianPath(dmat)
#write the model
modelPath <- makePath(outdir, prefix, "model.txt")
writeModel(model, modelPath)
nbs_html <- reportNBs(nbs, modelPath, outdir, prefix)
means_html <- reportMeans(means[marksOrder,,drop=F], modelPath, statecolors, outdir, prefix)
lmeans_html <- reportLMeans(lmeans[marksOrder,,drop=F], statecolors, outdir, prefix)
trans_html <- reportTrans(model$transP, modelPath, statecolors, outdir, prefix)
htmlSection("1. Model parameters",
paste(sep="\n",
nbs_html,"<br>",
htmlMatToTable(valign="top", matrix(nrow=1, c(means_html, lmeans_html, trans_html)))
)
)
}
reportSegmList <- function(segmlist, labels, colors, outdir, prefix){
nstates <- length(colors)
#write colors
colorsPath <- makePath(outdir, prefix, "colors.txt")
writeColors(colors, colorsPath)
#decide paths
if (length(segmlist)==1 || is.null(names(segmlist))){
segmPaths <- makePath(outdir, prefix, "segmentation.bed")
} else {
if (is.null(names(segmlist))) names(segmlist) <- 1:length(segmlist)
segmPaths <- makePath(outdir, prefix, paste0("segmentation_", names(segmlist), ".bed"))
}
if (any(labels != as.character(1:nstates))) {
segmPaths <- gsub(".bed$", "_labelled.bed", segmPaths) }
for (i in seq_along(segmlist)){
segmentsToBed(segmlist[[i]], labels, col2bedCol(colors), segmPaths[i])
}
#write table
segments <- unlist(segmlist, use.names=FALSE)
tabPaths <- makePath(outdir, prefix, c("table.txt", "table.png"))
inames <- as.integer(names(segments))
tab <- kfoots:::sumAt(width(segments), inames, size=nstates, zeroIdx=FALSE)
tab <- tab/sum(tab)
write.table(matrix(tab, ncol=1), col.names=F, row.names=F, file=tabPaths[1], quote=F, sep="\t")
png(tabPaths[2])
barplot(rev(tab), col=rev(colors), main="state frequencies", xlab="fraction of all bins",
horiz=TRUE, border=NA, names.arg=rev(labels), las=2)
dev.off()
if (length(segmlist)==1) {
bedLinks <- htmlLink("segmentation as a .bed file", segmPaths)
} else {
bedLinks <- paste0(collapse="<br>\n",
htmlLink(
paste0("segmentation as a .bed file (", names(segmlist), ")"),
segmPaths))
}
htmlSection("2. Segmentation",
paste(sep="\n",
htmlLink("state colors", colorsPath),"<br>",
bedLinks,"<br>",
htmlImgLink(tabPaths[2], tabPaths[1])
)
)
}
reportAnnot <- function(annot, name, segments, labels, colors, outdir, prefix){
nstates <- length(colors)
paths <- makePath(outdir, prefix, paste0("annot_", name, c(".txt", ".png")))
cat("processing annotation: '", name, "'\n", sep="")
prof <- avgStateProfile(annot, segments, nstates, before=5000, after=5000)
rownames(prof) <- labels
plotProfileAndLegend2Dev(prof, colors, dev=paths[2], main=paste0("states vs ", name))
write.table(prof, file=paths[1], col.names=T, row.names=F, quote=F, sep="\t")
htmlImgLink(paths[2], paths[1])
}
reportAnnots <- function(annots, segments, labels, colors, outdir, prefix){
#process annotations independently
annotDocs <- sapply(seq_along(annots), function(i){
reportAnnot(annots[[i]], names(annots)[i], segments, labels, colors, outdir, prefix)
})
if (length(annotDocs)==0) return(NULL)
htmlSection("3. Genomic annotations", htmlMatToTable(matrix(nrow=1, annotDocs)))
}
pickLabels <- function(nstates, labels){
if (!is.null(labels) ){
if (length(labels) != nstates) {
warning("wrong number of state labels provided")
labels <- NULL
} else labels <- sanitizeFilename(labels)
}
if (is.null(labels)) labels <- as.character(1:nstates)
labels
}
pickColors <- function(nstates, colors){
#1. try to use user-defined colors
if (!is.null(colors) && length(colors) != nstates){
warning("wrong number of state colors provided")
colors <- NULL
}
#check if the given colors are valid R colors
tryCatch(col2bedCol(colors),
error = function(e){
warning(paste0("invalid colors:\n", e$message))
colors <- NULL
})
#3. fall back to some arbitrary color palette
if (is.null(colors)){
set.seed(13)
colors <- sample(qual.pal(nstates))
set.seed(NULL)
}
colors
}
#average rank of counts distributed according to the given parameters (mu and r)
#if considered together with all the empirical counts
expRank <- function(counts, mus, rs=Inf, normalize=T){
if (length(rs)==1) rs <- rep(rs, length(mus))
if (length(mus) != length(rs)) stop("invalid number of rs provided (one or as many as the mus)")
#rank when the simulated count has a value equal or less to the maximal empirical count
#(separately for each count in [0, length(countsTab)-1])
countsTab <- kfoots:::tabFast(counts)
ccountsTab <- cumsum(countsTab)
totc <- length(counts)
maxc <- length(countsTab)-1
condRank <- countsTab/2 + 1 + c(0,ccountsTab[1:maxc])
sapply(seq_along(mus), function(i){
mu <- mus[i]
r <- rs[i]
if (is.finite(mu*r)){
ps <- dnbinom(0:maxc, mu=mu, size=r)
p <- pnbinom(maxc, mu=mu, size=r, lower.tail=F)
} else {
ps <- dpois(0:maxc, lambda=mu)
p <- ppois(maxc, lambda=mu, lower.tail=F)
}
rnk <- p*(totc+1) + sum(ps*condRank)
if (normalize) rnk <- (rnk-1)/totc
rnk
})
}
#in the argument 'means', rows are marks, columns are states
#the rows must be named
#the argument 'rs' is the r parameter for each state
#the order of the states must match between 'means' and 'rs'
#in the result, rows are marks, columns are states
expRankMatrix <- function(counts, means, rs, normalize=T){
if (any(rownames(means) != rownames(counts))) stop("mean matrix not matching")
#loop on the marks
erm <- t(sapply(1:nrow(means), function(i){
expRank(counts[i,], means[i,], rs, normalize=normalize)
}))
dimnames(erm) <- dimnames(means)
erm
}
#METHODS TO VALIDATE AND SANITIZE ARGUMENTS
readAnnotations <- function(annotFields){
lp <- label_sc_path(annotFields, unique.labels=TRUE)
lp$label <- sanitizeFilename(lp$label)
lapply(setNames(lp$path, lp$label), readRegions)
}
#check that an input string can be used
#as a filename
sanitizeFilename <- function(fname){
gsub("[^[:alnum:]_-]+", "_", fname)
}
checkWritable <- function(path){
if (file.access(path, 0)<0){
#file doesn't exist, see if you can create and delete one
tryCatch({
writeLines("checking path", path)
file.remove(path)
}, error = function(e){
stop(paste0("something is wrong with the provided 'outdir' parameter:\n",
e$message))
})
} else {
#file exists, check that you have write permissions
if (file.access(path, 2)<0) {
stop(paste0("no write permissions to the file: ", path))}
}
}
#this is not really a 'reader',
#it is only checking that oudir is a valid
#output directory by trying to create and delete
#a dummy file in that directory
validateOutdir <- function(outdir){
if (length(outdir) != 1) stop("expecting a single path")
path <- file.path(outdir, ".check_path.txt")
checkWritable(path)
outdir
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.