Nothing
#' @include CTSS.R
####################################################################################
# Implementation of simple distance-based clustering of data attached to sequences
#
#' @name distclu-functions
#'
#' @title Private functions for distance clustering.
#'
#' @param max.dist See [clusterCTSS()].
#' @param useMulticore,nrCores See clusterCTSS.
#'
#' @description The flow of data is that a [CTSS] object of CTSSes is
#' progressively deconstructed, and data to form the clusters is progressively
#' integrated in a [data.table] object, which is finally converted to [GRanges]
#' at the end. Doing the whole clustering with `GRanges` is more elegant, but
#' looping on a `GRangesList` was just too slow. Maybe the operation on the
#' `data.table` is more efficient because it is vectorised.
#'
#' @examples
#' # Get example data
#' library(IRanges)
#' library(GenomicRanges)
NULL
#' @name .cluster.ctss.strand
#' @rdname distclu-functions
#'
#' @description `.cluster.ctss.strand` does the strandless distance clustering
#' of strandless CTSS positions from a single chromosome. Input does not need
#' to be sorted, but _pay attention that the output is sorted_.
#'
#' @param ctss.ipos.chr A IPos object.
#'
#' @return `.cluster.ctss.strand` returns an [data.table] object containing
#' arbitrary cluster IDs (as integers) for each CTSS.
#'
#' @importFrom data.table data.table
#' @importFrom IRanges IPos
#' @importFrom S4Vectors queryHits
#' @importFrom S4Vectors runLength
#' @importFrom S4Vectors runValue
#' @importFrom S4Vectors subjectHits
#'
#' @examples
#'
#' #.cluster.ctss.strand
#' ctss.ipos.chr <- IPos(c(1,3,4,12,14,25,28))
#' CAGEr:::.cluster.ctss.strand(ctss.ipos.chr, 5)
#'
#' ctss.chr <- CTSScoordinatesGR(exampleCAGEexp)
#' ctss.chr <- ctss.chr[strand(ctss.chr) == "+"]
#' ctss.ipos.chr <- ranges(ctss.chr)
#' # Same result if not sorted
#' identical(
#' CAGEr:::.cluster.ctss.strand(ctss.ipos.chr, 20),
#' CAGEr:::.cluster.ctss.strand(ctss.ipos.chr[sample(seq_along(ctss.ipos.chr))], 20)
#' )
#' # Returns an emtpy data.table object if given an empty IRanges object.
#' identical(CAGEr:::.cluster.ctss.strand(IPos(), 20), data.table::data.table())
setGeneric(".cluster.ctss.strand", function(ctss.ipos.chr, max.dist) standardGeneric(".cluster.ctss.strand"))
setMethod(".cluster.ctss.strand", "IPos", function(ctss.ipos.chr, max.dist) {
if (identical(ctss.ipos.chr, IPos())) return(data.table())
v <- Rle(rep(TRUE, max(start(ctss.ipos.chr)) + max.dist))
v[start(ctss.ipos.chr) + max.dist] <- FALSE
longRuns <- which(runLength(v) >= max.dist & runValue(v))
c.starts <- cumsum(runLength(v))[longRuns] - max.dist
c.ends <- c(cumsum(runLength(v))[(longRuns - 1)[-1]], length(v)) - max.dist
clusters <- IRanges(start = c.starts, end = c.ends)
o <- findOverlaps(clusters, ctss.ipos.chr)
data.table(id = queryHits(o))
})
#' @name .cluster.ctss.chr
#' @rdname distclu-functions
#'
#' @description \code{.cluster.ctss.chr} does the stranded distance clustering of CTSS on a
#' single chromosome, by dispatching both strands to \code{.cluster.ctss.strand} and merging
#' the results, taking care keep the cluster IDs unique. Be careful that this function does
#' not look at the score.
#'
#' @param ctss.chr A CTSS.chr object.
#'
#' @return \code{.cluster.ctss.chr} returns a \code{\link{data.table}} object representing the
#' chromosome coordinates (\code{chr}, \code{pos}, \code{strand}) of each CTSS, with their
#' cluster ID (\code{id}).
#'
#' @importFrom data.table data.table
#' @importFrom S4Vectors decode
#'
#' @examples
#'
#' #.cluster.ctss.chr
#' ctss.chr <- as(CTSScoordinatesGR(exampleCAGEexp), "CTSS.chr")
#' CAGEr:::.cluster.ctss.chr(ctss.chr, 20)
setGeneric(".cluster.ctss.chr", function(ctss.chr, max.dist) standardGeneric(".cluster.ctss.chr"))
setMethod(".cluster.ctss.chr", "CTSS.chr", function(ctss.chr, max.dist) {
clusters.p <- .cluster.ctss.strand(ranges(ctss.chr[strand(ctss.chr) == "+"]), max.dist)
clusters.m <- .cluster.ctss.strand(ranges(ctss.chr[strand(ctss.chr) == "-"]), max.dist)
maxIdP <- if (identical(clusters.p, data.table())) 0 else max(clusters.p)
clusters.m <- clusters.m + maxIdP
data.table( rbind(clusters.p, clusters.m)
, chr = as.character(seqnames(ctss.chr))
, pos = start(ctss.chr)
, strand = decode(strand(ctss.chr)))
})
#' @name .ctss2clusters
#' @rdname distclu-functions
#'
#' @description \code{.ctss2clusters} does the stranded distance clustering of CTSS.
#'
#' @param ctss A CTSS object with a score column.
#'
#' @return \code{.ctss2clusters} returns a \code{\link{data.table}} object representing the
#' cluster ID (\code{id}), chromosome coordinates (\code{chr}, \code{pos}, \code{strand}) and
#' the \code{score} of each CTSS.
#'
#' @importFrom data.table data.table
#' @importFrom S4Vectors decode
#'
#' @examples
#'
#' # .ctss2clusters
#' ctss <- CTSScoordinatesGR(exampleCAGEexp)
#' score(ctss) <- CTSSnormalizedTpmDF(exampleCAGEexp)[[1]]
#' seqnames(ctss)[rep(c(TRUE,FALSE), length(ctss) / 2)] <- "chr16"
#' ctss
#' clusters <- CAGEr:::.ctss2clusters(ctss, 20)
#' clusters
setGeneric(".ctss2clusters", function(ctss, max.dist = 20, useMulticore = FALSE, nrCores = NULL) standardGeneric(".ctss2clusters"))
setMethod(".ctss2clusters", "CTSS", function(ctss, max.dist, useMulticore, nrCores) {
ctss <- sort(ctss)
ctss <- ctss[score(ctss) != 0]
ctss.list <- split(ctss, droplevels(seqnames(ctss)))
ctss.list <- lapply(ctss.list, as, "CTSS.chr")
ctss.list <- bplapply( ctss.list, .cluster.ctss.chr, max.dist = max.dist
, BPPARAM = CAGEr_Multicore(useMulticore, nrCores))
max.clust <- sapply(ctss.list, function(x) {max(x$id)})
max.clust <- cumsum(c(0, max.clust[-length(max.clust)]))
for (x in seq_along(max.clust))
ctss.list[[x]]$id <- ctss.list[[x]]$id + max.clust[x]
dt <- do.call(rbind, ctss.list)
dt$tpm <- decode(score(ctss))
dt
})
#' @name .summarize.clusters
#' @rdname distclu-functions
#' @description \code{.summarize.clusters} calculates the number of CTSS, and
#' the position and score of a main peak, for each cluster.
#'
#' @param ctss.clustered A \code{\link{data.table}} object representing the cluster ID
#' (\code{id}), chromosome coordinates (\code{chr}, \code{pos}, \code{strand}) and
#' the \code{score} of each CTSS.
#' @param removeSingletons Remove \dQuote{singleton} clusters that span only a single
#' nucleotide ? (default = FALSE).
#' @param keepSingletonsAbove Even if \code{removeSingletons = TRUE}, keep singletons when
#' their score is aboove threshold (default = \code{Inf}).
#'
#' @return \code{.summarize.clusters} returns GRanges describing the clusters.
#'
#' @importFrom data.table setnames
#'
#' @examples
#'
#' # .summarize.clusters
#' CAGEr:::.summarize.clusters(clusters)
#' CAGEr:::.summarize.clusters(clusters, removeSingletons = TRUE)
#' CAGEr:::.summarize.clusters(clusters, removeSingletons = TRUE, keepSingletonsAbove = 5)
setGeneric(".summarize.clusters", function(ctss.clustered, removeSingletons = FALSE, keepSingletonsAbove = Inf) standardGeneric(".summarize.clusters"))
setMethod(".summarize.clusters", "data.table", function(ctss.clustered, removeSingletons, keepSingletonsAbove) {
find.dominant.idx <- function (x) {
w <- which(x == max(x))
w[ceiling(length(w)/2)]
}
chr <- pos <- tpm <- cluster <- id <- NULL # To keep R CMD check happy.
clusters <- ctss.clustered[ , list( chr[1]
, min(pos)
, max(pos)
, strand[1]
, length(pos)
, pos[find.dominant.idx(tpm)]
, sum(tpm)
, max(tpm))
, by = id]
setnames(clusters, c( "cluster"
, "chr", "start", "end", "strand"
, "nr_ctss", "dominant_ctss", "tpm", "tpm.dominant_ctss"))
if(removeSingletons)
clusters <- subset(clusters, clusters$nr_ctss > 1 | clusters$tpm >= keepSingletonsAbove)
gr <- GRanges( seqnames = Rle(factor(clusters$chr))
, ranges = IRanges(clusters$start, clusters$end)
, strand = clusters$strand
, score = Rle(clusters$tpm)
, nr_ctss = clusters$nr_ctss
, dominant_ctss = clusters$dominant_ctss
, tpm.dominant_ctss = Rle(clusters$tpm.dominant_ctss)
)
names(gr) <- seq_along(gr)
sort(gr)
})
#' @name .distclu
#' @rdname distclu-functions
#' @description \code{.distclu} receives the data from the main \code{clusterCTSS} and
#' dispatches each for (possibly parallel) processing.
#'
#' @param se A \code{\link{SummarizedExperiment}} object representing the CTSSes and
#' their expression in each sample.
#'
#' @return \code{.distclu} returns GRanges describing the clusters.
#'
#' @importFrom GenomicRanges GRangesList
#' @importFrom SummarizedExperiment rowRanges
#'
#' @examples
#'
#' # .distclu
#' CAGEr:::.distclu(CTSStagCountSE(exampleCAGEexp))
#' \dontrun{
#' CAGEr:::.distclu(CTSStagCountSE(exampleCAGEexp), useMulticore = TRUE)
#' }
#'
#' CAGEr:::.distclu(CTSStagCountSE(exampleCAGEset))
setGeneric(".distclu", function(se, max.dist = 20, removeSingletons = FALSE, keepSingletonsAbove = Inf, useMulticore = FALSE, nrCores = NULL) standardGeneric(".distclu"))
setMethod(".distclu", "SummarizedExperiment", function(se, max.dist, removeSingletons, keepSingletonsAbove, useMulticore, nrCores) {
ctss.cluster.list <- list()
for(s in colnames(se)) {
message("\t-> ", s)
d <- as(rowRanges(se), "CTSS")
score(d) <- assays(se)[["normalizedTpmMatrix"]][[s]]
d <- subset(d, score(d) > 0)
clusters <- .ctss2clusters(ctss = d, max.dist = max.dist, useMulticore = useMulticore, nrCores = nrCores)
ctss.cluster.list[[s]] <- clusters
}
ctss.cluster.list <- bplapply( ctss.cluster.list, .summarize.clusters
, removeSingletons = removeSingletons
, keepSingletonsAbove = keepSingletonsAbove
, BPPARAM = CAGEr_Multicore(useMulticore, nrCores))
GRangesList(ctss.cluster.list)
})
#################################################################################################################
# Implementation of Paraclu - parametric clustering of data attached to sequences (http://www.cbrc.jp/paraclu/)
# Reference: A code for transcription initiation in mammalian genomes,
# MC Frith, E Valen, A Krogh, Y Hayashizaki, P Carninci, A Sandelin, Genome Research 2008 18(1):1-12)
#
#' @importFrom utils tail
.paraclu1 <- function(ctss) {
sit <- nrow(ctss)
tot <- sum(ctss$tpm)
if(sit == 1) {
min_density <- Inf
br <- NA
}else{
densities_forward <- cumsum(ctss$tpm)[-sit]/(ctss$pos[2:sit] - ctss$pos[1])
densities_reverse <- cumsum(rev(ctss$tpm))[-sit]/(ctss$pos[sit] - ctss$pos[(sit-1):1])
min_densities = c(min(densities_forward), min(densities_reverse))
breaks <- c(which(densities_forward == min_densities[1])[1] + 1, sit + 1 - which(densities_reverse == min_densities[2])[1])
min_density <- min(min_densities)
br <- breaks[tail(which(min_densities == min_density),1)]
}
return(as.list(c(br, min_density, tot, sit)))
}
.paraclu2 <- function(ctss, min_density = -Inf, clusters.df = data.frame()) {
if(nrow(ctss)>0){
ctss <- ctss[order(ctss$pos),]
params <- .paraclu1(ctss)
br <- params[[1]]
max_density <- params[[2]]
tot <- params[[3]]
sit <- params[[4]]
if(!(max_density == Inf)){
new_min <- max(min_density, max_density)
clusters.df <- rbind(.paraclu2(ctss = ctss[1:(br-1),], min_density = new_min, clusters.df = clusters.df), .paraclu2(ctss = ctss[br:nrow(ctss),], min_density = new_min, clusters.df = clusters.df))
}
return(rbind(clusters.df, data.frame(chr = ctss$chr[1], start = min(ctss$pos), end = max(ctss$pos), strand = ctss$strand[1], nr_ctss = sit, dominant_ctss = ctss$pos[which(ctss$tpm == max(ctss$tpm))[ceiling(length(which(ctss$tpm == max(ctss$tpm)))/2)]], tpm = tot, tpm.dominant_ctss = ctss$tpm[which(ctss$tpm == max(ctss$tpm))[ceiling(length(which(ctss$tpm == max(ctss$tpm)))/2)]], min_d = min_density, max_d= max_density)))
}else{
return(clusters.df)
}
}
.paraclu3 <- function(ctss.df, minStability = 1, maxLength = 500, removeSingletons = FALSE, keepSingletonsAbove = Inf, reduceToNonoverlapping = TRUE, useMulticore = FALSE, nrCores = NULL){
ctss.df.plus.list <-lapply(as.list(unique(ctss.df$chr)), function(x) {subset(ctss.df, ctss.df$chr == x & strand == "+")})
ctss.df.minus.list <-lapply(as.list(unique(ctss.df$chr)), function(x) {subset(ctss.df, ctss.df$chr == x & strand == "-")})
ctss.df.list <- append(ctss.df.plus.list, ctss.df.minus.list)
clusters.list <- bplapply( ctss.df.list, .paraclu2
, BPPARAM = CAGEr_Multicore(useMulticore, nrCores))
n <- length(clusters.list)/2
clusters.list <- lapply(as.list(c(1:n)), function(x) {rbind(clusters.list[[x]], clusters.list[[x+n]])})
clusters <- do.call(rbind, clusters.list)
clusters <- subset(clusters, (clusters$max_d >= (minStability * clusters$min_d)) & ((end - start + 1) <= maxLength))
if(removeSingletons == TRUE){
clusters <- subset(clusters, !(clusters$start == clusters$end & clusters$tpm < keepSingletonsAbove))
}
if(reduceToNonoverlapping == TRUE){
clusters.gr <- GRanges(seqnames = clusters$chr, ranges = IRanges(start = clusters$start, end = clusters$end), strand = clusters$strand, elementMetadata = clusters)
o <- findOverlaps(clusters.gr, drop.self = TRUE, type = "within")
clusters.gr <- clusters.gr[-queryHits(o)]
clusters <- subset(clusters, paste(clusters$chr, clusters$strand, clusters$start, clusters$end, sep = ".") %in% paste(seqnames(clusters.gr), strand(clusters.gr), start(clusters.gr), end(clusters.gr), sep = "."))
}
clusters <- cbind(cluster = c(1:nrow(clusters)), clusters)
clusters$start <- clusters$start - 1
rownames(clusters) <- c(1:nrow(clusters))
colnames(clusters)[which(colnames(clusters) == "min_d")] <- "min_density"
colnames(clusters)[which(colnames(clusters) == "max_d")] <- "max_density"
return(clusters)
}
.paraclu <- function(data, sample.labels, minStability = 1, maxLength = 500, removeSingletons = FALSE, keepSingletonsAbove = Inf, reduceToNonoverlapping = TRUE, useMulticore = FALSE, nrCores = NULL) {
ctss.cluster.list <- list()
for(s in sample.labels) {
message("\t-> ", s)
d <- data[,c("chr", "pos", "strand", s)]
colnames(d) <- c("chr", "pos", "strand", "tpm")
d <- d[d$tpm>0,]
ctss.cluster.df <- .paraclu3(ctss.df = d, minStability = minStability, maxLength = maxLength, removeSingletons = removeSingletons, keepSingletonsAbove = keepSingletonsAbove, reduceToNonoverlapping = reduceToNonoverlapping, useMulticore = useMulticore, nrCores = nrCores)
ctss.cluster.list[[s]] <- ctss.cluster.df
}
return(ctss.cluster.list)
}
#########################################################################
# Imposing predefined clusters to segment the data attached to sequences
.cluster.ctss.strand.predef <- function(ctss.df, custom.clusters) {
clusters <- data.frame(cluster = c(1:length(custom.clusters$start)), chr = custom.clusters$chr, start = custom.clusters$start, end = custom.clusters$end, strand = custom.clusters$strand)
clusters.ir <- IRanges(start = clusters$start+1, end = clusters$end)
ctss.df <- ctss.df[order(ctss.df$pos),]
ctss.df.ir <- IRanges(start = ctss.df$pos, end = ctss.df$pos)
o <- findOverlaps(clusters.ir, ctss.df.ir)
ctss.df.1 <- cbind(ctss.df[subjectHits(o),], cluster = clusters$cluster[queryHits(o)], start = clusters$start[queryHits(o)], end = clusters$end[queryHits(o)])
ctss.df.2 <- data.frame(chr = clusters$chr[!(clusters$cluster %in% clusters$cluster[queryHits(o)])], pos = rep(NA, length(clusters$cluster[!(clusters$cluster %in% clusters$cluster[queryHits(o)])])), strand = clusters$strand[!(clusters$cluster %in% clusters$cluster[queryHits(o)])], tpm = rep(0, length(clusters$cluster[!(clusters$cluster %in% clusters$cluster[queryHits(o)])])), cluster = clusters$cluster[!(clusters$cluster %in% clusters$cluster[queryHits(o)])], start = clusters$start[!(clusters$cluster %in% clusters$cluster[queryHits(o)])], end = clusters$end[!(clusters$cluster %in% clusters$cluster[queryHits(o)])])
ctss.df <- rbind(ctss.df.1, ctss.df.2)
ctss.df <- ctss.df[order(ctss.df$cluster),]
invisible(gc())
return(ctss.df)
}
.cluster.ctss.chr.predef <- function(ctss.df, custom.clusters) {
ctss.df.p <- subset(ctss.df, strand == "+")
custom.clusters.p <- subset(custom.clusters, strand == "+")
if(nrow(custom.clusters.p) > 0) {
ctss.df.plus <- .cluster.ctss.strand.predef(ctss.df = ctss.df.p, custom.clusters = custom.clusters.p)
last.plus.cluster <- max(ctss.df.plus$cluster)
}else{
ctss.df.plus <- data.frame()
last.plus.cluster <- 0
}
invisible(gc())
ctss.df.m <- subset(ctss.df, strand == "-")
custom.clusters.m <- subset(custom.clusters, strand == "-")
if(nrow(custom.clusters.m) > 0) {
ctss.df.minus <- .cluster.ctss.strand.predef(ctss.df = ctss.df.m, custom.clusters = custom.clusters.m)
ctss.df.minus$cluster <- last.plus.cluster + ctss.df.minus$cluster
}else{
ctss.df.minus <- data.frame()
}
invisible(gc())
return(rbind(ctss.df.plus, ctss.df.minus))
}
.ctss2clusters.predef <- function(ctss.df, custom.clusters, useMulticore = FALSE, nrCores = NULL) {
ctss.cluster.list <- bplapply(as.list(unique(custom.clusters$chr)), function(x) {
ctss.df.chr <- subset(ctss.df, ctss.df$chr == x)
custom.clusters <- subset(custom.clusters, custom.clusters$chr == x)
if(nrow(custom.clusters)>0){
ctss.cluster.chr.df <- .cluster.ctss.chr.predef(ctss.df = ctss.df.chr, custom.clusters = custom.clusters)
}
}, BPPARAM = CAGEr_Multicore(useMulticore, nrCores))
max.clust <- unlist(lapply(ctss.cluster.list, function(x) {max(x$cluster)}))
max.clust <- cumsum(c(0, max.clust[-length(max.clust)]))
invisible(gc())
ctss.cluster.list <- lapply(as.list(1:length(max.clust)), function(x) {
a = ctss.cluster.list[[x]]
a$cluster = a$cluster + max.clust[x]
return(a)
}
)
ctss.cluster.df <- do.call(rbind, ctss.cluster.list)
invisible(gc())
return(ctss.cluster.df)
}
.summarize.clusters.predef <- function(ctss.cluster.df) {
ctss.cluster <- data.table(ctss.cluster.df)
chr <- pos <- tpm <- cluster <- NULL # To keep R CMD check happy.
ctss.cluster <- ctss.cluster[, list(chr[1], start[1], end[1], strand[1], length(pos), pos[which(tpm == max(tpm))[ceiling(length(which(tpm == max(tpm)))/2)]], sum(tpm), max(tpm)), by = cluster]
setnames(ctss.cluster, c("cluster", "chr", "start", "end", "strand", "nr_ctss", "dominant_ctss", "tpm", "tpm.dominant_ctss"))
ctss.cluster <- data.frame(ctss.cluster)
ctss.cluster$nr_ctss[ctss.cluster$tpm == 0] <- 0
invisible(gc())
return(ctss.cluster)
}
.predefined.clusters <- function(data, sample.labels, custom.clusters, useMulticore = FALSE, nrCores = NULL){
ctss.cluster.list <- list()
for(s in sample.labels) {
message("\t-> ", s)
d <- data[,c("chr", "pos", "strand", s)]
colnames(d) <- c("chr", "pos", "strand", "tpm")
d <- d[d$tpm > 0,]
ctss.cluster.df <- .ctss2clusters.predef(ctss.df = d, custom.clusters = custom.clusters, useMulticore = useMulticore, nrCores = nrCores)
ctss.cluster.list[[s]] <- ctss.cluster.df
invisible(gc())
}
ctss.cluster.list <- bplapply( ctss.cluster.list, .summarize.clusters.predef
, BPPARAM = CAGEr_Multicore(useMulticore, nrCores))
invisible(gc())
return(ctss.cluster.list)
}
#' @name tagClusterConvertors
#'
#' @title Private functions to convert TC formats
#'
#' @description Interconvert tag clusters (TC) formats used in classes CAGEset
#' (\code{data.frame}) and CAGEexp (\code{GRanges}).
#'
#' @details
#' The original format used in \code{\link{CAGEset}} objects follows BED
#' ("0-based") conventsion for the start and end coordinates. On the other
#' hand, the GRanges objects used in \code{\link{CAGEexp}} objects follow
#' the "1-based" convention. Therefore a value of 1 has to be added or
#' subtracted to the start positions when converting between both formats.
#'
#' @family df2granges converters
#'
#' @examples
#' df <- tagClusters(exampleCAGEset, 1)
#' head(df)
#' gr <- CAGEr:::TCdataframe2granges(df)
#' gr
#' head(CAGEr:::TCgranges2dataframe(gr))
#' # No exact round-trip because start and end were not integer in df.
#' identical(df, CAGEr:::TCgranges2dataframe(gr))
#' if (! all(df == CAGEr:::TCgranges2dataframe(gr)))
#' stop("No round-trip between TCdataframe2granges and TCgranges2dataframe")
#'
#' tagClustersGR(exampleCAGEexp)
#' head(CAGEr:::TCgranges2dataframe(CAGEr:::tagClustersGR(exampleCAGEexp, 1)))
NULL
#' @name TCgranges2dataframe
#'
#' @rdname tagClusterConvertors
#'
#' @param gr Consensus clusters in \code{GRanges} format.
TCgranges2dataframe <- function(gr) {
if (!is.null(gr$cluster)) {
df <- data.frame(cluster = gr$cluster)
gr$cluster <- NULL
} else {
df <- data.frame(cluster = as.integer(names(gr))) # Make sure it does not sort lexically!
}
df <- cbind(df , data.frame( chr = decode(seqnames(gr))
, start = start(gr) -1
, end = end(gr)
, strand = decode(droplevels(strand(gr)))))
if(is.null(gr$tpm))
df$tpm <- score(gr)
score(gr) <- NULL
df <- cbind(df, mcols(gr))
as.data.frame(df)
}
#' @name TCdataframe2granges
#' @rdname tagClusterConvertors
#'
#' @param df Consensus clusters in \code{data.frame} format.
TCdataframe2granges <- function(df) {
gr <- GRanges( seqnames = df$chr
, ranges = IRanges(df$start + 1, df$end)
, score = df$tpm
, strand = df$strand)
if(is.null(df[["cluster"]]))
gr$cluster <- seq_along(gr)
mcols(gr) <- cbind( mcols(gr)
, df[,setdiff(colnames(df), c("chr", "start", "end", "strand")), drop = FALSE])
names(gr) <- rownames(df)
gr
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.