R/ExportMethods.R
In CAGEr: Analysis of CAGE (Cap Analysis of Gene Expression) sequencing data for precise mapping of transcription start sites and promoterome mining

#' @include CAGEr.R

###########################################################
# Functions for exporting results (graphical and textual)
#

#' @name plotReverseCumulatives
#' 
#' @title Plot reverse cumulative number of CAGE tags per CTSS
#' 
#' @description Plots the reverse cumulative distribution of the number of CAGE
#' tags per CTSS for all CAGE datasets present in the [`CAGEr`] object.
#' The plots should be used as help in choosing the parameters for power-law
#' normalization: the range of values to fit the power-law and the slope of the
#' referent power-law distribution (Balwierz _et al_., Genome Biology 2009).
#' 
#' @param object A CAGEr object
#' 
#' @param values Which values should be plotted: `raw` (default) for raw CAGE
#' tag counts or `normalized` for normalized tag count values.
#' 
#' @param fitInRange An integer vector with two values specifying a range of tag
#' count values to be used for fitting a power-law distribution to reverse
#' cumulatives.  Ignored is set to `NULL`.  See Details.
#' 
#' @param onePlot Logical, should all CAGE datasets be plotted in the same
#' plot (`TRUE`) or in separate plots (`FALSE`). 
#' 
#' @param main Main title for the plot.
#' 
#' @param legend Set to `NULL` to prevent the display of the sample legend.
#' 
#' @param xlab,ylab Axis labels passed to [`plot`].
#' 
#' @param xlim,ylim Axis range parameters passed to [`plot`].
#' 
#' @details Number of CAGE tags (X-axis) is plotted against the number of TSSs that
#' are supported by >= of that number of tags (Y-axis) on a log-log scale for each
#' sample. In addition, a power-law distribution is fitted to each reverse cumulative
#' using the values in the range specified by `fitInRange` parameter. The fitted
#' distribution is defined by `y = -1 * alpha * x + beta` on the log-log scale,
#' and the value of `alpha` for each sample is shown on the plot. In addition,
#' a suggested referent power-law distribution to which all samples should be
#' normalized is drawn on the plot and corresponding parameters (slope alpha and total
#' number of tags T) are denoted on the plot.  Referent distribution is chosen so
#' that its slope (alpha) is the median of slopes fitted to individual samples and
#' its total number of tags (T) is the power of 10 nearest to the median number of tags
#' of individual samples.  Resulting plots are helpful in deciding whether power-law
#' normalization is appropriate for given samples and reported `alpha` values aid
#' in choosing optimal `alpha` value for referent power-law distribution to
#' which all samples will be normalized. For details about normalization see
#' [`normalizeTagCount`] function.
#' 
#' @return Plots of reverse cumulative number of CAGE tags per CTSS for each CAGE
#' dataset within CAGEr object.  Alpha values of fitted power-laws and suggested
#' referent power-law distribution are reported on the plot in case `values = "raw"`.
#' 
#' @references Balwierz _et al_. (2009) Methods for analyzing deep sequencing
#' expression data: constructing the human and mouse promoterome with deepCAGE data,
#' _Genome Biology_ **10**(7):R79. 
#' 
#' @author Vanja Haberle
#' 
#' @seealso [`normalizeTagCount`]
#' @family CAGEr plot functions
#' 
#' @examples 
#' plotReverseCumulatives(exampleCAGEset, fitInRange = c(10,500), onePlot = TRUE)
#' plotReverseCumulatives(exampleCAGEset, values = "normalized", onePlot = TRUE)
#' 
#' plotReverseCumulatives( exampleCAGEexp, xlim = c(1, 1e4), ylim = c(1, 1e5)
#'                       , fitInRange = c(5,100), onePlot = TRUE)
#' plotReverseCumulatives( exampleCAGEexp, values = "normalized"
#'                       , fitInRange = c(200, 2000), onePlot = TRUE)
#' plotReverseCumulatives( exampleCAGEexp[,4:5], fitInRange = c(5,100)
#'                       , onePlot = TRUE, main = "prim6 replicates")
#' 
#' @importFrom graphics abline box legend par plot strwidth text
#' @importFrom stats cor median
#' @importFrom VGAM zeta
#' @export

setGeneric( "plotReverseCumulatives"
          , function( object, values = c("raw", "normalized")
                    , fitInRange = c(10, 1000)
                    , onePlot = FALSE, main = NULL, legend = TRUE
                    , xlab = "number of CAGE tags"
                    , ylab = "number of CTSSs (>= nr tags)"
                    , xlim = c(1, 1e5)
                    , ylim = c(1, 1e6))
	standardGeneric("plotReverseCumulatives"))

#' @rdname plotReverseCumulatives

setMethod( "plotReverseCumulatives", "CAGEr"
         , function ( object, values, fitInRange, onePlot, main, legend
                    , xlab, ylab, xlim, ylim) {
	sample.labels <- sampleLabels(object)
	values <- match.arg(values)
	#pdf(file = paste("CTSS_reverse_cumulatives_", values, "_all_samples.pdf", sep = ""), width = 8, height = 8, onefile = T, bg = "transparent", family = "Helvetica", fonts = NULL)
	old.par <- par(mar = c(5,5,5,2))
	on.exit(par(old.par))
	cols <- names(sample.labels)
	
  tag.count <- switch( values
                     , raw        = CTSStagCountDF(object)
                     , normalized = CTSSnormalizedTpmDF(object))

	if(! is.null(fitInRange)) {
		fit.coefs.m <- as.matrix(data.frame(lapply(tag.count, function(x) {.fit.power.law.to.reverse.cumulative(values = decode(x), val.range = fitInRange)})))
		fit.slopes <- fit.coefs.m[1,]
		names(fit.slopes) <- sample.labels
		reference.slope <- min(median(fit.slopes), -1.05)
		reference.library.size <- 10^floor(log10(median(sapply(tag.count, sum))))
#reference.intercept <- log(reference.library.size/zeta(-1*reference.slope))  # intercept on natural logarithm scale
		reference.intercept <- log10(reference.library.size/VGAM::zeta(-1*reference.slope))  # intercept on log10 scale used for plotting with abline
	}
	
	if(onePlot == TRUE){
		.plotReverseCumulative( values = tag.count[, 1]
		                      , col    = cols[1]
		                      , title  = ifelse(is.null(main), "All samples", main)
		                      , xlab   = xlab, ylab = ylab
		                      , xlim   = xlim, ylim = ylim)
		if(length(sample.labels) > 1){
			sapply(c(2:length(sample.labels)), function(x)
			  .plotReverseCumulative( values = tag.count[, sample.labels[x]]
			                        , col = cols[x]
			                        , add = TRUE))
		}
		if(!is.null(fitInRange)) {
			abline(v = fitInRange, lty = "dotted")
			abline(a = reference.intercept, b = reference.slope, col = "#7F7F7F7F", lty = "longdash")
			main.legend.text <- sprintf("(%.2f) %s", -1 * fit.slopes, sample.labels)
			legend("bottomleft", legend = c("Ref. distribution:", paste(" alpha = ", sprintf("%.2f", -1*reference.slope), sep = ""), paste(" T = ", reference.library.size, sep = "")), bty = "n", col = NA, text.col = "#7F7F7F", cex = 1.3, y.intersp = 1.2)
		} else {
			main.legend.text <- sample.labels
		}
		if (isTRUE(legend))
		  legend( "topright"
		        , legend    = main.legend.text
		        , bty       = "n"
		        , col       = cols
		        , text.col  = cols
		        , lwd       = 2
		        , cex       = 1.3
		        , y.intersp = 1.2)
	}else{
		if(!is.null(fitInRange)) {
			sapply(sample.labels, function(x) {
				.plotReverseCumulative( values    = tag.count[, x]
				                      , col       = cols[which(sample.labels == x)]
				                      , title     = x
				                      , col.title = cols[which(sample.labels == x)]
				                      , xlab      = xlab, ylab = ylab
				                      , xlim      = xlim, ylim = ylim)
				abline(v = fitInRange, lty = "dotted"); abline(a = reference.intercept, b = reference.slope, col = "#7F7F7F7F", lty = "longdash"); text(min(fitInRange), 10^6, labels = paste(" alpha =", formatC(-1*fit.slopes[x], format = "f", digits = 2), sep = " "), adj = c(0,1), col = cols[which(sample.labels == x)], cex = 1.3); legend("bottomleft", legend = c("Ref. distribution:", paste(" alpha = ", sprintf("%.2f", -1*reference.slope), sep = ""), paste(" T = ", reference.library.size, sep = "")), bty = "n", col = NA, text.col = "#7F7F7F", cex = 1.3, y.intersp = 1.2)})
		} else {
			sapply( sample.labels, function(x)
				.plotReverseCumulative( values    = tag.count[, x]
				                      , col       = cols[which(sample.labels == x)]
				                      , title     = x
				                      , col.title = cols[which(sample.labels == x)]
				                      , xlab      = xlab, ylab = ylab
				                      , xlim      = xlim, ylim = ylim)
			)
		}
	}
	invisible(TRUE)
})


#' @name exportCTSStoBedGraph
#' 
#' @title Creating bedGraph/bigWig tracks of CAGE transcription starts sites
#' 
#' @description Creates bedGraph or BigWig file(s) with track(s) of CAGE signal supporting
#' each TSS that can be visualised in the UCSC Genome Browser.
#' 
#' @param object A \code{\link{CAGEr}} object.
#' 
#' @param values Specifies which values will be exported to the bedGraph file. Can be either
#'        \code{"raw"} to export raw tag count values or \code{"normalized"} to export
#'        normalized values.
#' 
#' @param format The format of the output. 
#' 
#' @param oneFile Logical, should all CAGE datasets be exported as individual tracks into the
#'        same bedGraph file (TRUE) or into separate bedGraph files (FALSE). Used only when
#'        \code{format="bedGraph"}, otherwise ignored.
#' 
#' @return Creates bedGraph or BigWig file(s) in the working directory that can be directly
#' visualised as custom tracks in the UCSC Genome Browser.  If \code{format="bedGraph"} and
#' \code{oneFile = TRUE} one bedGraph file containing multiple annotated tracks will be created,
#' otherwise two files per CAGE dataset will be created, one for plus strand and one for minus
#' strand CTSSs, and they will be named according to the labels of individual datasets.  All
#' bedGraph files contain headers with track description and can be directly uploaded as custom
#' tracks to the UCSC Genome Browser. 
#' 
#' When \code{format="bigWig"}, two binary BigWig files per CAGE dataset are created, one for
#' plus strand and one for minus strand CTSSs. Since BigWig files cannot contain headers with
#' track description, a separate file named "CTSS.normalized.all.samples.track.description.txt"
#' is created, which contains track headers for all BigWig files. To use these headers for
#' adding custom tracks to the UCSC Genome Browser, move the BigWig files to a web location and
#' edit the bigDataUrl sections in the headers file to point to corresponding BigWig files.
#' 
#' @author Vanja Haberle
#' 
#' @family CAGEr export functions
#' 
#' @seealso \code{\link{normalizeTagCount}}
#' 
#' @examples
#' exportCTSStoBedGraph(exampleCAGEset, values = "normalized", format = "bedGraph", oneFile = TRUE)
#' exportCTSStoBedGraph(exampleCAGEset, values = "normalized", format = "BigWig", oneFile = TRUE)
#' 
#' @export

setGeneric( "exportCTSStoBedGraph"
          , function(object, values = "normalized", format = "BigWig", oneFile = TRUE)
            	standardGeneric("exportCTSStoBedGraph"))

#' @rdname exportCTSStoBedGraph

setMethod("exportCTSStoBedGraph", "CAGEr", function (object, values, format, oneFile) {
  rr <- CTSScoordinatesGR(object)
  genome <- getRefGenome(genomeName(object))
  seqinfo(rr) <- seqinfo(genome)[seqlevels(rr)]

  if (values == "raw") {
    data <- SummarizedExperiment( rowRanges = rr
                                , assays    = SimpleList(CTSStagCountDF(object)))
  } else if (values == "normalized"){
    data <- SummarizedExperiment( rowRanges = rr
                                , assays    = SimpleList(CTSSnormalizedTpmDF(object)))
  } else {
    stop("'values' parameter must be one of the (\"raw\", \"normalized\")")
  }

  if (format == "BigWig"){
    .export.bw.all(data = data, sample.labels = sampleLabels(object), v = values, genome = genome)
  } else if (format == "bedGraph"){
    .export.bedgraph.all(data = data, sample.labels = sampleLabels(object), v = values, oneFile = oneFile)
  } else {
    stop("'format' parameter must be one of the (\"BigWig\", \"bedGraph\")")
  }
	
  message("\n", format, " file(s) for CTSS ", values, " counts have been created in your working directory (", getwd(), ")")
  invisible(1)
})

#' @name plotInterquantileWidth
#' 
#' @title Plot cluster widths
#' 
#' @description Plots histograms of the interquantile width of tag clusters or
#' consensus clusters in each CAGE dataset.
#' 
#' @param object A \code{\link{CAGEr}} object
#' 
#' @param clusters Which clusters to be used.  (\code{"tagClusters"} for \emph{tag clusters}
#'        or \code{"consensusClusters"} for \emph{consensus clusters}.
#' 
#' @param tpmThreshold Exclude clusters with normalized signal \code{< tpmThreshold}.
#' 
#' @param qLow,qUp Position of which "lower" (resp. "upper") quantile should be used as 5'
#'        (resp. 3') boundary.  See Details.
#' 
#' @param xlim The x axis limits of the plot.
#' 
#' @param ... Additional arguments passed to \code{plot()} function, such as \code{ylim},
#'        \emph{etc.}.
#' 
#' @details Interquantile width is a width (in base-pairs) of the central part of the genomic
#' region (bounded by the positions of specified \code{qLow} and \code{qUp} quantiles) that
#' contains \code{>= (qUp - qLow) * 100\%} of the CAGE signal.  Positions of specified quantiles
#' within each cluster have to be calculated beforehand by calling \code{\link{quantilePositions}}
#' function.  Interquantile width is a more robust measure of the promoter width than the total
#' span of the region, because it takes into account the magnitude of the expression in the
#' region.
#' 
#' @author Vanja Haberle
#' 
#' @family CAGEr plot functions
#' @family CAGEr clusters functions
#' 
#' @importFrom graphics hist
#' 
#' @examples
#' plotInterquantileWidth( object = exampleCAGEset, clusters = "tagClusters"
#'                       , tpmThreshold = 50, qLow = 0.1, qUp = 0.9)
#' 
#' plotInterquantileWidth( exampleCAGEexp, clusters = "consensusClusters"
#'                       , tpmThreshold = 50, qLow = 0.1, qUp = 0.9)
#' 
#' @export

setGeneric( "plotInterquantileWidth"
          , function( object
                    , clusters = c("tagClusters", "consensusClusters")
                    , tpmThreshold = 5
                    , qLow = 0.1, qUp = 0.9, xlim = c(0,150), ...)
            	standardGeneric("plotInterquantileWidth"))

#' @rdname plotInterquantileWidth

setMethod( "plotInterquantileWidth", "CAGEr"
         , function (object, clusters, tpmThreshold, qLow, qUp, xlim, ...) {
           
	sample.labels <- sampleLabels(object)
	cols <- names(sample.labels)
	clusters <- match.arg(clusters)
	getClustFun <- switch( clusters
	                     , tagClusters       = tagClustersGR
	                     , consensusClusters = consensusClustersGR)

  old.par <- par(mar = c(5,5,5,1))
  on.exit(par(old.par))
  
	sapply(sample.labels, function(x) {
    gr <- getClustFun(object, x, returnInterquantileWidth = TRUE, qLow = qLow, qUp = qUp)
    gr <- gr[score(gr) >= tpmThreshold]
    width <- decode(gr$interquantile_width)
    h <- hist(width, breaks = round(max(width)/2), plot = F)
    h$counts <- h$counts/sum(h$counts)
    col <- as.integer(col2rgb(cols[which(sample.labels == x)]))/255
    plot( h
        , xlim = xlim
        , main = x
        , xlab = paste0(clusters, " interquantile width q", qLow, "-q", qUp, " (bp)")
        , ylab = "relative frequency"
        , col = rgb(col[1], col[2], col[3], 0.5)
        , border = cols[which(sample.labels == x)]
        , cex.axis = 1.8
        , cex.lab = 1.8
        , cex.main = 2.5
        , col.main = cols[which(sample.labels == x)]
        , ...)
  })
	invisible(1)
})

#' @name plotExpressionProfiles
#' 
#' @title Plotting expression profiles derived from CAGE data
#' 
#' @description Creates a chart with beanplots representing distribution of normalized
#' expression across CAGE experiments for individual expression classes.  Different
#' expression classes are shown in different colors and are labeled according to the labels
#' returned by expression clustering.
#' 
#' @param object A \code{\link{CAGEset}} object.
#' 
#' @param what Which level of expression clustering should be used.  Can be either
#' \code{"CTSS"} to plot expression profiles of individual CTSSs or \code{"consensusClusters"}
#' to plot expression profiles of consensus clusters.
#' 
#' @details The created file contains beanplots representing distribution of normalized
#' expression across CAGE experiments for individual expression classes shown in separate
#' boxes.  Each labeled box represents one expression class and contains a set of
#' beanplots - one per CAGE experiment. Individual CAGE experiments are shown on X-axis and
#' scaled normalized expression on Y-axis.  Individual beanplots show distribution of
#' normalized expression values of elements belonging to specific expression class in
#' particular CAGE experiment, and the entire box represents single expression profile.
#' Different expression classes (boxes) are plotted in different colors and are labeled with
#' labels returned by expression clustering.
#' 
#' @author Vanja Haberle
#' 
#' @family CAGEr plot functions
#' 
#' @seealso \code{\link{getExpressionProfiles}}, \code{\link{expressionClasses}},
#'          \code{\link{extractExpressionClass}}.
#'          
#' @examples
#' plotExpressionProfiles(object = exampleCAGEset, what = "CTSS")
#' 
#' @export

setGeneric( "plotExpressionProfiles"
          , function(object, what) standardGeneric("plotExpressionProfiles"))

#' @rdname plotExpressionProfiles

setMethod( "plotExpressionProfiles", "CAGEset", function (object, what){
	
	if(what == "CTSS") {
		
		cl <- CTSSexpressionClasses(object)
		if(length(cl)>0){
			tpm.mx <- object@normalizedTpmMatrix
			tpm.mx <- tpm.mx[as.integer(names(cl)),]
			cl.method <- object@CTSSexpressionClusteringMethod
		}else{
			stop("No CTSS expression profiling has been done yet! Run getExpressionProfiles function first!")
		}
		
	}else if(what == "consensusClusters"){
		
		cl <- consensusClustersExpressionClasses(object)
		if(length(cl)>0){
			tpm.mx <- consensusClustersTpm(object)
			tpm.mx <- tpm.mx[as.integer(names(cl)),]
			cl.method <- consensusClustersExpressionClusteringMethod(object)
		}else{
			stop("No consensusClusters expression profiling has been done yet! Run getExpressionProfiles function first!")
		}
		
	}else{
		stop("'what' parameter must be one of the (\"CTSS\", \"consensusClusters\")")
	}
	
	l <- .extract.cluster.info(tpm.mx, cl)
	cl <- l[[1]]
	m <- l[[2]]
	# file_name = paste(what, "_expression_profiles.pdf", sep = "")
	# pdf(file = file_name, height = 5.5 * (max(cl[,2]) + 1) + 3, width = 6 * (max(cl[,1]) + 1))
	old.par <- par(mfrow = c(max(cl[,2]) + 1, max(cl[,1]) + 1))
	on.exit(par(old.par))
	suppressWarnings(.plot.clusters.beanplots(value.matrix = m, cl = cl, cl.method = cl.method, dim.som.x = max(cl[,1]) + 1, dim.som.y = max(cl[,2]) + 1, cex.axis = 1, las = 2))
	# dev.off()
	# message("\nFile '", file_name, "' has been created in your working directory (", getwd(), ")"
	invisible(TRUE)
})


#' @name exportToBed
#' 
#' @title Create BED tracks of TSSs and clusters of TSSs
#' 
#' @description Creates BED file(s) with track(s) of individual CTSSs, tag
#' clusters or consensus clusters.  CTSSs and consensus clusters can be
#' optionally colored in the color of their expression class.  _Tag clusters_
#' and _consensus clusters_ can be displayed in a gene-like representation with
#' a line showing full span on the cluster, filled block showing interquantile
#' range and a thick box denoting position of the dominant (most frequently
#' used TSS.
#' 
#' @param object A [`CAGEr`] object.
#' 
#' @param what Which elements should be exported to BED track.  `CTSS` to export
#' individual CTSSs,  `tagClusters` to export tag clusters or `consensusClusters`
#' to export consensus clusters.
#' 
#' @param qLow,qUp Position of which "lower" (resp. "upper") quantile should be
#' used as 5' (resp. 3') boundary of the filled block in gene-like
#' representation of the cluster.  Default value `NULL` uses start (resp. end)
#' position of the cluster.  Ignored when `what = "CTSS"`.
#' 
#' @param colorByExpressionProfile Logical, should blocks be colored in the
#' color of their corresponding expression class.  Ignored when
#' `what = "tagClusters"`.
#' 
#' @param oneFile Logical, should all CAGE datasets be exported as individual
#' tracks into the same BED file (`TRUE`) or into separate BED files (`FALSE`).
#' Ignored when `what = "CTSS"`, which by default produces only one track.
#' 
#' @details The BED representations of _CTSSs_, _tag cluster_ and
#' _consensus clusters_ can be directly visualised in the ZENBU or UCSC Genome
#' Browsers.
#' 
#' When `what = "CTSS"`, one BED file with single track of 1 bp blocks
#' representing all detected CTSSs (in all CAGE samples) is created.  CTSSs can
#' be colored according to their expression class (provided the expression
#' profiling of CTSSs was done by calling [`getExpressionProfiles`] function).
#' Colors of expression classes match the colors in which they are shown in the
#' plot returned by the [`plotExpressionProfiles`] function.  For
#' `colorByExpressionProfile = FALSE`, CTSSs included in the clusters are
#' shown in black and CTSSs that were filtered out in gray.
#' 
#' When `what = "tagClusters"`, one track per CAGE dataset is created, which can
#' be exported to a single BED file (by setting `oneFile = TRUE`) or separate
#' BED files (`FALSE`).  If no quantile boundaries were provided (`qLow` and
#' `qUp` are `NULL`, TCs are represented as simple blocks showing the full
#' span of TC fromthe start to the end.  Setting `qLow` and/or `qUp` parameters
#' to a value of the desired quantile creates a gene-like representation with a
#' line showing full span of the TC, filled block showing specified
#' interquantile range and a thick 1 bp block denoting position of the dominant
#' (most frequently used) TSS.  All TCs in one track (one CAGE dataset) are
#' shown in the same color.
#' 
#' When `what = "consensusClusters"` _consensus clusters_ are exported to BED
#' file.  Since there is only one set of consensus clusters common to all CAGE
#' datasets, only one track is created in case of a simple representation.  This
#' means that when `qLow = NULL` and `qUp = NULL` one track with blocks showing
#' the full span of consensus cluster from the start to the end is created.
#' However, the distribution of the CAGE signal within consensus cluster can be
#' different in different CAGE samples, resulting in different positions of 
#' quantiles and dominant TSS.  Thus, when `qLow` and/or `qUp` parameters
#' are set to a value of the desired quantile, a separate track with a gene-like
#' representation is created for every CAGE dataset.  These tracks can be
#' exported to a single BED file (by setting `oneFile = TRUE`) or separate
#' BED files (by setting `oneFile = FALSE`).  The gene-like representation is
#' analogous to the one described above for the TCs.  In all cases consensus
#' clusters can be colored according to their expression class (provided the
#' expression profiling of consensus clusters was done by calling
#' `getExpressionProfiles` function).  Colors of expression classes match the
#' colors in which they are shown in the plot returned by the
#' `plotExpressionProfiles` function.  For `colorByExpressionProfile = FALSE`
#' all consensus clusters are shown in black.
#' 
#' @return Creates BED file(s) in the working directory.
#' 
#' @author Vanja Haberle
#' 
#' @family CAGEr export functions
#' 
#' @examples 
#' 
#' ### exporting CTSSs colored by expression class
#' exportToBed(object = exampleCAGEset, what = "CTSS", colorByExpressionProfile = TRUE)
#' 
#' ### exporting tag clusters in gene-like representation
#' exportToBed( object = exampleCAGEset, what = "tagClusters"
#'            , qLow = 0.1, qUp = 0.9, oneFile = TRUE)
#'            
#' exportToBed( object = exampleCAGEexp, what = "tagClusters"
#'            , qLow = 0.1, qUp = 0.9, oneFile = TRUE)
#' 
#' @export

setGeneric( "exportToBed"
          , function( object, what = c("CTSS", "tagClusters", "consensusClusters")
                    , qLow = NULL, qUp = NULL
                    , colorByExpressionProfile = FALSE, oneFile = TRUE)
            	standardGeneric("exportToBed"))

#' @rdname exportToBed

setMethod("exportToBed", "CAGEr", function( object, what, qLow, qUp
                                          , colorByExpressionProfile, oneFile) {
	sample.labels <- sampleLabels(object)
	what <- match.arg(what)

	if(what == "CTSS") {
		
		oneFile <- TRUE
		use.blocks <- F
		ctss <- CTSScoordinates(object)
		#filtered_ctss <- object@filteredCTSSidx

		if(colorByExpressionProfile == TRUE){
			cl <- CTSSexpressionClasses(object)
			n <- names(cl)
			cl <- .extract.cluster.info(cl = cl)
			cl <- data.frame(ctss = n, x.cor = cl[,1], y.cor = cl[,2])		
			ctss <- merge(cl, ctss, by.x = "ctss", by.y = 0, all.x = T, all.y = F)
			ctss <- data.frame(ctss = ctss$ctss, chr = ctss$chr, start = ctss$pos-1, end = ctss$pos, strand = ctss$strand, x.cor = ctss$x.cor, y.cor = ctss$y.cor)
			track.file <- "CTSS.colored.by.expression.profile.bed"
			track.names <- list("CTSS (colored by expression profile)")
			clustering.method <- CTSSexpressionClusteringMethod(object)
		}else{
			ctss <- data.frame(chr = ctss$chr, start = ctss$pos-1, end = ctss$pos, strand = ctss$strand)
			track.file <- "CTSS.pooled.samples.bed"
			track.names <- list("CTSS (pooled samples)")
			#filtered_cols <- c("TRUE" = c("0,0,0"), "FALSE" = c("127,127,127"))
			#cols = list(filtered_cols[as.character(filtered_ctss)])
			cols = list(c("0,0,0"))
		}
		clusters.q.list = list(ctss)
		
	}else if(what == "tagClusters") {
		
		colorByExpressionProfile <- FALSE
		
		if(!is.null(qLow)) {
		
		use.blocks <- T
		q.low <- tagClustersQuantileLow(object, q = qLow)
		q.low <- lapply(q.low, function(x) {colnames(x)[2:ncol(x)] <- paste("qLow_", do.call(rbind, strsplit(colnames(x)[2:ncol(x)], split = "_", fixed = T))[,2], sep = ""); return(x)})
		q.up <- tagClustersQuantileUp(object, q = qUp)
		q.up <- lapply(q.up, function(x) {colnames(x)[2:ncol(x)] <- paste("qUp_", do.call(rbind, strsplit(colnames(x)[2:ncol(x)], split = "_", fixed = T))[,2], sep = ""); return(x)})
		q <- lapply(as.list(1:length(sample.labels)), function(x) {merge(q.low[[x]], q.up[[x]], by.x = "cluster", by.y = "cluster")})
		names(q) <- sample.labels
		clusters <- lapply(as.list(sample.labels), function(x) {tagClusters(object, samples = x)})
		names(clusters) <- sample.labels
		clusters.q.list <- lapply(as.list(sample.labels), function(x) {merge(clusters[[x]], q[[x]], by.x = "cluster", by.y = "cluster")})
		track.names <- paste(sample.labels, paste(" (tag clusters (TC) q(", qLow, ")-q(",qUp,"))", sep = ""), sep = "")
		r <- paste(".qLow", qLow, "_qUp", qUp, sep = "")
			
		}else{
			use.blocks <- F
			clusters.q.list <- lapply(as.list(sample.labels), function(x) {tagClusters(object, samples = x)})			
			track.names <- paste(sample.labels, paste(" (tag clusters (TC))", sep = ""), sep = "")
			r <- ""
		}
		
		names(clusters.q.list) <- sample.labels
		itemRgb = FALSE
		cols <- names(sample.labels)
		cols <- as.list(apply(sapply(cols, function(x) {as.integer(col2rgb(x))}), 2, function(y) {paste(y, collapse = ",")}))
		names(cols) <- sample.labels

		if(oneFile){
			track.file <- rep(paste("All.samples.tagClusters", r, ".bed", sep = ""), length(clusters.q.list))
		}else{
			track.file <- paste(sample.labels, ".tagClusters", r, ".bed", sep = "")
		}
		
	}else if(what == "consensusClusters"){
		
		clusters <- consensusClusters(object)
		colnames(clusters)[1] = "cluster"
		if(!(colorByExpressionProfile)){
			cols <- as.list(rep("0,0,0", length(sample.labels)))
		}else{
			clustering.method <- consensusClustersExpressionClusteringMethod(object)		
		cl <- consensusClustersExpressionClasses(object)
		n <- names(cl)
		cl <- .extract.cluster.info(cl = cl)
		cl <- data.frame(cluster = n, x.cor = cl[,1], y.cor = cl[,2])		
		clusters <- merge(clusters, cl, by.x = "cluster", by.y = "cluster")
		}
		
		if(!is.null(qLow) & (length(consensusClustersQuantileLow(object))>0 & length(consensusClustersQuantileUp(object))>0)) {

		if(paste("q_", qLow, sep = "") %in% colnames(consensusClustersQuantileLow(object)[[1]]) & paste("q_", qUp, sep = "") %in% colnames(consensusClustersQuantileUp(object)[[1]])){
		
		use.blocks <- T
		q.low <- consensusClustersQuantileLow(object)
		q.low <- lapply(q.low, function(x) {colnames(x)[2:ncol(x)] <- paste("qLow_", do.call(rbind, strsplit(colnames(x)[2:ncol(x)], split = "_", fixed = T))[,2], sep = ""); return(x)})
		q.up <- consensusClustersQuantileUp(object)
		q.up <- lapply(q.up, function(x) {colnames(x)[2:ncol(x)] <- paste("qUp_", do.call(rbind, strsplit(colnames(x)[2:ncol(x)], split = "_", fixed = T))[,2], sep = ""); return(x)})
		q <- lapply(as.list(1:length(sample.labels)), function(x) {merge(q.low[[x]], q.up[[x]], by.x = "cluster", by.y = "cluster")})
		names(q) <- sample.labels
						
		cumsums <- CTSScumulativesCC(object)
		dom.pos <- list()
		for(i in 1:length(cumsums)){
			a <- lapply(cumsums[[i]], function(y) {.get.dominant.ctss(as.numeric(y), isCumulative = T)})
			b <- data.frame(cluster = as.integer(names(a)), dominant_ctss = unlist(a))
			dom.pos[[i]] <- b
		}
		names(dom.pos) <- names(cumsums)
		
		clusters.q.list <- lapply(as.list(sample.labels), function(x) {a <- merge(clusters, dom.pos[[x]], by.x = "cluster", by.y = "cluster", all.x = F, all.y = T); a$dominant_ctss <- a$start + a$dominant_ctss; b <- merge(a, q[[x]], by.x = "cluster", by.y = "cluster"); return(b)})
		track.names <- paste(sample.labels, paste(" (consensus clusters q(", qLow, ")-q(",qUp,"))", sep = ""), sep = "")
		r <- paste(".qLow", qLow, "_qUp", qUp, sep = "")
		quantiles <- T
		}else{
			stop("No data for given quantile positions! Run 'quantilePositions()' function for desired quantiles first, or omit 'qLow' and 'qUp' parameters to use start and end coordinates instead!")
		}
		}else if(!is.null(qLow)){
			stop("No data for given quantile positions! Run 'quantilePositions()' function for desired quantiles first, or omit 'qLow' and 'qUp' parameters to use start and end coordinates instead!")
		}else{
			use.blocks <- F
			clusters.q.list <- list(clusters)			
			track.names <- "Consensus clusters"
			r <- ""
			oneFile <- T
			quantiles <- F
		}
				   
		if(oneFile){
			if(quantiles){
			track.file <- rep(paste("All.samples.consensusClusters", r, ".bed", sep = ""), length(clusters.q.list))
			}else{
				track.file <- "ConsensusClusters.bed"
			}
		}else{
			track.file <- paste(sample.labels, ".consensusClusters.", r, ".bed", sep = "")		
		}
	}

	if(colorByExpressionProfile == TRUE){
		
		itemRgb <- TRUE
		cols.init <- c("red", "gold", "green", "blue")
		color.matrix.solid <- .myColorMatrix(matrix(cols.init, nrow = 2), nrows = max(cl$y.cor)+1, ncols = max(cl$x.cor)+1)
		color.matrix.solid <- matrix(as.vector(color.matrix.solid), ncol = max(cl$x.cor)+1, byrow = F)
		cols <- lapply(clusters.q.list, function(x) {m <- as.matrix(x[,c("y.cor", "x.cor")]); a <- apply(t(col2rgb(color.matrix.solid[m+1])), 1, function(x) {paste(x, collapse = ',')}); return(a)})
		if(clustering.method == "som"){
			names <- lapply(clusters.q.list, function(x) {paste(x[,"x.cor"], x[,"y.cor"], sep = "_")})
		}else if(clustering.method == "kmeans"){
			names <- lapply(clusters.q.list, function(x) {x[,"x.cor"]})
		}
		
	}else{
		itemRgb <- FALSE
		names <- as.list(rep(".", length(clusters.q.list)))
	}
	
	track.descriptions <- track.names
	
	for(i in 1:length(clusters.q.list)){
		if(i == 1){
			app <- F
		}else{
			app <- T
		}
		if(!oneFile){
			if(file.exists(track.file[i])){
				file.remove(track.file[i])
			}				
		}
		.make.cluster.bed.track(clusters.q = clusters.q.list[[i]], use.blocks = use.blocks, q.low = qLow, q.up = qUp, track.file = track.file[i], track.name = track.names[i], track.description = track.descriptions[i], cols = cols[[i]], name = names[[i]], itemRgb = itemRgb, app = app)
		
	}
	
	if(oneFile){
		message("\nFile '", track.file[1], "' has been created in your working directory (", getwd(), ")")	
	}else{
		message("\nFiles '", sub(sample.labels[1], "*", track.file[1]), "' for all samples have been created in your working directory (", getwd(), ")")
	}
})
Any scripts or data that you put into this service are public.
CAGEr documentation built on Jan. 17, 2021, 2 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
CAGEr
Analysis of CAGE (Cap Analysis of Gene Expression) sequencing data for precise mapping of transcription start sites and promoterome mining

R/ExportMethods.R
In CAGEr: Analysis of CAGE (Cap Analysis of Gene Expression) sequencing data for precise mapping of transcription start sites and promoterome mining

Try the CAGEr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

CAGEr Analysis of CAGE (Cap Analysis of Gene Expression) sequencing data for precise mapping of transcription start sites and promoterome mining

R/ExportMethods.R In CAGEr: Analysis of CAGE (Cap Analysis of Gene Expression) sequencing data for precise mapping of transcription start sites and promoterome mining

Try the CAGEr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

CAGEr
Analysis of CAGE (Cap Analysis of Gene Expression) sequencing data for precise mapping of transcription start sites and promoterome mining

R/ExportMethods.R
In CAGEr: Analysis of CAGE (Cap Analysis of Gene Expression) sequencing data for precise mapping of transcription start sites and promoterome mining