R/dataExport.R

Defines functions rnb.section.export.csv rnb.execute.export.csv rnb.execute.export.csv.save rnb.execute.tnt rnb.section.tnt rnb.section.export.ct.adj rnb.export.to.ewasher rnb.export.to.trackhub rnb.convert.bed.to.bigBed rnb.convert.bedGraph.to.bigWig create.ucsc.track.hub rnb.diffmeth.to.EpiExplorer.file rnb.RnBSet.to.bedGraph rnb.RnBSet.to.bed rnb.RnBSet.to.GRangesList rnb.export.fail

Documented in rnb.execute.export.csv rnb.execute.tnt rnb.export.to.ewasher rnb.export.to.trackhub rnb.RnBSet.to.bed rnb.RnBSet.to.bedGraph rnb.RnBSet.to.GRangesList

########################################################################################################################
## dataExport.R
## created: 2013-01-15
## creator: Fabian Mueller
## ---------------------------------------------------------------------------------------------------------------------
## Data exporting routines
########################################################################################################################

#' rnb.export.fail
#'
#' Adds a paragraph describing why exporting data for a specific region type might fail.
#'
#' @param report Report to contain the description.
#' @param txt Text to be prepended to the paragraph.
#' @author Yassen Assenov
#' @noRd
rnb.export.fail <- function(report, txt = character()) {
	txt <- c(txt, "There are several reasons why a certain output file cannot be (fully) generated. Examples include:")
	rnb.add.paragraph(report, txt)
	txt <- list(
		"The corresponding region type is invalid.",
		"The corresponding region type is not supported by the dataset.",
		"Due to security restrictions, the creation of files in the output directory is not allowed.",
		"A file or directory with the same name exists and cannot be overwritten.",
		"The disk is full or the user quota is exceeded.")
	rnb.add.list(report, txt)
}

########################################################################################################################

#' rnb.RnBSet.to.GRangesList
#'
#' convert an \code{\linkS4class{RnBSet}} object to a \code{GRangesList} object
#' @param rnb.set Object of class \code{\linkS4class{RnBSet}}
#' @param reg.type region type to be converted
#' @param return.regular.list flag indicating whether a regular \code{list} object should be returned instead
#'             of a \code{GRangesList}. Might improve performance in some cases
#' @return a \code{GRangesList} or \code{list} object with one list element (\code{GRanges}) for each sample in \code{rnb.set}
#' @author Fabian Mueller
#' @export
#' @examples
#' \donttest{
#' library(RnBeads.hg19)
#' data(small.example.object)
#' logger.start(fname=NA)
#' result <- rnb.RnBSet.to.GRangesList(rnb.set.example)
#' }
rnb.RnBSet.to.GRangesList <- function(rnb.set,reg.type="sites",return.regular.list=FALSE){
	if (!inherits(rnb.set, "RnBSet")) {
		stop("Invalid value for rnb.set: Expected RnBSet")
	}
	if (!(reg.type %in% c(rnb.region.types(assembly(rnb.set)),"sites"))){
		stop("Unsupported region type")
	}
	# logger.status("Gathering data") #DEBUG MESSAGE
	anno.sites <- annotation(rnb.set,type=reg.type)
	anno.sites <- anno.sites[,c("Chromosome","Start","End","Strand")]
	granges.coords <- data.frame2GRanges(anno.sites, ids = NULL, chrom.column = "Chromosome", start.column = "Start",
						end.column = "End", strand.column = "Strand", assembly = assembly(rnb.set), sort.result=FALSE)
	# logger.status("...getting data into the correct order") #DEBUG MESSAGE
	oo <- order(as.integer(seqnames(granges.coords)),start(granges.coords), end(granges.coords), as.integer(strand(granges.coords)))
	granges.coords <- granges.coords[oo]
	mm <- meth(rnb.set,type=reg.type)[oo,,drop=FALSE]
	ccov <- covg(rnb.set,type=reg.type)[oo,,drop=FALSE]

	# logger.status("Creating list for GRanges objects") #DEBUG MESSAGE
	rnbs.grl <- lapply(samples(rnb.set),FUN=function(ss){
		if (!is.null(ccov)){
			dd <- data.frame(score=mm[,ss],coverage=ccov[,ss])
		} else {
			dd <- data.frame(score=mm[,ss])
		}
		rows.no.nas <- !is.na(dd$score)
		granges.cur <- granges.coords[rows.no.nas]
		elementMetadata(granges.cur) <- dd[rows.no.nas,,drop=FALSE]
		# logger.status(c("...done processing sample",ss)) #DEBUG MESSAGE
		return(granges.cur)
	})
	names(rnbs.grl) <- samples(rnb.set)
	# logger.status("sorting entries")
	# rnbs.grl <- lapply(rnbs.grl , function(y) { y[order(as.integer(seqnames(y)),start(y), end(y), as.integer(strand(y))), ] })
	if (!return.regular.list){
		logger.status("converting to GRangesList") #DEBUG MESSAGE
		rnbs.grl <- GRangesList(rnbs.grl) #this is very memory intense for larger WGBS datasets
	}
	# #alternative: endoapply
	# rnbs.grl <- rep(GRangesList(granges.coords),length(samples(rnb.set)))
	# names(rnbs.grl) <- samples(rnb.set)
	# logger.status("Adding sample information") #DEBUG MESSAGE
	# i <- 1
	# rnbs.grl <- endoapply(rnbs.grl , function(gr){
	# 	#a bit of a hack getting the sample name using i in endoapply.
	# 	#If you can think of a better way, let me know.
	# 	#endoapply was necessary for performance reasons in large WGBS datasets
	# 	ss <- names(rnbs.grl)[i]
	# 	i <<- i + 1
	# 	logger.status(c("...processing sample",ss)) #DEBUG MESSAGE
	# 	if (!is.null(ccov)){
	# 		dd <- data.frame(score=mm[,ss],coverage=ccov[,ss])
	# 	} else {
	# 		dd <- data.frame(score=mm[,ss])
	# 	}
	# 	logger.status(c("......setting elementMetadata")) #DEBUG MESSAGE
	# 	elementMetadata(gr) <- dd
	# 	logger.status(c("......retrieving non-NAs")) #DEBUG MESSAGE
	# 	subset(gr,!is.na(elementMetadata(gr)[,"score"]))
	# })

	# rnbs.grl <- endoapply(rnbs.grl , function(y) { y[order(as.integer(seqnames(y)),start(y), end(y), as.integer(strand(y))), ] })
	return(rnbs.grl)
}

########################################################################################################################

#' Export to BED files
#'
#' Exports the beta values from a methylation dataset to BED files.
#' 
#' @param rnb.set Methylation dataset as an object of type inheriting \code{\linkS4class{RnBSet}}.
#' @param out.dir Output directory. If not existing, it will be created. otherwise files in that directory are overwritten.
#' @param reg.type Region type to be extracted.
#' @param names.quant.meth should the names of the bed regions contain information on the methylation level.
#' 		   If TRUE the following format is applied: meth_percent%[coverage]. Coverage is available only when
#' 			\code{covg(rnb.set)} is not NULL
#' @param add.track.line Add a track line to the bed file to enable browsers like IGV to display the data better
#' @param lexicographic Should lexicographic ordering be used for chromosome names
#' @param verbose More detailed logger output
#' @return (invisibly) a summary list containing information on the conversion step.
#'         elements are \code{filenames} (a table containing information on which sample has been written to what filename)
#'         and \code{assembly} (a string indicating the assembly used by \code{rnb.set}).
#' @details Details on the BED file format can be found in the \href{http://genome.ucsc.edu/FAQ/FAQformat.html}{UCSC Genome Browser
#'          documentation}.  Each methylation site is an entry in the resulting bed file. The Score column corresponds
#'          to a site's methylation value in the interval \code{[0,1]}.
#' @author Fabian Mueller
#' @export
#' @examples
#' \donttest{
#' library(RnBeads.hg19)
#' data(small.example.object)
#' logger.start(fname=NA)
#' rnb.RnBSet.to.bed(rnb.set.example,tempdir())
#' }
rnb.RnBSet.to.bed <- function(rnb.set,out.dir,reg.type="sites",names.quant.meth=TRUE,add.track.line=TRUE,lexicographic=FALSE,verbose=TRUE){
	if (!inherits(rnb.set, "RnBSet")) {
		stop("inavlid value for rnb.set")
	}
	if (!file.exists(out.dir)){
		dir.create(out.dir)
	}
	if (!(reg.type %in% c(rnb.region.types(assembly(rnb.set)),"sites"))){
		stop("Unsupported region type")
	}
	#convert RnBSet to GRangesList
	if (verbose) logger.status("Converting to GRangesList")
	rnb.set.grl <- rnb.RnBSet.to.GRangesList(rnb.set,reg.type=reg.type,return.regular.list=TRUE) #might take a while

	## Extract annotation table and methylation values
	tbl <- annotation(rnb.set, type = reg.type)[, c("Chromosome", "Start", "End", "Strand")]
	is.disjoint <- isDisjoint(GRanges(tbl$Chromosome, IRanges(tbl$Start, tbl$End), tbl$Strand))
	levels(tbl$Strand) <- c("+", "-", ".")
	tbl[, 2] <- tbl[, 2] - 1L # switch from 1-based closed intervals to 0-based half-open
	tbl[["ID"]] <- FALSE
	tbl[["Value"]] <- 0
	tbl <- tbl[, c(1:3, 5:6, 4)]
	if (lexicographic) {
		i.order <- order(as.integer(factor(as.character(tbl$Chromosome))))
	}

	## Prepare the resulting structure
	sample.ids <- samples(rnb.set)
	res <- list()
	res[["filenames"]] <- data.frame(i=1:length(sample.ids),sample=sample.ids,filename=paste0("rnbeads_sample",1:length(sample.ids),".bed"))
	res[["assembly"]] <- assembly(rnb.set)
	res[["contains.overlapping.regions"]] <- is.disjoint

	## output the text files
	for (i in 1:length(sample.ids)) {
		if (verbose) logger.status(c("Exporting sample", sample.ids[i]))
		mm <- as.vector(meth(rnb.set, type = reg.type, j = i))
		ii <- which(!is.na(mm))
		if (names.quant.meth) {
			cvg <- covg(rnb.set, reg.type, j = i)
			if (is.null(cvg)) {
				cvg <- "%"
			} else {
				cvg <- paste0("%[", cvg, "]")
			}
			bed.names <- paste0("'", round(mm * 100), cvg, "'")
		} else {
			bed.names <- rownames(tbl)
		}
		tbl$Value <- as.integer(round(mm * 1000))
		fn <- file.path(out.dir, res[["filenames"]][i, "filename"])
		if (add.track.line) {
			trackLine <- paste0("track name=\"",sample.ids[i],"\" description=\"",sample.ids[i]," methylation (",reg.type,")\" color=0,60,120 useScore=1")
			write(trackLine,file=fn)
		}
		if (lexicographic) {
			x <- tbl[intersect(i.order, ii), , drop = FALSE]
		} else {
			x <- tbl[ii, , drop = FALSE]
		}
		write.table(x,file=fn,quote=FALSE,sep="\t",row.names=FALSE,col.names=FALSE,na=".",append=add.track.line)
	}
	invisible(res)
}

########################################################################################################################

#' rnb.RnBSet.to.bedGraph
#'
#' Exports the methylation data of an \code{\linkS4class{RnBSet}} object to \code{*.bedGraph} files.
#'
#' @param rnb.set    Dataset as an instance of class \code{\linkS4class{RnBSet}}.
#' @param out.dir    One-element \code{character} vector signifying the output directory in which to create
#'                   \code{bedGraph} files. Setting this to \code{"."} (default) uses the current working directory. If
#'                   the output directory does not exist, this function attempts to create it. Any existing files in
#'                   this directory could be overwritten.
#' @param reg.type   Site or region type to be exported.
#' @param parameters Named \code{character} vector storing parameters (other than \code{"type"} and \code{"name"}) to
#'                   include in the track definition line. The names of this vector must be the parameter names, and its
#'                   elements - the corresponding values; missing values (\code{NA}s) are allowed neither for names, nor
#'                   for values. This function does not test if all provided parameter names and values conform to the
#'                   BedGraph track specification.
#' @param digits     Optionally, number of significant digits after the decimal point to round methylation values to. If
#'                   specified, this parameter must be an \code{integer} between \code{0} and \code{10}.
#' @return (invisibly) a summary list containing information on the conversion step.
#'         elements are \code{filenames} (a table containing information on which sample has been written to what filename)
#'         and \code{assembly} (a string indicating the assembly used by \code{rnb.set}).
#' @details The description of the BedGraph track format can be found \href{http://genome.ucsc.edu/goldenPath/help/bedgraph.html}{here}.
#'          Each methylation site is an entry in the resulting bedGraph file. The Score column corresponds to a site's
#'          methylation value in the interval \code{[0,1]}.
#' @author Fabian Mueller
#' @export
#' @examples
#' \donttest{
#' library(RnBeads.hg19)
#' data(small.example.object)
#' logger.start(fname=NA)
#' rnb.RnBSet.to.bedGraph(rnb.set.example,tempdir())
#' }
rnb.RnBSet.to.bedGraph <- function(rnb.set,out.dir=".",reg.type="sites",parameters=character(),digits=NULL){
	## Validate parameters
	if (!inherits(rnb.set, "RnBSet")) {
		stop("invalid value for rnb.set")
	}
	if (!(is.character(out.dir) && length(out.dir) == 1 && isTRUE(out.dir != ""))) {
		stop("invalid value for out.dir")
	}
	if (!(is.character(reg.type) && length(reg.type) == 1 && isTRUE(reg.type != ""))) {
		stop("invalid value for reg.type")
	}
	if (!is.character(parameters)) {
		stop("invalid value for parameters")
	}
	if (length(parameters) != 0) {
		if (any(is.na(parameters)) || is.null(names(parameters)) || any(is.na(names(parameters)))) {
			stop("invalid value for parameters")
		}
		parameters <- paste0(" ", paste(names(parameters), parameters, sep = "=", collapse = " "))
	}
	if (!is.null(digits)) {
		if (isTRUE(is.double(digits) && all(digits == as.integer(digits)))) {
			digits <- as.integer(digits)
		}
		if (!(is.integer(digits) && length(digits) == 1 && isTRUE(0L <= digits && digits <= 10L))) {
			stop("invalid value for digits")
		}
	}

	## Create output directory if needed
	if (file.exists(out.dir)) {
		if (!file.info(out.dir)[, "isdir"]) {
			stop("out.dir is an existing file")
		}
	} else if (!dir.create(out.dir, showWarnings = FALSE, recursive = TRUE)) {
		stop(paste("Could not create directory", out.dir))
	}

	## Extract annotation table and methylation values
	sample.ids <- samples(rnb.set)
	tbl <- annotation(rnb.set, type = reg.type)[, c("Chromosome", "Start", "End")]
	tbl[, 2] <- tbl[, 2] - 1L # switch from 1-based closed intervals to 0-based half-open
	tbl[["Value"]] <- 0
	mm <- meth(rnb.set, type = reg.type)

	## Export to bedGraph files
	nd <- as.integer(ceiling(log10(length(sample.ids)))) + 1L
	filenames <- data.frame(
		"Sample" = sample.ids,
		"File" = sprintf(paste0("rnbeads_sample_%0", nd, "d.bedGraph"), 1:length(sample.ids)),
		stringsAsFactors = FALSE)
	for (i in 1:length(sample.ids)) {
		fname <- file.path(out.dir, filenames[i, 2L])
		if (is.null(digits)) {
			tbl[, "Value"] <- mm[, i]
		} else {
			tbl[, "Value"] <- round(mm[, i], digits = digits)
		}
		j <- which(!is.na(mm[, i]))
		cat(paste0('track type=bedGraph name="', sample.ids[i], '"', parameters, "\n"), file = fname)
		write.table(tbl[j, ], fname, TRUE, FALSE, "\t", row.names = FALSE, col.names = FALSE)
	}
	are.disjoint <- tapply(1:nrow(tbl), as.character(tbl[, 1]), function(ii) {
		(length(ii) < 2) || all(tbl[ii[-length(ii)], 3] <= tbl[ii[-1], 2])
	})
	invisible(
		list("filenames" = filenames, "assembly" = assembly(rnb.set), "contains.overlapping.regions" = !all(are.disjoint)))
}

########################################################################################################################
########################################################################################################################

## rnb.diffmeth.to.EpiExplorer.file
##
## Export the differential methylation information to a file that can be read by EpiExplorer
## @author Fabian Mueller
rnb.diffmeth.to.EpiExplorer.file <- function(rnb.set, diffmeth, fname, comp.name, reg.type, referenceSet) {
	if (!(is.character(fname) && length(fname) == 1 && (!is.na(fname)))) {
		stop("invalid value for fname")
	}
	if (!(reg.type %in% c(rnb.region.types(assembly(rnb.set)),"sites")) || !(reg.type %in% c(names(rnb.set@regions),"sites"))){
		stop("Unsupported region type")
	}
	get.type <- reg.type
	if (reg.type=="sites") {
		get.type <- "CpG"
		dmt <- get.table(diffmeth,comp.name,"sites",return.data.frame=TRUE)
		col.rank <- "combinedRank"
		col.diff <- "mean.diff"
		col.quot <- "mean.quot.log2"
		col.p.val <- "diffmeth.p.adj.fdr"
		index.map <- rnb.set@sites
	} else {
		dmt <- get.table(diffmeth,comp.name,reg.type,return.data.frame=TRUE)
		col.rank <- "combinedRank"
		col.diff <- "mean.mean.diff"
		col.quot <- "mean.mean.quot.log2"
		col.p.val <- "comb.p.adj.fdr"
		index.map <- rnb.set@regions[[reg.type]]
	}
	aa.rnbd <- rnb.get.annotation(get.type,assembly(rnb.set))

	#go over each chromosome and get the corresponding diffmeth data as GRanges object
	dm.grl <- lapply(1:length(aa.rnbd),FUN=function(cci){
		res <- aa.rnbd[[cci]]
		cur.chrom.members <- index.map[,2]==cci #get all the indexes belonging to the current chromosome
		inds <- index.map[cur.chrom.members,3]
		dm.df <- dmt[cur.chrom.members,c(col.rank,col.diff,col.quot,col.p.val)]
		dm.df <- data.frame(inRnBSet=rep(TRUE,nrow(dm.df)),dm.df)
		na.vec <- rep(NA,length(res))
		empty.df <- data.frame(inRnBSet=rep(FALSE,length(res)),col.rank=na.vec,col.diff=na.vec,col.quot=na.vec,col.p.val=na.vec)
		names(empty.df) <- c("inRnBSet",col.rank,col.diff,col.quot,col.p.val)
		if (length(inds > 0)){
			empty.df[inds,] <- dm.df
		}
		elementMetadata(res) <- empty.df
		return(res)
	})
	dm.gr <- do.call(c,dm.grl)
	df2export <- as.data.frame(elementMetadata(dm.gr))

	write.table(format(df2export,scientific=FALSE),file=fname,
			quote=FALSE,sep="\t",row.names=FALSE,col.names=TRUE,na="")
	return(fname)
}

########################################################################################################################

## create.ucsc.track.hub
##
## creates the directory structure and essential files for a UCSC track hub.
## @param hub.dir directory where the track hub should be created. Must be nonexisting.
## @param rnb.set Object of class \code{RnBSet} for which the hub is to be created.
## @param data.type either "bigBed" or "bigWig"
## @param ana.name a name for the analysis
## @param ana.desc a decription of the analysis
## @param email email address of the creator
## @return nothing of particular interest
## @details For details on UCSC track hubs see \href{http://genome.ucsc.edu/goldenPath/help/hgTrackHubHelp.html}{here}.
## @author Fabian Mueller
create.ucsc.track.hub <- function(hub.dir,rnb.set,data.type="bigBed",ana.name="RnBeads Analysis",ana.desc="Methylation data processed by RnBeads",email="-@-.com"){
	if (file.exists(hub.dir)){
		stop("Could not create UCSC Hub directory as the specified directory already exists")
	}
	if (!(data.type %in% c("bigBed","bigWig"))){
		stop("Unsupported data type")
	}
	hub.name <- "RnBeads Analysis Hub"
	#follow the steps from http://genome.ucsc.edu/goldenPath/help/hgTrackHubHelp.html
	#step1 is handled by rnb.export.to.ucsc and manual conversion to binary format
	#Step2
	dir.create(hub.dir)
	#Step3 has to be done manually
	#Step4
	fileConn<-file(file.path(hub.dir,"hub.txt"))
	writeLines(c(paste("hub",hub.name),
				 paste("shortLabel",ana.name),
				 paste("longLabel",ana.desc),
				 "genomesFile genomes.txt",
				 paste("email",email)
	),fileConn)
	close(fileConn)
	#Step5
	fileConn<-file(file.path(hub.dir,"genomes.txt"))
	writeLines(c(paste("genome",assembly(rnb.set)),
				 paste("trackDb ",assembly(rnb.set),"/trackDb.txt",sep="")
	),fileConn)
	close(fileConn)
	#Step6
	assembly.subdir <- file.path(hub.dir,assembly(rnb.set))
	if (!file.exists(assembly.subdir)){
		dir.create(assembly.subdir)
	}
	#Step7
	type.str <- data.type
	if (data.type == "bigBed"){
		type.str <- "bigBed 5 ."
	}
	lines2write <- c()
	for (i in 1:length(samples(rnb.set))){
		ss <- samples(rnb.set)[i]
		if (data.type=="bigBed"){
			lines2write <- c(lines2write,
				c(paste("track",paste("RnBeadsMethylationTrack",i,sep="")),
						paste("bigDataUrl",paste("rnbeads_sample",i,".",data.type,sep="")),
						paste("shortLabel",paste(ss)),
						paste("longLabel",paste("RnBeads methylation track for sample",ss)),
						paste("visibility","dense"),
						paste("color","0,60,120"),
						paste("spectrum","on"),
						paste("type ",type.str,"\n",sep=""))
			)
		} else if (data.type=="bigWig"){
			lines2write <- c(lines2write,
				     c(paste("track",paste("RnBeadsMethylationTrack",i,sep="")),
					 paste("bigDataUrl",paste("rnbeads_sample",i,".",data.type,sep="")),
					 paste("shortLabel",paste(ss)),
					 paste("longLabel",paste("RnBeads methylation track for sample",ss)),
					 paste("visibility","full"),
					 paste("viewLimits","0:1"), #bigWig
					 paste("viewLimitsMax","0:1"), #bigWig
					 paste("maxHeightPixels","100:30:10"), #bigWig
					 paste("color","0,60,120"),
					 paste("spectrum","on"),
	 				 paste("type ",type.str,"\n",sep=""))
			)
		}
	}
	fileConn<-file(file.path(hub.dir,assembly(rnb.set),"trackDb.txt"))
	writeLines(lines2write,fileConn)
	close(fileConn)
	#Step8
	#Maybe later
}

########################################################################################################################

#' rnb.convert.bedGraph.to.bigWig
#'
#' Converts \code{*.bedGraph} files to \code{*.bigWig} files.
#' 
#' @param files.inp  File names of the \code{*.bedGraph} files (input files).
#' @param files.out  File names of the \code{*.bigWig} files (output files).
#' @param file.chrom Existing text file storing chromosome lengths in base pairs.
#' @param file.exec  Full path to the tool bedGraphToBigWig.
#' @return Invisibly, number of successfully converted bedGraph files.
#'
#' @details Supported operating systems are currently Unix and MacOS only. The corresponding
#' 			binaries of \code{bedGraphToBigWig} are installed along with \pkg{RnBeads}. So are
#'          the chromosome sizes files.
#' @author Fabian Mueller
#' @noRd
rnb.convert.bedGraph.to.bigWig <- function(files.inp, files.out, file.chrom, file.exec) {
	result <- 0L
	for (i in 1:length(files.inp)) {
		cmd <- paste0('"', file.exec, '" "', files.inp[i], '" "', file.chrom, '" "', files.out[i], '"')
		cmd <- suppressWarnings(system(cmd, intern = TRUE, ignore.stderr = TRUE))
		result <- result + is.null(attr(cmd, "status"))
	}
	invisible(result)
}

########################################################################################################################

#' rnb.convert.bed.to.bigBed
#'
#' converts \code{*.bed} files to \code{*.bigBed} files.
#'
#' @param files.inp  File names of the \code{*.bed} files (input files).
#' @param files.out  File names of the \code{*.bigBed} files (output files).
#' @param file.chrom Existing text file storing chromosome lengths in base pairs.
#' @param file.exec  Full path to the tool bedToBigBed.
#' @return Invisibly, number of successfully converted bedGraph files.
#'
#' @author Fabian Mueller
#' @noRd
rnb.convert.bed.to.bigBed <- function(files.inp, files.out, file.chrom, file.exec){

	result <- 0L
	for (i in 1:length(files.inp)) {
		bedFn <- files.inp[i]

		## Remove the header (track line) if such exists
#		header.found <- isTRUE(grepl("^track", readLines(bedFn, n = 1)))
#		if (header.found) {
#			fileContent <- readLines(bedFn)[-1]
#			bedFn <- tempfile(pattern = "bedFile", fileext = ".bed")
#			writeLines(fileContent, bedFn)
#		}

		## Run bed to bigBed conversion
		cmd <- paste0('"', file.exec, '" "', bedFn, '" "', file.chrom, '" "', files.out[i], '"')
		cmd <- suppressWarnings(system(cmd, intern = TRUE, ignore.stderr = TRUE))
		result <- result + is.null(attr(cmd, "status"))

		## Clean up the temporary file if created
#		if (header.found) {
#			file.remove(bedFn)
#		}
	}
	invisible(result)
}

########################################################################################################################

#' rnb.export.to.trackhub
#'
#' convert an \code{\linkS4class{RnBSet}}  object to a UCSC-style track hub.
#' @param rnb.set Object of class \code{\linkS4class{RnBSet}}
#' @param out.dir output directory. If not existing, it will be created. otherwise files in that directory are overwritten.
#' @param reg.type region type to be converted
#' @param data.type either "bigBed" or "bigWig"
#' @param ... parameters passed on to the track hub generating procedure
#' @return a list containing information on the export
#' @details During execution the RnBSet is converted to bed files. If the operating system is supported (currently Unix and MacOS only)
#'          these are automatically converted to bigBed files. If your operating system is not supported, you need to create them manually
#'          (see the \href{http://genome.ucsc.edu/FAQ/FAQformat.html}{UCSC Genome Browser documentation} for details).
#'          For details on UCSC track hubs see the
#'          \href{http://genome.ucsc.edu/goldenPath/help/hgTrackHubHelp.html}{UCSC tracks help page}.
#' @author Fabian Mueller
#' @export
#' @examples
#' \donttest{
#' library(RnBeads.hg19)
#' data(small.example.object)
#' logger.start(fname=NA)
#' rnb.export.to.trackhub(rnb.set.example,tempdir())
#' }
rnb.export.to.trackhub <- function(rnb.set,out.dir,reg.type="sites",data.type="bigBed",...){
	## Validate paramter values
	if (!inherits(rnb.set, "RnBSet")) {
		stop("Invalid value for rnb.set: Expected RnBSet")
	}
	if (!(is.character(out.dir) && length(out.dir) == 1 && isTRUE(out.dir != ""))) {
		stop("Invalid value for out.dir")
	}
	if (!file.exists(out.dir)) {
		if (!dir.create(out.dir, FALSE, TRUE)) {
			rnb.error(c("Could not create directory", out.dir))
		}
	}
	if (!(is.character(reg.type) && length(reg.type) == 1 && isTRUE(reg.type != ""))) {
		stop("Invalid value for reg.type")
	}
	if (!(reg.type %in% c("sites", rnb.region.types(assembly(rnb.set))))) {
		stop(paste("Unsupported region type:", reg.type))
	}
	if (!(is.character(data.type) && length(data.type) == 1 && isTRUE(data.type != ""))) {
		stop("Invalid value for data.type")
	}
	if (!(data.type %in% c("bigBed","bigWig"))) {
		stop("Unsupported data type")
	}

	res <- list(assembly = assembly(rnb.set))
	if (data.type=="bigBed") {

		## Skip if executable is not found
		file.exec <- rnb.get.executable("bedToBigBed")
		if (length(file.exec) == 0) {
			txt <- paste("Skipped conversion bed -> bigBed for region type", reg.type)
			txt <- paste0(txt, "; could not find the tool bedToBigBed")
			rnb.warning(txt)
			res[["converted.bigBed"]] <- FALSE
			return(res)
		}
		logger.info(paste("Using", file.exec))

		## Expport all samples to bed files
		track.hub.dir <- file.path(out.dir, "trackHub_bigBed")
		bed.dir <- tempdir()
		rnb.logger.start("Conversion to BED")
		bed.conv <- rnb.RnBSet.to.bed(rnb.set, bed.dir, reg.type, inherits(rnb.set, "RnBiseqSet"), FALSE, TRUE, FALSE)
		rnb.logger.completed()

		## Prepare for conversion to bigWig
		file.chrom <- file.path(out.dir, "chromosomes.txt")
		in.file.list <- file.path(bed.dir, bed.conv$filenames$filename)
		out.file.list <- paste0("rnbeads_sample", bed.conv$filenames$i, ".bigBed")
		out.file.list <- file.path(track.hub.dir, assembly(rnb.set), out.file.list)
		res[["filenames.bed"]] <- bed.conv$filenames

		#@ Write the UCSC track hub structure
		rnb.logger.start("Creating Track Hub")
		create.ucsc.track.hub(track.hub.dir, rnb.set, data.type="bigBed", ...)

		## Convert bed to bigBed
		if (!file.exists(file.chrom)) {
			rnb.chromosome.lengths(rnb.set@assembly, file.chrom)
		}
		rnb.convert.bed.to.bigBed(in.file.list, out.file.list, file.chrom, file.exec)
		res[["converted.bigBed"]] <- TRUE
		rnb.logger.completed()

	} else if (data.type=="bigWig") {

		## Skip if executable is not found
		file.exec <- rnb.get.executable("bedGraphToBigWig")
		if (length(file.exec) == 0) {
			txt <- paste("Skipped conversion bedGraph -> bigWig for region type", reg.type)
			txt <- paste0(txt, "; could not find the tool bedGraphToBigWig")
			rnb.warning(txt)
			res[["converted.bigWig"]] <- FALSE
			return(res)
		}
		logger.info(paste("Using", file.exec))

		## Expport all samples to bedGraph files
		file.chrom <- file.path(out.dir, "chromosomes.txt")
		track.hub.dir <- file.path(out.dir, "trackHub_bigWig")
		bedgraph.dir <- file.path(out.dir, "bedgraph")
		rnb.logger.start("Conversion to bedGraph")
		bedGraph.conv <- rnb.RnBSet.to.bedGraph(rnb.set, bedgraph.dir, reg.type=reg.type)
		rnb.logger.completed()

		## Prepare for conversion to bigWig
		in.file.list <- file.path(bedgraph.dir, bedGraph.conv$filenames$File)
		out.file.list <- gsub("bedGraph$", "bigWig", bedGraph.conv$filenames$File)
		out.file.list <- file.path(track.hub.dir, assembly(rnb.set), out.file.list)

		## Skip if overlapping regions are found
		res[["contains.overlapping.regions"]] <- bedGraph.conv[["contains.overlapping.regions"]]
		res[["filenames.bedGraph"]] <- bedGraph.conv$filenames
		if (res[["contains.overlapping.regions"]]) {
			txt <- paste("Skipped conversion bedGraph -> bigWig for region type", reg.type)
			txt <- paste0(txt, "; overlapping fragments found")
			rnb.warning(txt)
			res[["converted.bigWig"]] <- FALSE
			unlink(bedgraph.dir, recursive = TRUE, force = TRUE)
			return(res)
		}

		## Write the UCSC track hub structure
		rnb.logger.start("Creating Track Hub")
		create.ucsc.track.hub(track.hub.dir, rnb.set, data.type="bigWig", ...)

		## Convert bedGraph to bigWig
		if (!file.exists(file.chrom)) {
			rnb.chromosome.lengths(rnb.set@assembly, file.chrom)
		}
		rnb.convert.bedGraph.to.bigWig(in.file.list,out.file.list,file.chrom,file.exec)
		res[["converted.bigWig"]] <- TRUE

		## Delete bedGraph files after successful conversion
		unlink(bedgraph.dir, recursive = TRUE, force = TRUE)
		rnb.logger.completed()
	}

	invisible(res)
}

########################################################################################################################
########################################################################################################################

#' rnb.export.to.ewasher
#'
#' Data exported to a format compatible with the FaST-LMM-EWASher tool for cell-mixture adjustment.
#' see \href{http://www.nature.com/nmeth/journal/v11/n3/full/nmeth.2815.html}{Zou, J., et al., Nature Methods, 2014}
#' for further details on the tool.
#' @param rnb.set Object of class \code{\linkS4class{RnBSet}}
#' @param out.dir output directory. If not existing, it will be created and all exported files will be placed here.
#' 				  If existing, this functions results in an error.
#' @param reg.type region type to be exported
#' @param ... passed on to \code{get.comparison.info}
#' @return a list containing information on the export
#' @author Fabian Mueller
#' @export
#' @examples
#' \donttest{
#' library(RnBeads.hg19)
#' data(small.example.object)
#' logger.start(fname=NA)
#' rnb.export.to.ewasher(rnb.set.example,tempfile(pattern="forEwasher"))
#' }
rnb.export.to.ewasher <- function(rnb.set, out.dir, reg.type="sites", ...){
	#example: rnb.export.to.ewasher(rnb.set,out.dir="~/tmp/rnb4ewasher",pheno.cols=rnb.getOption("differential.comparison.columns"))
	res <- list()
	if (!inherits(rnb.set, "RnBSet")) {
		stop("invalid value for rnb.set; expected RnBSet")
	}
	if (file.exists(out.dir)){
		stop("Could not create output directory as the specified directory already exists")
	}
	dir.create(out.dir)

	data.dir <- file.path(out.dir,"data_file")
	dir.create(data.dir)
	pheno.dir <- file.path(out.dir,"phenotype_file")
	dir.create(pheno.dir)
	covar.dir <- file.path(out.dir,"covar_file")
	dir.create(covar.dir)
	map.dir <- file.path(out.dir,"map_file")
	dir.create(map.dir)
	res[["data.subdir"]] <- "data_file"
	res[["pheno.subdir"]] <- "phenotype_file"
	res[["covar.subdir"]] <- "covar_file"
	res[["map.subdir"]] <- "map_file"


	cmp.info <- get.comparison.info(rnb.set,adjust.celltype=FALSE,...)

	mm <- meth(rnb.set,type=reg.type)
	aa <- annotation(rnb.set,type=reg.type)
	site.ids <- rownames(aa)
	sample.ids <- samples(rnb.set)
	# check if the sample ids start with a letter. If not prepend "S_"
	# because EWASher has problems with sample names starting with non-letter characters
	illegal.sample.id <- !grepl("^[[:alpha:]]", sample.ids)
	sample.ids[illegal.sample.id] <- paste0("S_",sample.ids[illegal.sample.id])

	cmp.ids   <- paste0("cmp",1:length(cmp.info))
	cmp.desc <- sapply(cmp.info,FUN=function(x){x$comparison})
	tab.summary <- data.frame(
		comparison_id=cmp.ids,
		comparison_desc=cmp.desc,
		stringsAsFactors=FALSE)
	res[["comparison.table"]] <- tab.summary

	#write the methylation data
	logger.start("Exporting methylation data")
	tab.data <- data.frame(
		ID=site.ids,
		mm
	)
	colnames(tab.data) <- c("ID",sample.ids)
	fn <- file.path(data.dir,"methylation.tsv")
	write.table(tab.data, file=fn,quote=FALSE,sep="\t",row.names=FALSE,col.names=TRUE)
	res[["data.files"]] <- rep("methylation.tsv",length(cmp.info))
	logger.completed()

	#write the location mapping data
	logger.start("Exporting genome mapping data")
	map.data <- data.frame(
		chrom=gsub("chr", "", aa$Chromosome),
		ID=site.ids,
		chromStart=aa$Start,
		chromEnd=aa$End-1
	)
	fn <- file.path(map.dir,"mapping.map")
	write.table(map.data, file=fn,quote=FALSE,sep="\t",row.names=FALSE,col.names=FALSE)
	res[["map.files"]] <- rep("mapping.map",length(cmp.info))
	logger.completed()

	res[["pheno.files"]] <- rep(NA,length(cmp.info))
	res[["covar.files"]] <- rep(NA,length(cmp.info))
	for (i in 1:length(cmp.info)){
		logger.start(c("Exporting data for comparison:",cmp.info[[i]]$comparison))
		#write the phenotype data
		logger.status("Exporting phenotype table...")
		grps <- rep(NA,length(sample.ids))
		grps[cmp.info[[i]]$group.inds$group1] <- 1
		grps[cmp.info[[i]]$group.inds$group2] <- 0
		tab.pheno <- data.frame(
			sample=sample.ids,
			pheno=grps
		)
		fnn <- paste0("pheno_",cmp.ids[i],".tsv")
		fn <- file.path(pheno.dir,fnn)
		write.table(tab.pheno, file=fn,quote=FALSE,sep="\t",row.names=FALSE,col.names=FALSE)
		res[["pheno.files"]][i] <- fnn

		#covariates
		pairing.info <- NULL
		covariates <- cmp.info[[i]]$adjustment.table
		if (cmp.info[[i]]$paired){
			pairing.info <- rep(NA,length(sample.ids))
			#the sample indices are assumed to be in pairing order and the number of samples in each group are equal
			num.pairs <- length(cmp.info[[i]]$group.inds$group1)
			pairing.info[cmp.info[[i]]$group.inds$group1] <- paste0("pair",1:num.pairs)
			pairing.info[cmp.info[[i]]$group.inds$group2] <- paste0("pair",1:num.pairs)
			pairing.info <- as.factor(pairing.info)
			if (is.null(covariates)){
				covariates <- data.frame(matrix(NA,nrow=length(sample.ids),ncol=0))
			}
			covariates <- data.frame(covariates,pairs=pairing.info)
		}
		if (!is.null(covariates)){
			logger.status("Exporting covariate table...")
			opts.na.orig <- options("na.action")[[1]] #for converting the dataframe and keeping NA's temporarily change the NA action
			options(na.action='na.pass')
			covariates <- model.matrix(~0+.,data=covariates)
			options(na.action=opts.na.orig)
			tab.covar <- data.frame(
				intercept=1,
				sample=sample.ids,
				covariates
			)
			fnn <- paste0("covar_",cmp.ids[i],".tsv")
			fn <- file.path(covar.dir,fnn)
			write.table(tab.covar, file=fn,quote=FALSE,sep="\t",row.names=FALSE,col.names=FALSE)
			res[["covar.files"]][i] <- fnn
		}
		logger.completed()
	}

	fn <- file.path(out.dir,"summary.tsv")
	write.table(tab.summary, file=fn,quote=FALSE,sep="\t",row.names=FALSE,col.names=TRUE)
	return(res)
}

########################################################################################################################

### rnb.section.export.ct.adj
###
### add information to the report for export
### @author Fabian Mueller
### @aliases rnb.section.export
### @param ewasher.obj Ewasher export results object. See \code{\link{rnb.export.to.ewasher}} for details.
### @param rnbSet RnBSet object
### @param report report object to which the content is added
### @return the updated report object
rnb.section.export.ct.adj <- function(ewasher.obj,rnbSet,report){
	res.ewasher <- ewasher.obj
	ewasher.dir <- file.path(rnb.get.directory(report, "data"),"sites","ewasher")
	sectionText <- c("Site-level methylation and differential methylation data has been exported to formats compatible with tools for ",
		"cell-mixture adjustment.")
	report <- rnb.add.section(report, 'Export to Other Cell Type Adjustment Tools', sectionText)
	sectionText <- c("In order to analyze the exported data with FaST-LMM-EWASher, first obtain either the ",
		"<a href=\"http://research.microsoft.com/en-us/downloads/472fe637-7cb9-47d4-a0df-37118760ccd1/\">R-version</a> ",
		"or the <a href=http://research.microsoft.com/en-us/downloads/8af2ab12-a205-4bbb-809c-a333ecaffa40/>Python-version</a> ",
		"of the tool and install it on your machine. ",
		"Note that the FaST-LMM-EWASher software is currently in development. Hence, installation (particularly on non-Windows-computers) ",
		"is a non-trivial task. We recommend to contact the FaST-LMM-EWASher developers in case of technical difficulties. ",
		"RnBeads exported files that can serve as an input to FaST-LMM-EWASher. ",
		"These input files can be found in the ",paste("<a href=\"",ewasher.dir,"\">data directory</a> ",sep=""),
		"accompanying this report. The following table contains the filenames for the input data for each differential methylation comparison:")
	report <- rnb.add.section(report, 'FaST-LMM-EWASher', sectionText, level=2)
	data.file.dir <- file.path(rnb.get.directory(report, "data"),"sites","ewasher",res.ewasher$data.subdir)
	data.file.links <- paste("<a href=\"",file.path(data.file.dir,res.ewasher$data.files),"\">",res.ewasher$data.files,"</a>",sep="")
	map.file.dir <- file.path(rnb.get.directory(report, "data"),"sites","ewasher",res.ewasher$map.subdir)
	map.file.links <- paste("<a href=\"",file.path(map.file.dir,res.ewasher$map.files),"\">",res.ewasher$map.files,"</a>",sep="")
	pheno.file.dir <- file.path(rnb.get.directory(report, "data"),"sites","ewasher",res.ewasher$pheno.subdir)
	pheno.file.links <- paste("<a href=\"",file.path(pheno.file.dir,res.ewasher$pheno.files),"\">",res.ewasher$pheno.files,"</a>",sep="")
	covar.file.dir <- file.path(rnb.get.directory(report, "data"),"sites","ewasher",res.ewasher$covar.subdir)
	covar.file.links <- ifelse(!is.na(res.ewasher$covar.files),
		paste("<a href=\"",file.path(covar.file.dir,res.ewasher$covar.files),"\">",res.ewasher$covar.files,"</a>",sep=""),
		"N/A")
	tt.ewasher <- data.frame(
		comparison=res.ewasher$comparison.table$comparison_desc,
		comparison_id=res.ewasher$comparison.table$comparison_id,
		data_file=data.file.links,
		pheno_file=pheno.file.links,
		map_file=map.file.links,
		covar_file=covar.file.links,
		stringsAsFactors=FALSE
	)
	rnb.add.table(report,tt.ewasher)

	sectionText <- c("To run the Python-version of FaST-LMM-EWASher with the output file use the following syntax for your shell or console ",
		"commands:")
	rnb.add.paragraph(report, sectionText)
	cmdSyntax <- paste0("<code>","python fastlmm-ewasher.py data_file pheno_file [-covar covar_file] [-map map_file]","</code>")
	rnb.add.paragraph(report,cmdSyntax)
	sectionText <- c("To run the R-version of FaST-LMM-EWASher with the output file use the following syntax for your shell or console ",
		"commands:")
	rnb.add.paragraph(report, sectionText)
	cmdSyntax <- paste0("<code>","Rscript FLE-script.r data_file pheno_file [-covar covar_file] [-map map_file]","</code>")
	rnb.add.paragraph(report,cmdSyntax)
	return(report)
}

### rnb.section.tnt
###
### add information to the report for export
### @author Fabian Mueller
### @aliases rnb.section.export
### @param res.exp Exporting results object. See \code{\link{rnb.execute.tnt}} for details.
### @param rnbSet RnBSet object
### @param report report object to which the content is added
### @return the updated report object
rnb.section.tnt <- function(res.exp,rnbSet,report){
	if (is.null(res.exp)){
		sectionText <- "Nothing was exported"
		report <- rnb.add.section(report, 'Nothing Exported', sectionText)
	} else {
		is.biseq <- "RnBiseqSet" %in% class(rnbSet)
		is.beads <- "RnBeadSet" %in% class(rnbSet)
		export.ucsc.types <- unique(unlist(lapply(res.exp,FUN=function(x){x$export.trackhub})))
		any.export.bed <- any(unlist(lapply(res.exp,FUN=function(x){x$export.bed})))
		export.ewasher <- is.element("sites",names(res.exp))
		if (export.ewasher) export.ewasher <- is.element("export.ewasher",names(res.exp$sites))
		sectionText <- "Methylation Data was exported to the following formats:<ul>\n"
		if (any.export.bed){
			sectionText <- paste(sectionText,"<li><a href=\"http://genome.ucsc.edu/FAQ/FAQformat.html#format1\">bed</a></li>")
		}
		if ("bigBed" %in% export.ucsc.types){
			sectionText <- paste(sectionText,"<li><a href=\"http://genome.ucsc.edu/goldenPath/help/hgTrackHubHelp.html\">Track hub</a>: bigBed</li>")
		}
		if ("bigWig" %in% export.ucsc.types){
			sectionText <- paste(sectionText,"<li><a href=\"http://genome.ucsc.edu/goldenPath/help/hgTrackHubHelp.html\">Track hub</a>: bigWig</li>")
		}
		if (export.ewasher){
			refText <- c("Zou, J., Lippert, C., Heckerman, D., Aryee, M., Listgarten, J. (2014). ",
				"Epigenome-wide association studies without the need for cell-type composition. ",
				"<i>Nature Methods</i>, 11(3), 309-311")
			report <- rnb.add.reference(report, refText)
			sectionText <- paste(sectionText, "<li>Input for the FaST-LMM-EWASher tool for cell-mixture adjustment ",
				rnb.get.reference(report, refText), "</li>")
		}
		sectionText <- paste(sectionText,"</ul>")
		report <- rnb.add.section(report, 'Methylation Data', sectionText)
		summary.tab <- rnb.sample.summary.table(rnbSet)
		fname <- paste("sample_methylation_summary.csv",sep="")
		fname <- rnb.write.table(summary.tab,fname,fpath=rnb.get.directory(report, "data", absolute = TRUE),format="csv",gz=FALSE,row.names = FALSE,quote=FALSE)

		sectionText <- paste("This section contains summary metrics for each sample as well as links to exported files. The summary table below contains the following metrics. It is also available as ",
			"<a href=\"", rnb.get.directory(report, "data"), "/", fname,"\">csv file</a>.",sep="")
		column.description <- c("<ul>",
									"<li>sampleName: Name of the sample</li>",
									"<li>*_num (* can be 'sites' or a region type): Number of sites or regions with coverage in the sample</li>")
		if (is.biseq){
			column.description <- c(column.description,
									"<li>*_covgMean: Mean coverage of sites or regions in the sample</li>",
									"<li>*_covgMedian: Median coverage of sites or regions in the sample</li>",
									"<li>*_covgPerc25,75: 25 and 75 percentile of coverage of sites or regions in the sample</li>",
									"<li>*_numCovg5,10,30,60: Number of sites or regions with coverage greater or equal to 5,10,30,60</li>")
		}
		if (is.beads){
			if (!is.null(dpval(rnbSet))){
				column.description <- c(column.description,
										"<li>sites_numDPval5em2,1em2,1em3: Number of sites with a detection p-value smaller than 0.05,0.01,0.001</li>")
			}
		}
		column.description <- c(column.description,
									"<li>**_numSitesMean (** is any region type): Mean number of sites in a region</li>",
									"<li>**_numSitesMedian (** is any region type): Median number of sites in a region</li>",
									"<li>**_numSites2,5,10,20: Number of regions with at least 2,5,10,20 sites with valid methylation measurements</li>",
							   "</ul>")

		sectionText <- paste(sectionText,paste(column.description,collapse="\n"))
		if (any.export.bed){
			sectionText <- c(sectionText," Bed files for each sample contain the locations of methylation sites and regions the methylation level
						   (in the score column). The following table links to the files. The files can be quite large in size, so you might want to
							consider using the \"save as\" option instead of opening the links directly.")
			tt <- data.frame(sample=as.character(res.exp[[1]]$filenames.bed[,"sample"]),stringsAsFactors=FALSE)
			for (i in 1:length(res.exp)){
				rr <- names(res.exp)[i]
				ttt <- res.exp[[i]]$filenames.bed[,c("sample","filename")]
				ttt$filename <- paste("<a href=\"",file.path(rnb.get.directory(report, "data"),rr,"bed",ttt$filename),"\">","bed","</a>",sep="")
				tt <- data.frame(tt,ttt$filename,stringsAsFactors=FALSE)
			}
			colnames(tt)[2:ncol(tt)] <- paste0(names(res.exp),"_bedFile")
			rownames(tt) <- tt$sample
			bed.df <- data.frame(tt[summary.tab$sampleName,2:ncol(tt)],stringsAsFactors=FALSE)
			colnames(bed.df) <- colnames(tt)[2:ncol(tt)]
			summary.tab <- data.frame(sampleName=summary.tab$sampleName,bed.df,summary.tab[,2:ncol(summary.tab)],stringsAsFactors=FALSE)
		}
		if ("bigBed" %in% export.ucsc.types){
			tt <- data.frame(sample=as.character(res.exp[[1]]$filenames.bed[,"sample"]),stringsAsFactors=FALSE)
			for (i in 1:length(res.exp)){
				rr <- names(res.exp)[i]
				ttt <- res.exp[[i]]$filenames.bed[,c("sample","filename")]
				ttt$filename <- gsub(".bed$",".bigBed",ttt$filename) #replace bed with bigBed
				ttt$filename <- paste("<a href=\"",file.path(rnb.get.directory(report, "data"),rr,"trackHub_bigBed",assembly(rnbSet),ttt$filename),"\">","bigBed","</a>",sep="")
				tt <- data.frame(tt,ttt$filename,stringsAsFactors=FALSE)
			}
			colnames(tt)[2:ncol(tt)] <- paste0(names(res.exp),"_bigBedFile")
			rownames(tt) <- tt$sample
			bigbed.df <- data.frame(tt[summary.tab$sampleName,2:ncol(tt)],stringsAsFactors=FALSE)
			colnames(bigbed.df) <- colnames(tt)[2:ncol(tt)]
			summary.tab <- data.frame(sampleName=summary.tab$sampleName,bigbed.df,summary.tab[,2:ncol(summary.tab)],stringsAsFactors=FALSE)
		}

		report <- rnb.add.section(report, "Sample summary", sectionText, level=2)
		rnb.add.table(report,summary.tab,row.names=FALSE)

		if (length(export.ucsc.types)>0){

			sectionText <- paste("Track Hub data was generated for export to various genome browsers. Note that you need a server that is capable of
							serving the tracks to the genome browser via URL. Below, instructions are provided to view the tracks in the UCSC genome browser. Of course the files can also be viewed in other browsers such as the
							<a href=\"http://www.ensembl.org/\">Ensembl genome browser</a>.
							<ol>\n")
			anything.unconverted <- FALSE
			if ("bigBed" %in% export.ucsc.types) {
				if (!res.exp[[1]]$converted.bigBed) {
					anything.unconverted <- TRUE
				}
			}
			if ("bigWig" %in% export.ucsc.types) {
				if (!res.exp[[1]]$converted.bigWig) {
					anything.unconverted <- TRUE
				}
			}
			if (anything.unconverted) {
				sectionText <- paste(sectionText,"<li>Convert the bed/bedGraph files contained in the bed/bedGraph directories (see table below) attached to this report.",
									 "You can use the UCSC tool <code>bedTobigBed</code>/<code>bedGraphTobigWig</code> for this task. More information (e.g. where to obtain the tool)
									 on how to convert can be found <a href=\"http://genome.ucsc.edu/goldenPath/help/bigBed.html\">here</a>.",
									 "Make sure the resulting bigBed/bigWig files are moved to the corresponding UCSC track hub directory (see table(s) below).</li>")
			}
			sectionText <- paste(sectionText,"<li>Make sure you are viewing this report via the internet and not locally (simply look at the URL in the address line
							of your browser). If this is not the case, copy the files belonging to this report
							to a directory which is accessible via the internet and enter the corresponding URL in your browser.
							If you don't want to copy the entire report, it suffices to copy the track hub directories (see table(s) below).</li>")
			sectionText <- paste(sectionText,"<li>Make sure the",res.exp[[1]][["assembly"]],"subdirectories",
								"in the track hub directory (for its location be referred to the table(s) below) contain all the required bigBed/bigWig files</li>")
			sectionText <- paste(sectionText,"<li>Go to the <a href=\"http://genome.ucsc.edu/cgi-bin/hgHubConnect\">UCSC hub connection website</a>
							to the \"My Hubs\" tab and add the web-URL of the <code>hub.txt</code> file in the directory you just copied.
							Hint: You can use the copy link functionality of your browser (right mouse click) on the track hub txt column in the table(s) below.
							Don't forget to click on \"Load Selected Hubs\".</li>")
			sectionText <- paste(sectionText,paste("<li>Make sure you are looking at the correct genome (",res.exp[[1]][["assembly"]],").
							The loaded hubs should now appear below the browsing window where you can modify their display settings.</li>",sep=""))
			sectionText <- paste(sectionText,"</ol>\nMore information on UCSC track hubs can be found
						   <a href=\"http://genome.ucsc.edu/goldenPath/help/hgTrackHubHelp.html\">here</a>. Below, you find table(s) containing the directory names
						   for the bed/bedGraph and track hub directories for all exported data, site and region types.\n")
			report <- rnb.add.section(report, 'Tracks Hubs', sectionText, level=2)

			rrs <- names(res.exp)
			if ("bigBed" %in% export.ucsc.types){
				track.hub.dirs <- file.path(rnb.get.directory(report, "data"),rrs,"trackHub_bigBed")
				track.hub.urls <- file.path(rnb.get.directory(report, "data"),rrs,"trackHub_bigBed","hub.txt")
				bed.dirs <- file.path(rnb.get.directory(report, "data"),rrs,"bed")
				track.hub.dirs.links <- paste("<a href=\"",track.hub.dirs,"\">",track.hub.dirs,"</a>",sep="")
				track.hub.urls.links <- paste("<a href=\"",track.hub.urls,"\">",track.hub.urls,"</a>",sep="")
				bed.dirs.links <- paste("<a href=\"",bed.dirs,"\">",bed.dirs,"</a>",sep="")
				tt.bb <- data.frame("bed directory"=bed.dirs.links,"track hub directory"=track.hub.dirs.links,"track hub txt"=track.hub.urls.links)
				rownames(tt.bb) <- rrs
				report <- rnb.add.section(report, "bigBed", NULL, level=3)
				rnb.add.table(report,tt.bb)
			}
			if ("bigWig" %in% export.ucsc.types){
				regions.contain.overlap <- unlist(lapply(res.exp,FUN=function(x){
					base::ifelse(is.null(x[["contains.overlapping.regions"]]),FALSE,x[["contains.overlapping.regions"]])
				}))

				if (sum(!regions.contain.overlap)>0){
					track.hub.dirs <- file.path(rnb.get.directory(report, "data"),rrs[!regions.contain.overlap],"trackHub_bigWig")
					track.hub.urls <- file.path(rnb.get.directory(report, "data"),rrs[!regions.contain.overlap],"trackHub_bigWig","hub.txt")
					track.hub.dirs.links <- paste("<a href=\"",track.hub.dirs,"\">",track.hub.dirs,"</a>",sep="")
					track.hub.urls.links <- paste("<a href=\"",track.hub.urls,"\">",track.hub.urls,"</a>",sep="")
					tt.bw <- data.frame("track hub directory"=track.hub.dirs.links,"track hub txt"=track.hub.urls.links)
					if (!res.exp[[1]]$converted.bigWig){
						bed.dirs <- file.path(rnb.get.directory(report, "data"),rrs,"bedgraph")
						bed.dirs.links <- paste("<a href=\"",bed.dirs,"\">",bed.dirs,"</a>",sep="")
						tt.bw <- data.frame("bedgraph directory"=bed.dirs.links,tt.bw)
					}
					rownames(tt.bw) <- rrs[!regions.contain.overlap]
					report <- rnb.add.section(report, "bigWig", NULL, level=3)
					rnb.add.table(report,tt.bw)
				}
			}
		}
	}
	return(report)
}

########################################################################################################################

#' rnb.execute.tnt
#'
#' export RnBSet to various output data formats
#' @author Fabian Mueller
#' @aliases rnb.execute.tnt
#' @param rnb.set \code{\linkS4class{RnBSet}} object
#' @param out.dir output directory.
#' @param exp.bed A character vector indicating which data types should be exported to UCSC. Possible values in the vector are \code{bigBed} and \code{bigWig}.
#' 				  If \code{NULL}, UCSC export is disabled
#' @param exp.trackhub file types which should be exported to a trackhub structure.
#' @param region.types a character vector indicating region types to be exported
#' @param ... Arguments passed to \code{\link{rnb.export.to.trackhub}}
#' @return a list containing information on the export
#' @export
#' @examples
#' \donttest{
#' library(RnBeads.hg19)
#' data(small.example.object)
#' logger.start(fname=NA)
#' rnb.execute.tnt(rnb.set.example,tempdir())
#' }
rnb.execute.tnt <- function(rnb.set,out.dir,exp.bed=rnb.getOption("export.to.bed"),
		exp.trackhub=rnb.getOption("export.to.trackhub"),region.types=rnb.getOption("export.types"),...){
	logger.start("Generating Tracks and Tables")
	reg.types <- intersect(region.types,c(rnb.region.types(assembly(rnb.set)),"sites"))
	if (length(reg.types)<1){
		stop("Region types not found")
	}
	if (length(reg.types) < length(region.types)){
		logger.warning("Not all region types were found in the annotation")
	}
	ress <- list()
	for (rr in reg.types){
		logger.start(c("Exporting",rr))
		out.dir.cur <- file.path(out.dir,rr)
		dir.create(out.dir.cur)
		res <- list(export.bed = exp.bed, export.trackhub = character())
		if (exp.bed) {
			logger.start("Creating BED Files")
			bed.dir <- file.path(out.dir.cur,"bed")
			res.exp <- rnb.RnBSet.to.bed(rnb.set,bed.dir,reg.type=rr,names.quant.meth=TRUE)
			logger.completed()
			res[["filenames.bed"]] <- res.exp[["filenames"]]
		}
		if ("bigBed" %in% exp.trackhub) {
			logger.start("Creating Track Hub -- bigBed")
			res.exp <- rnb.export.to.trackhub(rnb.set,out.dir.cur,reg.type=rr,
							ana.name=paste("RnBeads Analysis -- ",rr,",bigBed",sep=""),
							ana.desc=paste(rr, "methylation (bigBed)"),data.type="bigBed",...)
			logger.completed()
			res <- c(res,res.exp)
			res[["export.trackhub"]] <- c(res[["export.trackhub"]],"bigBed")
		}
		if ("bigWig" %in% exp.trackhub){
			logger.start("Creating UCSC Track Hub -- bigWig")
			res.exp <- rnb.export.to.trackhub(rnb.set,out.dir.cur,reg.type=rr,
					ana.name=paste0("RnBeads Analysis -- ",rr,",bigWig"),
					ana.desc=paste(rr, "methylation (bigWig)"),data.type="bigWig",...)
			logger.completed()
			res <- c(res,res.exp)
			if (res[["converted.bigWig"]]) {
				res[["export.trackhub"]] <- c(res[["export.trackhub"]],"bigWig")
			}
		}
		ress <- c(ress,list(res))
		logger.completed()
	}
	names(ress) <- reg.types
	logger.completed()
	invisible(ress)
}

########################################################################################################################
########################################################################################################################

## rnb.execute.export.csv.save
##
## Attempts to save the methylation values matrix of the given region type to the specified file.
##
## @param rnb.set         Methylation dataset as an object of type inheriting \code{\linkS4class{RnBSet}}.
## @param region.type     Site or region type to be exported.
## @param output.location Target file path.
## @param fname           Target file name. If this file already exists, it will be overwritten.
## @param gz              Flag indicating whether the file should be zipped in \code{gz} format.
## @return The (possibly updated) target file name, invisibly. If \code{gz} is \code{TRUE}, the string \code{".gz"} will
##         be appended to \code{fname}.
##
## @seealso \code{\link{rnb.write.table}}
## @author Yassen Assenov
rnb.execute.export.csv.save <- function(rnb.set, region.type, output.location, fname,
	gz = rnb.getOption("gz.large.files")) {
	mm <- meth(rnb.set, region.type, row.names = TRUE)
	reg.annotation <- annotation(rnb.set, region.type)
	accepted.columns <- tolower(colnames(reg.annotation)) %in% c("chrom", "chromosome", "start", "end", "strand")
	mm <- cbind(data.frame(ID = rownames(mm), check.names = FALSE, stringsAsFactors = FALSE),
		reg.annotation[, accepted.columns], mm)
	rnb.write.table(mm, fname, output.location, gz = gz, row.names = FALSE, quote = FALSE)
}

########################################################################################################################

#' rnb.execute.export.csv
#'
#' Exports (selected) methylation tables of the given dataset to comma-separated value files.
#'
#' @param rnb.set         Methylation dataset as an object of type inheriting \code{\linkS4class{RnBSet}}.
#' @param output.location \code{character} or \code{\linkS4class{Report}} specifying the output directory. If this is a
#'                        report, the output directory is set to be a subdirectory named \code{csv} of the report's data
#'                        directory. Set this parameter to the empty string (\code{""}) or \code{NA} to use the current
#'                        working directory. If the given path does not exist, this function attempts to create it.
#' @param region.types    \code{character} vector indicating region types to be exported.
#' @return \code{character} vector containing the names of the files to which data were exported; prepended by
#'         \code{output.location}. In case a certain region type could not be exported (see the \emph{Details} section),
#'         the corresponding element of this vector is \code{NA}.
#'
#' @details
#' The names of the generated output files are formed by the prefix \code{"betas_"}, followed by a number between
#' 1 and \code{length(region.types)}. The extension is \code{.csv} or \code{.csv.gz}, depending on the value of the
#' \pkg{RnBeads} option \code{"gz.large.files"}. Any such files that already exist in the output directory, are
#' overwritten.
#'
#' There are several reasons why a certain output file cannot be (fully) generated. Examples for failures are listed
#' below:
#' \itemize{
#'   \item{}{The corresponding region type is invalid.}
#'   \item{}{The corresponding region type is not supported by the dataset. If the type is loaded in \pkg{RnBeads},
#'           use the \code{\link[=summarize.regions,RnBSet-method]{summarize.regions}} method prior to calling this function,
#'           in order to include the support of this region type in the dataset.}
#'   \item{}{Due to security restrictions, the creation of files in the output directory is not allowed.}
#'   \item{}{A file or directory with the same name exists and cannot be overwritten.}
#'   \item{}{The disk is full or the user quota is exceeded.}
#' }
#'
#' @examples
#' \donttest{
#' library(RnBeads.hg19)
#' data(small.example.object)
#' rnb.execute.export.csv(rnb.set.example, "", summarized.regions(rnb.set.example))
#' }
#' @author Yassen Assenov
#' @export
rnb.execute.export.csv <- function(rnb.set, output.location, region.types = rnb.getOption("export.types")) {
	if (!inherits(rnb.set, "RnBSet")) {
		stop("invalid value for rnb.set")
	}
	if (inherits(output.location, "Report")) {
		fprefix <- paste0(rnb.get.directory(output.location, "data", absolute = FALSE), "/csv/")
		output.location <- file.path(rnb.get.directory(output.location, "data", absolute = TRUE), "csv")
	} else {
		if (!(length(output.location) == 1 && (is.character(output.location) || is.na(output.location)))) {
			stop("invalid value for output.location; expected Report or character of length 1")
		}
		if (is.na(output.location) || output.location == ".") {
			output.location <- ""
		} else if (output.location != "") {
			output.location <- sub("^(.+)[/\\\\]$", "\\1", output.location) # remove trailing slash or backslash
		}
		fprefix <- ifelse(output.location == "", "", paste0(gsub("\\", "/", output.location, fixed = TRUE), "/"))
	}
	if (is.null(region.types)) {
		region.types <- character()
	} else if (!(is.character(region.types))) {
		stop("invalid value for region.types")
	}
	if (output.location != "") {
		if (!file.exists(output.location)) {
			## Attempt to create output.location
			if (!dir.create(output.location, showWarnings = FALSE, recursive = TRUE)) {
				rnb.error(c("Could not create directory", output.location))
			}
		} else if (!file.info(output.location)[1, "isdir"]) {
			rnb.error(c("Specified path is not a directory:", output.location))
		}
	}

	result <- region.types
	names(result) <- region.types
	if (length(region.types) != 0) {
		for (i in 1:length(region.types)) {
			fname <- paste0("betas_", i, ".csv")
			fname <- tryCatch(
				rnb.execute.export.csv.save(rnb.set, region.types[i], output.location, fname),
				error = function(e) { return(as.character(NA)) })
			if (!is.na(fname)) {
				fname <- paste0(fprefix, fname)
			}
			result[i] <- fname
		}
	}
	return(result)
}

########################################################################################################################

#' rnb.section.export.csv
#'
#' Creates a report section dedicated to exporting (selected) methylation tables to comma-separated value files.
#'
#' @param report         Report on data export to contain the CSV section. This must be an object of type
#'                       \code{\linkS4class{Report}}.
#' @param export.summary Result of the exporting procedure, as returned by \code{\link{rnb.execute.export.csv}}.
#' @return The (possibly) modified report.
#'
#' @author Yassen Assenov
#' @noRd
rnb.section.export.csv <- function(report, export.summary) {
	if (!inherits(report, "Report")) {
		stop("invalid value for report")
	}
	if (!(is.character(export.summary) && (!is.null(names(export.summary))))) {
		stop("invalid value for export.summary")
	}
	if (length(export.summary) == 0) {
		return(report)
	}

	txt <- "This section summarizes the result of exporting methylation data to comma-separated value (CSV) files."
	report <- rnb.add.section(report, "Export to Comma-separated Value Files", txt)

	if (all(is.na(export.summary))) {
		txt <- "Methylation matrices for none of the requested region types could be exported. "
		rnb.export.fail(report, txt)
		return(report)
	}
	txt <- c("The table below contains links to all generated CSV files that store exported methylation &beta; values.")
	if (any(is.na(export.summary))) {
		txt <- c(txt, " Note that the data for ", sum(is.na(export.summary)), " region types could not be exported.")
	}
	rnb.add.paragraph(report, txt)
	ftypes <- rep(".csv", length(export.summary))
	ftypes[grep("\\.csv\\.gz$", export.summary)] <- ".csv.gz"
	tsummary <- data.frame(
		"Region type" = names(export.summary),
		"File" = paste("<a href=\"", export.summary, "\">", ftypes, "</a>", sep = ""),
		check.names = FALSE, stringsAsFactors = FALSE)
	tsummary[is.na(export.summary), "File"] <- as.character(NA)
	rnb.add.table(report, tsummary, row.names = FALSE)
	if (any(is.na(export.summary))) {
		rnb.export.fail(report)
	}

	return(report)
}

Try the RnBeads package in your browser

Any scripts or data that you put into this service are public.

RnBeads documentation built on March 3, 2021, 2 a.m.