R/regionProfiles.R

Defines functions rnb.step.locus.profiles sortAndExtendRegions rnb.section.locus.profiles rnb.plot.locus.profile locus.profile.get.methylation.track.smooth locus.profile.get.methylation.track.heatmap locus.profile.get.methylation.track.param.processing locus.profile.get.base.tracks rnb.step.region.profiles rnb.section.region.profiles rnb.plot.region.profile.density rnb.plot.region.profiles rnb.plot.region.site.density rnb.find.relative.site.coord

Documented in rnb.find.relative.site.coord rnb.plot.locus.profile rnb.plot.region.profile.density rnb.plot.region.profiles rnb.plot.region.site.density

########################################################################################################################
## regionProfiles.R
## created: 2012-12-18
## creator: Fabian Mueller
## ---------------------------------------------------------------------------------------------------------------------
## Methods for computing region methylation profiles
########################################################################################################################

#' rnb.find.relative.site.coord
#'
#' given a region types, assigns sites to regions and determines relative positions
#' of sites in the assigned region
#'
#' @param rnb.set RnBSet object
#' @param region.type Region type for which the coordinates are computed
#' @param extend.by A number between 0 and 1 specifying the percentage by which a region is
#'        extended in order to capture methylation information before region start and after region end
#' @return a data frame containing the site index, the assigned region index and the relative coordinate
#'         The relative coordinate is 0 if the site's coordinate is identical to the region start coordinate
#'         and 1 if identical to the regions end coordinate and scaled inbetween. Coordinates can be
#'         less than 0 or larger than 1 if a site is in the upstream or downstream flanking region respectively
#'
#' @author Fabian Mueller
#' @export
rnb.find.relative.site.coord <- function(rnb.set,region.type,extend.by=0.33) {
	annot.s <- annotation(rnb.set,"sites")
	gr.s <- data.frame2GRanges(annot.s, chrom.column="Chromosome",start.column="Start",end.column="End",
							   strand.column="Strand",assembly=assembly(rnb.set),sort.result=FALSE)
	annot.r <- annotation(rnb.set,region.type)
	gr.r <- data.frame2GRanges(annot.r, chrom.column="Chromosome",start.column="Start",end.column="End",
							   strand.column="Strand",assembly=assembly(rnb.set),sort.result=FALSE)
	r.lengths <- end(gr.r)-start(gr.r)+1
	r.strands <- as.character(strand(gr.r))
	#extend the regions to contain the flanking regions: first extend at the end by twice extend.by and then shift to
	# also have the flanking region at the start
	gr.re <- shift(resize(gr.r,width=ceiling(r.lengths*(1+2*extend.by))),ifelse(r.strands=="-",1,-1) * ceiling(r.lengths*extend.by))

	strand.spec <- rnb.getOption("strand.specific")
	over.re <- findOverlaps(gr.s,gr.re,select="all",ignore.strand=!strand.spec)
	
	#get the relative coordinates
	si <- queryHits(over.re)
	ri <- subjectHits(over.re)
	rel.coords <- ifelse(
		r.strands[ri]=="-",
		end(gr.r)[ri]   - end(gr.s)[si],
		start(gr.s)[si] - start(gr.r)[ri]
	)/r.lengths[ri]
	res <- data.frame(site.idx=si,gene.idx=ri,relative.coord=rel.coords)
	return(res)
}

#' rnb.plot.region.site.density
#'
#' Plots the density of sites accross the specified region type
#'
#' @param rnb.set RnBSet object
#' @param region.type Region type for which the plot should be generated
#' @param extend.by A number between 0 and 1 specifying the percentage by which a region is
#'        extended in order to capture methylation information before region start and after region end
#' @return a ggplot2 object for plotting
#'         the plot shows the density of sites accross the specified region type for all regions of that
#'         type from 0 (region start) to 1 (region end). Sites in the flanking areas are
#'         also shown (coordinates <0 and >1).
#'
#' @author Fabian Mueller
#' @export
#' @examples
#' \donttest{
#' library(RnBeads.hg19)
#' data(small.example.object)
#' logger.start(fname=NA)
#' rnb.plot.region.site.density(rnb.set.example,"genes")
#' }
rnb.plot.region.site.density <- function(rnb.set,region.type,extend.by=0.33){
	dframe <- rnb.find.relative.site.coord(rnb.set,region.type,extend.by=extend.by)
	pp <- ggplot(dframe) + aes(x=relative.coord) + geom_density(color=rnb.getOption('colors.category')[1],
		  fill=rnb.getOption('colors.category')[1], alpha=0.5) +
		  xlab("relative position") + ylab("site density") + geom_vline(xintercept=c(0,1))
	return(pp)
}

#' rnb.plot.region.profiles
#'
#' Creates a composite plot showing the sample and groupwise smoothed estimates
#' of methylation values accross all regions of the specified type
#'
#' @param rnb.set RnBSet object
#' @param group.index.list a list (preferably named) containing sample indices for each group
#'		a list of such lists is for instance generated by the \code{rnb.sample.groups} function.
#' @param region.type Region type for which the plot should be generated
#' @param region.profile Alternative to specifying \code{region.type}, the function can accept a region profile
#'                       generated by the \code{rnb.find.relative.site.coord} function
#' @param extend.by A number between 0 and 1 specifying the percentage by which a region is
#'        extended in order to capture methylation information before region start and after region end
#' @param cvalues Color values that will be assigned to sample groups
#' @return a ggplot2 object for plotting
#'         the plot shows the smoothed methylation levels of sites accross the specified region type for 
#'         all regions of that type from 0 (region start) to 1 (region end). Sites in the flanking areas are
#'         also shown (coordinates <0 and >1).
#'         Smoothing is stratified by sample (dashed lines) and sample group (thick solid lines).
#'         Cubic splines are used for smoothing
#'
#' @author Fabian Mueller
#' @export
#' @examples
#' \donttest{
#' #Careful: this might take a while
#' library(RnBeads.hg19)
#' data(small.example.object)
#' logger.start(fname=NA)
#' rnb.plot.region.profiles(rnb.set.example,rnb.sample.groups(rnb.set.example)[[1]],"genes")
#' }
rnb.plot.region.profiles <- function(rnb.set, group.index.list, region.type="", region.profile=NULL, extend.by=0.33,
		cvalues=rnb.getOption("colors.category")){
	if (is.null(region.profile)){
		if (!is.element(region.type,summarized.regions(rnb.set))){
			stop("Invalid region type. You must specify either region.type or region.profile")
		}
		region.profile <- rnb.find.relative.site.coord(rnb.set,region.type,extend.by=extend.by)
	}
	grp.names <- names(group.index.list)
	if (is.null(grp.names)) grp.names <- 1:length(group.index.list)

	df2p <- do.call("rbind",lapply(1:length(group.index.list),FUN=function(gi){
		grp.name <- grp.names[gi]
		sample.idx <- group.index.list[[gi]]
		sample.names <- samples(rnb.set)[sample.idx]
		n.samples <- length(sample.idx)
		res <- data.frame()
		if (n.samples > 0){
			mm <- meth(rnb.set)[region.profile$site.idx, sample.idx, drop=FALSE]
			res <- data.frame(
				site.idx=rep(region.profile$site.idx,n.samples),
				relative.coord=rep(region.profile$relative.coord,n.samples),
				meth=as.vector(mm),
				sample=rep(sample.names,rep(nrow(mm),n.samples)),
				group=grp.name
			)
		}
		return(res)
	}))
	
	col.vals <- rep(cvalues,length.out=length(unique(df2p$group)))
	pp <- ggplot(df2p,aes(x=relative.coord,y=meth,group=sample,color=group,fill=group)) + 
		  ylim(0, 1) + ylab("methylation") + xlab("relative position") +
		  scale_color_manual(values = col.vals) + scale_fill_manual(values = col.vals) +
		  geom_smooth(alpha=0.15,linetype="dotted",method="gam",formula=y ~ s(x, bs = "cs")) +
		  geom_smooth(aes(group=group),size=2,alpha=0.15,method="gam",formula=y ~ s(x, bs = "cs")) +
		  # geom_smooth(alpha=0.15,linetype="dotted") +
		  # geom_smooth(aes(group=group),size=2,alpha=0.15) +
		  geom_vline(xintercept = c(0,1), linetype = "solid",size=1.5)
	return(pp)
}

#' rnb.plot.region.profiles
#'
#' Plots the density of methylation levels accross all regions of the specified type
#'
#' @param rnb.set RnBSet object
#' @param sample Index or name of the sample for which the plot should be generated
#' @param region.type Region type for which the plot should be generated
#' @param region.profile Alternative to specifying \code{region.type}, the function can accept a region profile
#'                       generated by the \code{rnb.find.relative.site.coord} function
#' @param extend.by A number between 0 and 1 specifying the percentage by which a region is
#'        extended in order to capture methylation information before region start and after region end
#' @return a ggplot2 object for plotting
#'         the plot shows the density of methylation levels of sites accross the specified region type for 
#'         all regions of that type from 0 (region start) to 1 (region end). Sites in the flanking areas are
#'         also shown (coordinates <0 and >1).
#'
#' @author Fabian Mueller
#' @export
#' @examples
#' \donttest{
#' library(RnBeads.hg19)
#' data(small.example.object)
#' logger.start(fname=NA)
#' rnb.plot.region.profile.density(rnb.set.example,1,"genes")
#' }
rnb.plot.region.profile.density <- function(rnb.set, sample, region.type="", region.profile=NULL, extend.by=0.33){
	if (is.null(region.profile)){
		if (!is.element(region.type,summarized.regions(rnb.set))){
			stop("Invalid region type. You must specify either region.type or region.profile")
		}
		region.profile <- rnb.find.relative.site.coord(rnb.set,region.type,extend.by=extend.by)
	}

	df2p <- data.frame(
		site.idx=region.profile$site.idx,
		relative.coord=region.profile$relative.coord,
		meth=meth(rnb.set)[region.profile$site.idx,sample]
	)
	df2p <- na.omit(df2p)
	#the standard bandwith function of MASS::kde2d is unstable when looking at
	#distributions with very low variance. Here's a more stable version
	stable.bandwidth.fun <- function(x,eps=1e-4){
	    r <- quantile(x, c(0.25, 0.75))
	    h <- (r[2] - r[1])/1.34
	    if (h==0) h <- eps
	    4 * 1.06 * min(sqrt(var(x)), h) * length(x)^(-1/5)
	}
	stable.h <- c(stable.bandwidth.fun(df2p$relative.coord),stable.bandwidth.fun(df2p$meth))

	pp <- ggplot(df2p) + aes(x=relative.coord,y=meth) + 
		  stat_density2d(geom="tile",fill="#1F78B4", aes(alpha=..density..^0.25), contour = FALSE, h=stable.h) +
		  ylim(0, 1) + ylab("methylation") + xlab("relative position") + theme(legend.position="none") +
		  geom_vline(xintercept = c(0,1), linetype = "solid",size=1.5)
	return(pp)
}


## rnb.section.region.profiles
##
## Computes methylation distributions for various region types and sample groups
##
## @param report  Report to contain the methylation deviation section. This must be an object of type
##                \code{\linkS4class{Report}}.
## @param rnb.set RnBSet
## @param reg.types region types that should be reported on
## @param columns Optional; predefined column names (in the form of a \code{character} vector) or indices (an
##                \code{integer} vector) in the sample annotation table. Only these columns are considered for grouping
##                samples and defining profiles. All other columns in the phenotype table are ignored.
## @param subsample restrict the analysis to this many randomly chosen site occurrences per region type.
##				  This can help reduce the runtime and memory requirements
## @return The modified report.
## @author Fabian Mueller
rnb.section.region.profiles <- function(report, rnb.set, reg.types, columns=rnb.getOption("exploratory.columns"),
		subsample=rnb.getOption("distribution.subsample")) {
	if (!inherits(report, "Report")) {
		stop("invalid value for report")
	}
	regTypes_notInSet <- setdiff(reg.types, summarized.regions(rnb.set))
	if (length(regTypes_notInSet)>0){
		reg.types <- intersect(reg.types, summarized.regions(rnb.set))
		logger.warning(c("The following regions are not annotated in the RnBSet object:", paste(regTypes_notInSet,collapse=","), "--> just using",paste(reg.types,collapse=",")))
	}

	txt <- c("Methylation profiles were computed for the specified region types. ",
			 "Composite plots are shown")
	report <- rnb.add.section(report, "Regional Methylation Profiles", txt)
	
	sample.inds <- rnb.sample.groups(rnb.set, columns = columns)
	sample.inds <- c(list("all samples"=list(all=1:length(samples(rnb.set)))),sample.inds)
	rplots <- list()
	for (i in  1:length(reg.types)){
		rt <- reg.types[i]
		reg.rel.coords <- rnb.find.relative.site.coord(rnb.set,rt)
		n.sites <- nrow(reg.rel.coords)
		if (subsample > 0 && subsample < n.sites){
			logger.info(c("Subsampling",subsample,"of",n.sites,"site occurences","(region type:",rt,")"))
			reg.rel.coords <- reg.rel.coords[sort(sample.int(n.sites,subsample)),]
		}
		for (j in 1:length(sample.inds)){
			cc <- names(sample.inds)[j]
			fname <- paste("regionProfile_","reg",i,"_col",j,sep="")
			logger.status(c("Plotting region profiles for region", rt, "and column",cc,"..."))
			pp <- rnb.plot.region.profiles(rnb.set,sample.inds[[j]],region.profile=reg.rel.coords)
			report.plot <- createReportGgPlot(pp,fname, report, width = 10, height = 5, create.pdf = TRUE)
			report.plot <- off(report.plot,handle.errors=TRUE)
			rplots <- c(rplots,report.plot)
		}
	}
	
	traits <- names(sample.inds)
	names(traits) <- paste("col",1:length(sample.inds),sep="")
	region.types <- reg.types
	names(region.types) <- paste("reg",1:length(region.types),sep="")
	setting.names <- list(
			'Region type'  = region.types,
			'Sample trait' = traits)
	description <- 'Regional methylation profiles (composite plots) according to sample groups. 
					For each region in the corresponding region type, relative coordinates of 0 and 1 corresponds to the start
					and end coordinates of that region respectively. Coordinates smaller than 0 and greater than 1 denote flanking regions
					normalized by region length. Scatterplot smoothers for each sample and sample group were fit. Horizontal lines indicate
					region boundaries. For smoothing, generalized additive models with cubic spine smoothing were used.
					Deviation bands indicate 95% confidence intervals'
	report <- rnb.add.figure(report, description, rplots, setting.names)
	
	return(report)
}

#' rnb.step.region.profiles
#'
#' Computes methylation distributions for various region types and sample groups
#'
#' @param rnb.set RnBSet
#' @param report  Report to contain the methylation deviation section. This must be an object of type
#'                \code{\linkS4class{Report}}.
#' @param region.types for which region types should distributions be computed
#' @param columns Optional; predefined column names (in the form of a \code{character} vector) or indices (an
#'                \code{integer} vector) in the sample annotation table. Only these columns are considered for grouping
#'                samples and defining profiles. All other columns in the phenotype table are ignored.
#' @param subsample restrict the analysis to this many randomly chosen site occurrences per region type.
#'				  This can help reduce the runtime and memory requirements
#' @return The modified report.
#'
#' @author Fabian Mueller
#' @noRd
rnb.step.region.profiles <- function(rnb.set, report, region.types, columns=rnb.getOption("exploratory.columns"),
		subsample=rnb.getOption("distribution.subsample")) {
	if (!inherits(rnb.set, "RnBSet")) {
		stop("invalid value for rnb.set")
	}
	if (!inherits(report, "Report")) {
		stop("invalid value for report")
	}
	if (!(is.character(region.types) && length(region.types) != 0 && all(!is.na(region.types)))) {
		stop("invalid value for region.types")
	}
	logger.start("Drawing regional methylation profiles")
	logger.start("Creating a Corresponding Section in the Report")
	report <- rnb.section.region.profiles(report,rnb.set,region.types,columns=columns,subsample=subsample)
	logger.completed()
	logger.completed()
	return(report)
}


## locus.profile.get.base.tracks
##
## For a genomic location, this function retrieves a list of basic tracks for later plotting
## They include a chromosome ideogram, genome coordinates, ENSEMBL gene annotationm, 
## CpG Island annotation.
## 
## @param chrom chromosome of window to plot
## @param start start coordinate of window to plot
## @param end end coordinate of window to plot
## @param assembly genome assembly. currently only \code{'hg19'}, \code{'mm10'} and \code{'mm9'} are supported.
## @return a list of \code{Gviz} tracks to be plotted
##
locus.profile.get.base.tracks <- function(chrom,start,end,assembly="hg19"){
	if (start < 0 || end < 0 || start >= end){
		stop("invalid values for start and end coordinates")
	}
	if (!chrom %in% names(rnb.get.chromosomes(assembly=assembly))) {
		stop("invalid value for chrom")
	}
	
	rnb.require("biomaRt")
	mart <- NULL
	featMap <- Gviz:::.getBMFeatureMap()
	if (assembly == "hg19"){
		# mart <- tryCatch(
		# 	useMart("ensembl", dataset = "hsapiens_gene_ensembl"),
		# 	error = function(ee) {
		# 		logger.warning(c("could not retrieve Ensembl genes from biomart: ",ee$message))
		# 		NULL
		# 	}
		# )
		mart <- tryCatch(
			useMart("ENSEMBL_MART_ENSEMBL", dataset = "hsapiens_gene_ensembl",host="feb2014.archive.ensembl.org"),
			error = function(ee) {
				logger.warning(c("could not retrieve Ensembl genes from biomart: ",ee$message))
				NULL
			}
		)
		featMap["symbol"] <- "external_gene_id"
	} else if (assembly == "hg38"){
		mart <- tryCatch(
			useMart("ENSEMBL_MART_ENSEMBL", dataset = "hsapiens_gene_ensembl",host="may2015.archive.ensembl.org"),
			error = function(ee) {
				logger.warning(c("could not retrieve Ensembl genes from biomart: ",ee$message))
				NULL
			}
		)
		featMap["symbol"] <- "external_gene_name"
	} else if (assembly == "mm9"){
		mart <- tryCatch(useMart("ENSEMBL_MART_ENSEMBL", dataset = "mmusculus_gene_ensembl",host="may2012.archive.ensembl.org"),
			error = function(ee) {
				logger.warning(c("could not retrieve Ensembl genes from biomart: ",ee$message))
				NULL
			}
		)
		featMap["symbol"] <- "external_gene_id"
	} 
	else if (assembly == "mm10"){
		mart <- tryCatch(useMart("ENSEMBL_MART_ENSEMBL", dataset = "mmusculus_gene_ensembl",host="aug2014.archive.ensembl.org"),
			error = function(ee) {
				logger.warning(c("could not retrieve Ensembl genes from biomart: ",ee$message))
				NULL
			}
		)
		featMap["symbol"] <- "external_gene_name"
	} else {
		stop("invalid assembly")
	}
	itrack <- IdeogramTrack(genome=assembly, chromosome=chrom)
	gtrack <- GenomeAxisTrack(labelPos="above",add53=TRUE,add35=TRUE)
	
	if (!is.null(mart)){
		grtrack.ensembl <- tryCatch(
				BiomartGeneRegionTrack(genome=assembly, biomart=mart, featureMap=featMap, chromosome=chrom,start=start,end=end,name="ENSEMBL genes",
									   showId=TRUE,size=0.5,
									   col.title="white",background.title="lightsteelblue4"),
				error = function(ee) {
					logger.warning(c("could not create ENSEMBL gene track: ",ee$message))
					NULL
				}
		)
	} else {
		grtrack.ensembl <- NULL
	}
		
	cgitrack <- tryCatch(
			UcscTrack(genome=assembly,chromosome=chrom,from=start,to=end,track="cpgIslandExt",trackType="AnnotationTrack",
					  start="chromStart",end="chromEnd",id="name",shape="box",fill="dark green",size=0.5,name="CpG Islands",
					  col.title="white",background.title="lightsteelblue4"),
			error = function(ee) {
				logger.warning(c("could not create CpG Island track: ",ee$message))
				NULL
			}
	)
	
	tracklist <- list(itrack,gtrack,grtrack.ensembl,cgitrack)
	is.error <- sapply(tracklist,is.null)
	return(tracklist[!is.error])
}

## locus.profile.get.methylation.track.param.processing
##
## Performs parameter processing for methylation track generation
## 
## @param chrom chromosome of window to plot
## @param start start coordinate of window to plot
## @param end end coordinate of window to plot
## @param assembly [required when \code{rnbSet} is \code{NULL}]. Genome assembly. currently only \code{'hg19'} and \code{'mm9'} are supported.
## @param rnbSet [required when any of \code{assembly,annot,meth} is \code{NULL}] RnBSet object
## @param annot [required when \code{rnbSet} is \code{NULL}]. Annotation table for methylation values. As obtained by \code{annotation(rnbSet)}
## @param meth [required when \code{rnbSet} is \code{NULL}]. Methylation value table. As obtained by \code{meth(rnbSet)}
## @param region.type On what level should methylation values be processed. Must be one of \code{"sites"} or region types specified in \code{rnb.region.types()}
##        when \code{rnbSet} is not \code{NULL}, it must be compatible with it
## @return a \code{GRanges} object for the specified genomic interval with the methylation values stored in \code{elementMetadata}
##
locus.profile.get.methylation.track.param.processing <- function(chrom,start,end,assembly=NULL,rnbSet=NULL,annot=NULL,meth=NULL,region.type="sites"){
	if (!inherits(rnbSet, "RnBSet") && !is.null(rnbSet)) {
		stop("invalid value for rnbSet")
	}
	if (start < 0 || end < 0 || start >= end){
		stop("invalid values for start and end coordinates")
	}
	if (!chrom %in% names(rnb.get.chromosomes(assembly=assembly))) {
		stop("invalid value for chrom")
	}
	if (is.null(rnbSet)){
		if (is.null(assembly) || is.null(annot) || is.null(meth) || (nrow(meth)!=nrow(annot))){
			stop("Missing or invalid annot, meth or assembly")
		}
	} else {
		meth <- meth(rnbSet,type=region.type)
		annot <- annotation(rnbSet,type=region.type)
		assembly <- assembly(rnbSet)
	}
	
	if (region.type == "sites"){
		is.in.locus <- annot$Chromosome == chrom & annot$Start >= start & annot$End <= end
	} else {
		is.in.locus <- annot$Chromosome == chrom & annot$End >= start & annot$Start <= end
	}
	if (sum(is.in.locus) < 1){
		warning("No matching sites found. returning NULL")
		return(NULL)
	}
	aa <- annot[is.in.locus,]
	mm <- meth[is.in.locus,]
	if (sum(is.in.locus) == 1){
		mm <- matrix(mm,nrow=1)
		colnames(mm) <- colnames(meth)
	}
	mm <- data.frame(mm,Chromosome=aa$Chromosome,Start=aa$Start,End=aa$End,Strand=aa$Strand,stringsAsFactors=FALSE,check.names=FALSE)
	#make strand unique for plotting
	mm$Strand <- rep("*",nrow(mm))
	mm.gr <- data.frame2GRanges(mm, ids = NULL, chrom.column = "Chromosome", start.column = "Start",
			end.column = "End", strand.column = "Strand", assembly = assembly)
	
	return(mm.gr)
}

## locus.profile.get.methylation.track.heatmap
##
## For a genomic location, this function retrieves a list of methylation heatmap tracks based on the provided
## methylation information
## 
## @param chrom chromosome of window to plot
## @param start start coordinate of window to plot
## @param end end coordinate of window to plot
## @param assembly [required when \code{rnbSet} is \code{NULL}]. Genome assembly. currently only \code{'hg19'} and \code{'mm9'} are supported.
## @param rnbSet [required when any of \code{assembly,annot,meth} is \code{NULL}] RnBSet object
## @param annot [required when \code{rnbSet} is \code{NULL}]. Annotation table for methylation values. As obtained by \code{annotation(rnbSet)}
## @param meth [required when \code{rnbSet} is \code{NULL}]. Methylation value table. As obtained by \code{meth(rnbSet)}
## @param grps a list of indices for each group to be compared or NULL if no sample grouping information should be displayed
## @param region.type On what level should methylation values be processed. Must be one of \code{"sites"} or region types specified in \code{rnb.region.types()}
##        when \code{rnbSet} is not \code{NULL}, it must be compatible with it
## @param cvals.grps colors to be used for the different groups
## @param cvals.meth colors to be used for methylation values and heatmaps
## @return a list of \code{Gviz} tracks to be plotted
##
locus.profile.get.methylation.track.heatmap <- function(chrom,start,end,
		assembly=NULL,rnbSet=NULL,annot=NULL,meth=NULL,grps=NULL,region.type="sites",
		cvals.grps=rnb.getOption("colors.category"), cvals.meth=rnb.getOption("colors.meth")){
	
	if (!(class(grps) %in% c("list","array","NULL"))) {
		stop("invalid value for grps")
	}
	mm.gr <- locus.profile.get.methylation.track.param.processing(chrom,start,end,assembly,rnbSet,annot,meth,region.type)
	if (is.null(mm.gr)){
		return(list())
	}
	mtracks <- list()
	rt.str4title <- region.type
	if (region.type=="sites"){
		rt.str4title <- ""
	}
	cvals.grps <- rep(cvals.grps, length.out = length(grps)) #adjust the length of the color vector. Recycle colors if necassary
	if (is.null(grps)){
		mtracks <- list(DataTrack(range=mm.gr,name=paste("meth:",rt.str4title),type=c("heatmap"),ncolor=100,gradient=cvals.meth,
				col.title="white",background.title=cvals.grps[1]))
	} else {
		for (i in 1:length(grps)){
			gg <- names(grps)[i]
			cols.grp <- colnames(elementMetadata(mm.gr))[grps[[gg]]]
			dt <- DataTrack(range=mm.gr,data=cols.grp,name=paste("meth:",rt.str4title," ",gg),type=c("heatmap"),ncolor=100,gradient=cvals.meth,
					col.title="white",background.title=cvals.grps[i],showSampleNames=TRUE)
			mtracks <- c(mtracks,list(dt))
		}
	}
	
	return(mtracks)
}

## locus.profile.get.methylation.track.smooth
##
## For a genomic location, this function retrieves a list of methylation scatterplot and smoothing tracks based on the provided
## methylation information
## 
## @param chrom chromosome of window to plot
## @param start start coordinate of window to plot
## @param end end coordinate of window to plot
## @param assembly [required when \code{rnbSet} is \code{NULL}]. Genome assembly. currently only \code{'hg19'} and \code{'mm9'} are supported.
## @param rnbSet [required when any of \code{assembly,annot,meth} is \code{NULL}] RnBSet object
## @param annot [required when \code{rnbSet} is \code{NULL}]. Annotation table for methylation values. As obtained by \code{annotation(rnbSet)}
## @param meth [required when \code{rnbSet} is \code{NULL}]. Methylation value table. As obtained by \code{meth(rnbSet)}
## @param grps a list of indices for each group to be compared or NULL if no sample grouping information should be displayed
## @param region.type On what level should methylation values be processed. Must be one of \code{"sites"} or region types specified in \code{rnb.region.types()}
##        when \code{rnbSet} is not \code{NULL}, it must be compatible with it
## @param cvals.grps colors to be used for the different groups
## @return a list of \code{Gviz} tracks to be plotted
##
locus.profile.get.methylation.track.smooth <- function(chrom,start,end,
		assembly=NULL,rnbSet=NULL,annot=NULL,meth=NULL,grps=NULL,region.type="sites",cvals.grps=rnb.getOption("colors.category"),smooth.profile="wide"){
	
	if (!(class(grps) %in% c("list","array","NULL"))) {
		stop("invalid value for grps")
	}
	mm.gr <- locus.profile.get.methylation.track.param.processing(chrom,start,end,assembly,rnbSet,annot,meth,region.type)
	
	if (is.null(mm.gr)){
		return(list())
	}
	
	grouping <- NULL
	if (!is.null(grps)){
		grouping <- rep(NA,ncol(elementMetadata(mm.gr)))
		for (gg in names(grps)){
			grouping[grps[[gg]]] <- gg
		}
		grouping <- factor(grouping, levels=names(grps))
	}
	smooth.span <- 0.9
	smooth.degree <- 1
	smooth.family <- "gaussian"
	if (smooth.profile=="wide"){
		smooth.span <- 0.9
	} else if (smooth.profile=="narrow"){
		smooth.span <- 0.2
	} else {
		stop("invalid value for smooth.profile")
	}
	cvals.grps <- rep(cvals.grps, length.out = length(grps)) #adjust the length of the color vector. Recycle colors if necassary
	mtrack.smooth <- DataTrack(range=mm.gr,groups=grouping,name=paste("methylation"),type=c("smooth","p"),span=smooth.span,degree=smooth.degree,family=smooth.family,jitter.x=FALSE,col=cvals.grps, legend=FALSE,
			col.title="white",col.axis="white",background.title="lightsteelblue4")
	
	return(list(mtrack.smooth))
}


#' rnb.plot.locus.profile 
#'
#' Computes methylation distributions for various region types and sample groups
#'
#' @param rnbSet RnBSet object
#' @param chrom chromosome of window to plot
#' @param start start coordinate of window to plot
#' @param end end coordinate of window to plot
#' @param grps a list of indices for each group to be compared or NULL if no sample grouping information should be displayed
#' @param plot.m.regions character vector of region types whose methylation values should be displayed If \code{grps} is not \code{NULL}
#' 		  the methylation values will be separated by sample groups.
#' @param plot.m.heatmap flag indicating whether sites methylation values should be displayed in a heatmap. If \code{grps} is not \code{NULL}
#' 		  the heatmaps will be separated by sample groups.
#' @param plot.m.smooth flag indicating whether a scatterplot with smoothing curves should be displayed. If \code{grps} is not \code{NULL}
#' 		  the colors will be used to separate sample groups.
#' @param cvals.grps colors to be used for the different groups
#' @param cvals.meth colors to be used for methylation values and heatmaps
#' @param smooth.profile profile to be used for the smoothing curves. Allowed values include 
#'        \code{wide} (default) which yields smoother curvers and \code{narrow} which yields more "wiggly" curves
#' @return a \code{ggplot2} plot object containing the plot
#'
#' @author Fabian Mueller
#' @export
#' @examples
#' \donttest{
#' #see RnBeads vignette (section: 'Generating Locus Profile Plots') for examples
#' }
rnb.plot.locus.profile <- function(rnbSet,chrom,start,end,grps=NULL,
		plot.m.regions=NULL,plot.m.heatmap=TRUE,plot.m.smooth=TRUE,
		cvals.grps=rnb.getOption("colors.category"),cvals.meth=rnb.getOption("colors.meth"), smooth.profile="wide"){	
	if (!inherits(rnbSet, "RnBSet")) {
		stop("invalid value for rnbSet")
	}
	if (!(class(grps) %in% c("list","array","NULL"))) {
		stop("invalid value for grps")
	}
	if (length(grps)<1 && !is.null(grps)) {
		stop("invalid value for grps")
	}
	if (!chrom %in% names(rnb.get.chromosomes(assembly=assembly(rnbSet)))) {
		stop("invalid value for chrom")
	}
	if (start < 0 || end < 0 || start > end){
		stop("invalid values for start and end coordinates")
	}
	if (!is.element(smooth.profile, c("wide", "narrow"))){
		stop("invalid value for smooth.profile")
	}
	rnb.require("Gviz")	
	
	#get methylation info
	mm <- meth(rnbSet,type="sites")
	site.annot <- annotation(rnbSet,type="sites")
	
	tracklist <- locus.profile.get.base.tracks(chrom,start,end,assembly=assembly(rnbSet))

	#region information
	for (rr in plot.m.regions){
		mmr <- meth(rnbSet,type=rr)
		reg.annot <- annotation(rnbSet,type=rr)
		
		tts <- locus.profile.get.methylation.track.heatmap(chrom,start,end,
				assembly=assembly(rnbSet),annot=reg.annot,meth=mmr,grps=grps,region.type=rr,cvals.meth=cvals.meth)
		tracklist <- c(tracklist,tts)
	}
	
	if (plot.m.heatmap){
		tts <- locus.profile.get.methylation.track.heatmap(chrom,start,end,
				assembly=assembly(rnbSet),annot=site.annot,meth=mm,grps=grps,region.type="sites",cvals.meth=cvals.meth,cvals.grps=cvals.grps)
		tracklist <- c(tracklist,tts)
	}
	if (plot.m.smooth){
		tts <- locus.profile.get.methylation.track.smooth(chrom,start,end,
				assembly=assembly(rnbSet),annot=site.annot,meth=mm,grps=grps,region.type="sites",cvals.grps=cvals.grps, smooth.profile=smooth.profile)
		tracklist <- c(tracklist,tts)
	}
	
	plotTracks(tracklist,from=start,to=end)
	#plotTracks(c(tracklist,tts),from=start,to=end)
}

#' rnb.section.locus.profiles 
#'
#' given a list of interesting loci, generates a section in an RnBeads report
#' containing locus specific information and methylation profiles
#'
#' @param rnb.set Methylation dataset as an object of type inheriting \code{\linkS4class{RnBSet}}.
#' @param locus.tab a table with columns named \code{chrom,start,end,id} specificying the genomic chromosome, start location end location and a name for
#' 	      the locus respectively
#' @param type you can specify a predefined section type like 'genes' to get custom section text. Otherwise, the generic 'custom regions' will be used
#' @param grp.cols a list of lists of indices for each group to be compared or NULL if no sample grouping information should be displayed
#' @param cvals.grps colors to be used for the different groups
#' @param cvals.meth colors to be used for methylation values and heatmaps
#' @param include.all.sample.group include a grouping  containing all samples
#' @return the modified report
#'
#' @author Fabian Mueller
#' @noRd
rnb.section.locus.profiles <- function(rnb.set, locus.tab, report, type="", grp.cols = rnb.getOption("exploratory.columns"),
		cvals.grps = rnb.getOption("colors.category"), cvals.meth = rnb.getOption("colors.meth"), include.all.sample.group=TRUE) {
	
	if (!all(c("chrom","start","end","id") %in% colnames(locus.tab))){
		stop("Invalid value for locus.tab: it must be a table containing the columns chrom,start,end,id")
	}
	if (any(duplicated(locus.tab[,"id"]))){
		stop("Invalid value for locus.tab: non-unique ids")
	}
	rnb.require("Gviz")
	rnb.require("biomaRt")
	if (rnb.getOption("logging") && logger.isinitialized() == FALSE) {
		logger.start(fname = NA) # initialize console logger
	}
	
	grps.all <- rnb.sample.groups(rnb.set, columns = grp.cols)
	if (include.all.sample.group){
		grps.all <- c(list("All Samples"=list("all"=1:length(samples(rnb.set)))),grps.all)
	}
	logger.info(c("Grouping by:",paste(names(grps.all),collapse=",")))

	logger.start("Retrieving Site Annotation and Methylation Values")
	mm <- meth(rnb.set,type="sites")
	site.annot <- annotation(rnb.set,type="sites")
	assembly <- assembly(rnb.set)
	logger.completed()
	
	if (type=="genes"){
		fname <- "gene_loci.csv"
		fname <- rnb.write.table(locus.tab,fname,fpath=rnb.get.directory(report, "data", absolute = TRUE),format="csv",gz=FALSE,row.names = FALSE,quote=FALSE)	
		txt <- c("Locus profiles were generated for selected genes of interest. Their genomic locations can be found in ",
				 "<a href=\"", rnb.get.directory(report, "data"), "/", fname, "\">this table</a> (coordinates are based on assembly ",assembly,").")
		rnb.add.section(report, "Locus Profiles (Genes)", txt)
	} else {
		fname <- "custom_loci.csv"
		fname <- rnb.write.table(locus.tab,fname,fpath=rnb.get.directory(report, "data", absolute = TRUE),format="csv",gz=FALSE,row.names = FALSE,quote=FALSE)	
		txt <- c("Locus profiles were generated for selected regions of interest. Their genomic locations can be found in ",
				 "<a href=\"", rnb.get.directory(report, "data"), "/", fname, "\">this table</a> (coordinates are based on assembly ",assembly,").")
		rnb.add.section(report, "Locus Profiles (Custom Regions)", txt)
	}
	
	
	logger.start("Generating Locus Views")
	do.plots <- function(i){
		plot.list <- list()
		logger.status(c("Processing Locus",locus.tab[i,"id"]))
		chrom <- locus.tab[i,"chrom"]
		start <- locus.tab[i,"start"]
		end   <- locus.tab[i,"end"]
		span <- end - start
		extend.by <- round(span*0.05)

		tl.base <- locus.profile.get.base.tracks(chrom,start,end,assembly=assembly)
		for (j in 1:length(grps.all)){
			# logger.status(c("Sample grouping by",names(grps.all)[j],"[",locus.tab[i,"id"],"]"))
			ggs <- grps.all[[j]]
			tts.heat <- locus.profile.get.methylation.track.heatmap(chrom,start,end,
					assembly=assembly,annot=site.annot,meth=mm,grps=ggs,region.type="sites",cvals.grps=cvals.grps,cvals.meth=cvals.meth)
			tts.smooth <- locus.profile.get.methylation.track.smooth(chrom,start,end,
					assembly=assembly,annot=site.annot,meth=mm,grps=ggs,region.type="sites",cvals.grps=cvals.grps)
			tl.all <- c(tl.base,tts.heat,tts.smooth)
			
			figName <- paste("locusMeth",paste("loc",i,sep=""),paste("grp",j,sep=""),sep="_")
			report.plot <- createReportPlot(figName, report, create.pdf=TRUE, high.png=300, width = 12, height = 7)
				tryCatch(
					plotTracks(tl.all,from=start,to=end,extend.right=extend.by,extend.left=extend.by),
					error = function(ee) {
						logger.warning(c("A plotting error occurred ","[",locus.tab[i,"id"],"]:",ee$message))
						print(rnb.message.plot("Plot could not be created"))
					}
				)
			off(report.plot)
			plot.list <- c(plot.list,list(report.plot))
		}
		return(plot.list)
	}
	if(parallel.isEnabled()){
		figPlots <- foreach(i=1:nrow(locus.tab),.combine="c") %dopar% {
			do.plots(i)
		}
	} else {
		figPlots <- list()
		for (i in 1:nrow(locus.tab)){
			figPlots <- c(figPlots,do.plots(i))
		}
	}
	groups <- names(grps.all)
	names(groups) <- paste("grp",1:length(grps.all),sep="")
	loci <- locus.tab[,"id"]
	names(loci) <- paste("loc",1:nrow(locus.tab),sep="")
	setting.names <- list("Locus"=loci,"Sample group" = groups)
	description <- c('Locus profiles. Annotation elements include ENSEMBL gene annotations, CpG Islands according to UCSC. ',
					 '(Groupwise) methylation profiles are shown as heatmap and scatterplot smoothers (Loess Gaussian smoothing with degree 1).')
	report <- rnb.add.figure(report, description, figPlots, setting.names)
	logger.completed()
	
	return(report)
}

#' sortAndExtendRegions
#'
#' helper function to sort a data.frame of genomic regions according to ID and extend
#' the boundaries by a certain percentage.
#'
#' @param interesting.loci region table
#' @param extend.by percentage by which the regions are extended
#' @param do.sort sort according to region.id
#' @return The modified report.
#'
#' @author Fabian Mueller
#' @noRd
sortAndExtendRegions <- function(interesting.loci, extend.by=0.1, do.sort=TRUE){
	if (do.sort){
		#sort according to id
		interesting.loci <- interesting.loci[order(interesting.loci$id),]
	}
	#extend the window by extend.by*100% on each side
	spans <- interesting.loci$end - interesting.loci$start
	interesting.loci$start <- interesting.loci$start - round(spans*extend.by)
	interesting.loci$end <- interesting.loci$end + round(spans*extend.by)
	return(interesting.loci)
}

#' rnb.step.locus.profiles
#'
#' Computes methylation distributions for various region types and sample groups
#'
#' @param rnb.set RnBSet
#' @param report  Report to contain the methylation deviation section. This must be an object of type
#'                \code{\linkS4class{Report}}.
#' @param locus.bed a bed file containing regions of interest. If not supplied the corresponding section will be skipped
#' @param gene.list a list of gene symbols that locus profiles will be generated for. If not supplied the corresponding section will be skipped
#' @param gene.annotation.name Name of the gene annotation to be used. Must be one of the region types in \code{rnb.set} and must contain a column called \code{symbol}
#' @param ... arguments passed to \code{rnb.section.locus.profiles}
#' @return The (modified) report.
#'
#' @author Fabian Mueller
#' @noRd
rnb.step.locus.profiles <- function(rnb.set, report, locus.bed="", gene.list=c(), gene.annotation.name="genes", ...) {
	logger.start("Creating locus profiles")
	if (file.exists(locus.bed)){
		logger.start("Creating locus profiles for custom regions")
		logger.info(c("loading region data from file:",locus.bed))
		region.tab <- rnb.load.bed(locus.bed)
		colnames(region.tab)[1:3] <- c("chrom","start","end")
		if (ncol(region.tab)<4){
			if (!is.null(rownames(region.tab))){
				region.tab <- data.frame(region.tab,id=rownames(region.tab),stringsAsFactors=FALSE)
			} else {
				region.tab <- data.frame(region.tab,id=paste0("locus",1:nrow(region.tab)),stringsAsFactors=FALSE)
			}
		}
		colnames(region.tab)[4] <- "id"
		# region.tab <- sortAndExtendRegions(region.tab)
		report <- rnb.section.locus.profiles(rnb.set, region.tab, report, ...)
		logger.completed()
	}
	logger.completed()
	if (length(gene.list) > 0 && is.character(gene.list)){
		logger.start("Creating locus profiles for custom regions")
		if (!is.element("genes",summarized.regions(rnb.set))){
			logger.warning("RnBSet does not contain annotation for genes --> skipping gene locus profiling")
			return(report)
		}
		gene.list <- tolower(gene.list)
		annot.genes <- annotation(rnb.set,type=gene.annotation.name)
		annot.genes <- annot.genes[,c("Chromosome","Start","End","symbol")]
		colnames(annot.genes) <- c("chrom","start","end","id")
		annot.genes$chrom <- as.character(annot.genes$chrom)
		annot.genes$id <- as.character(annot.genes$id)
		annot.gene.symbols.l <- tolower(annot.genes[,"id"])
		annot.inds <- na.omit(match(gene.list,annot.gene.symbols.l))
		genes.not.covered <- setdiff(gene.list,annot.gene.symbols.l)
		if (length(genes.not.covered)>0){
			logger.info(c("Genes not covered in annotation:",paste(genes.not.covered,collapse=",")))
		}
		if (length(annot.inds) < 1){
			logger.warning("No genes from the list are covered in the RnBSet --> skipping gene locus profiling")
			return(report)
		}
		interesting.loci <- sortAndExtendRegions(annot.genes[annot.inds,])
		report <- rnb.section.locus.profiles(rnb.set, interesting.loci, report, type="genes", ...)
		logger.completed()
	}
	return(report)
}

Try the RnBeads package in your browser

Any scripts or data that you put into this service are public.

RnBeads documentation built on Nov. 1, 2018, 2:16 a.m.