R/correlated_regions_enrich.R

Defines functions enrich_with_histone_mark enrich_with_methylation dist_by_closeness2

Documented in enrich_with_histone_mark enrich_with_methylation

# == title
# Normalize histone modification signals to target regions
#
# == param
# -target target regions
# -mark name of the histone mark
# -sample_id a vector of sample IDs
# -mode how to summarize histone modification signals among samples, by defualt is the cross-sample mean signal
# -return_arr whether also return the three dimension array itself
# -... pass to `EnrichedHeatmap::normalizeToMatrix`
#
# == details
# For each sample, the signal is normalized as a matrix, which results an array in which the third
# dimension corresponds to samples. The final normalized matrix which shows e.g. mean signal matrix is calcualted
# by ``apply(array, c(1, 2), mode)``.
#
# == values
# If ``return_arr`` is set to ``FALSE``, the funtion returns a matrix which can be directly sent to 
# `EnrichedHeatmap::EnrichedHeatmap`. If ``return_arr`` is ``TRUE``, the returned value is a list in which
# the first element is the original array and the second element is the normalized matrix.
#
# == author
# Zuguang Gu <z.gu@dkfz.de>
# 
enrich_with_histone_mark = function(target, mark, sample_id, mode = mean, return_arr = FALSE, ...) {

	target_name = deparse(substitute(target))
	# 200 is number of windows (5000 + 5000)/50

	all_target_chromosome = unique(as.vector(seqnames(target)))
	hm_list = get_peak_list(mark, sample_id = intersect(sample_id, chipseq_hooks$sample_id(mark)), chr = all_target_chromosome)

	sample = names(hm_list)
	flag = 0
	for(i in seq_along(sample)) {
		message(qq("@{sample[i]}: normalize @{mark} signal to @{target_name}."))
	    tm = normalizeToMatrix(hm_list[[i]], target, keep = c(0, 0.99), ...)
	    if(!flag) {
	    	arr = array(dim = c(length(target), dim(tm)[2], length(hm_list)))
	    	dimnames(arr) = list(rownames(tm), colnames(tm), names(hm_list))
	    	flag = 1
	    }
	    arr[, , i] = tm
	}

	mat = apply(arr, c(1, 2), mode, na.rm = TRUE)
	mat = copyAttr(tm, mat)

	if(return_arr) {
		return(list(arr = arr, mat = mat))
	} else {
		return(mat)
	}
}

# == title
# Normalize methylation to target regions
#
# == param
# -target target regions
# -sample_id a vector of sample IDs
# -mode how to summarize methylation among samples, by default it is the cross-sample mean methylation. 
#       Since methylation is represented as matrix, here we use ``row*``-family functions (e.g. `rowMeans`, `matrixStats::rowMedians`)
# -extend pass to `EnrichedHeatmap::normalizeToMatrix`
# -smooth pass to `EnrichedHeatmap::normalizeToMatrix`
# -... pass to `EnrichedHeatmap::normalizeToMatrix`
#
# == value
# A matrix which can be directly visualized by `EnrichedHeatmap::EnrichedHeatmap`
#
# == author
# Zuguang Gu <z.gu@dkfz.de>
enrich_with_methylation = function(target, sample_id, mode = rowMeans, extend = 5000, smooth = TRUE, ...) {

	target_name = deparse(substitute(target))

	if(length(extend) == 1) extend = rep(extend, 2)

	# extrace methylation value which in [-5k, 5k] from TSS, and calculate mean methylation in each subgroup
	target_extend = GRanges(seqnames = seqnames(target), ranges = IRanges(start(target)-extend[1], end(target)+extend[2]))
	# process raw methylation data
	meth_gr = GRanges()
	for(chr in sort(unique(as.vector(seqnames(target))))) {
	    methylation_hooks$set_chr(chr, verbose = FALSE)
	    gr = methylation_hooks$gr
	    mtch = as.matrix(findOverlaps(gr, target_extend))
	    ind = unique(mtch[, 1])
	    mm = mode(methylation_hooks$meth[ind, sample_id, drop = FALSE], na.rm = TRUE)
	    gr = gr[ind]
	    mcols(gr) = mm
	    meth_gr = c(meth_gr, gr)
	}
	colnames(mcols(meth_gr)) = "mean_meth"
	message(qq("normalize methylation signals to @{target_name}..."))
	mat = normalizeToMatrix(meth_gr, target, value_column = "mean_meth", extend = extend, smooth = smooth, ...)
	return(mat)
}


dist_by_closeness2 = function(...) {
	as.dist(dist_by_closeness(...))
}
jokergoo/epik documentation built on Sept. 28, 2019, 9:20 a.m.