R/chromatin_states_transitions.R

# == title
# Generate transition matrix from chromHMM results
#
# == param
# -gr_list_1 a list of `GenomicRanges::GRanges` objects which contain chromatin states in group 1.
#            The first column in meta columns should be the states. Be careful when importing bed files to 
#            `GenomicRanges::GRanges` objects (start positions in bed files are 0-based while 1-based in ``GRanges`` objects.
# -gr_list_2 a list of `GenomicRanges::GRanges` objects which contains chromatin states in group 2.
# -window window size which was used to do chromHMM states prediction. If it is not specified, the greatest common divisor
#         of the width of all regions is used.
# -min_1 If there are multiple samples in group 1, the state assigned to reach region should have recurrency larger or equal to this value.
# -min_2 same as ``min_1``, but for samples in group 2.
#
# == detail
# The whole genome is segmentated by size of ``window`` and states with highest occurence among samples are assigned to segments.
#
# To make the function run successfully, number of segments in all samples should be all the same and there should not be gaps between regions.
#
# == value
# A transition matrix in which values represent total width of regions that transite from one state to the other. Rows correspond
# to group 1 and columns correspond to group 2.
#
# == seealso
# The matrix can be sent to `chromatin_states_transition_chord_diagram` to visualize.
#
# == author
# Zuguang Gu <z.gu@dkfz.de>
#
# == example
# set.seed(123)
# gr_list_1 = lapply(1:5, function(i) {
# 	pos = sort(c(0, sample(1:9999, 99), 10000))*200
# 	GRanges(seqnames = "chr1", ranges = IRanges(pos[-101] + 1, pos[-1]), 
# 	    states = paste0("state_", sample(1:9, 100, replace = TRUE)))
# })
# gr_list_2 = lapply(1:5, function(i) {
# 	pos = sort(c(0, sample(1:9999, 99), 10000))*200
# 	GRanges(seqnames = "chr1", ranges = IRanges(pos[-101] + 1, pos[-1]), 
# 	    states = paste0("state_", sample(1:9, 100, replace = TRUE)))
# })
# mat = make_transition_matrix_from_chromHMM(gr_list_1, gr_list_2)
make_transition_matrix_from_chromHMM = function(gr_list_1, gr_list_2, window = NULL, 
	min_1 = floor(length(gr_list_1)/2), min_2 = floor(length(gr_list_2)/2)) {

	if(inherits(gr_list_1, "GRanges")) {
		gr_list_1 = list(gr_list_1)
		min_1 = 0
	}
	if(inherits(gr_list_2, "GRanges")) {
		gr_list_2 = list(gr_list_2)
		min_2 = 0
	}

	if(is.null(window)) {
		window = mGCD(c(sapply(gr_list_1, function(gr) mGCD(unique(width(gr)))),
		                sapply(gr_list_2, function(gr) mGCD(unique(width(gr))))))
		message(paste0("window is set to ", window))
		if(window == 1) {
			message("when converting bed files to GRanges objects, be careful with the 0-based and 1-based coordinates.")
		}
	}

	# if(is.null(all_states)) {
		all_states = unique(c(unlist(lapply(gr_list_1, function(gr) {unique(as.character(mcols(gr)[, 1]))})),
			                unlist(lapply(gr_list_2, function(gr) {unique(as.character(mcols(gr)[, 1]))}))))
		all_states = sort(all_states)
		message(paste0(length(all_states), " states in total"))
	# }

	message("extract states")
	m1 = as.data.frame(lapply(gr_list_1, function(gr) {
		k = round(width(gr)/window)
		s = as.character(mcols(gr)[, 1])
		as.integer(factor(rep(s, times = k), levels = all_states))
	}))
	m2 = as.data.frame(lapply(gr_list_2, function(gr) {
		k = round(width(gr)/window)
		s = as.character(mcols(gr)[, 1])
		as.integer(factor(rep(s, times = k), levels = all_states))
	}))

	m1 = as.matrix(m1)
	m2 = as.matrix(m2)

	message("count for each state")
	t1 = rowTabulates(m1)
	t2 = rowTabulates(m2)
	l = rowMaxs(t1) >= min_1 & rowMaxs(t2) >= min_2
	t1 = t1[l, , drop = FALSE]
	t2 = t2[l, , drop = FALSE]

	message("determine states")
	states1 = rowWhichMax(t1)
	states2 = rowWhichMax(t2)

	message("generate transition matrix")
	mat = as.matrix(table(states1, states2))
	rownames(mat) = all_states[as.numeric(rownames(mat))]
	colnames(mat) = all_states[as.numeric(colnames(mat))]
	mat = mat * as.numeric(window)
	class(mat) = "matrix"
	names(dimnames(mat)) = NULL
	return(mat)
}



# == title
# Chord diagram for chromatin states transistion
#
# == param
# -mat the transition matrix. It should be a square matrix in which row names and column names are the same.
#      If it is not, the function will try to re-format it.
# -max_mat if there are several transition matrix to be compared, set it to the matrix with maximum absolute and it will make
#          scales of all matrix the same and comparable.
# -remove_unchanged_transition whether to remove transitions that states are not changed (set the values in diagonal to 0)
# -state_col color for states. It should be a vector of which names correspond to states.
# -legend_position positions of legends. Possible values are "bottomleft", "bottomright", "topright" and "topleft".
#             If the value is specified as vector with length larger than two, the legend will be split into several parts.
#              Set the value to ``NULL`` to suppress legends.
# -... pass to `circlize::chordDiagram`
#
# == details
# Rows of ``mat`` locate at the bottom of the circle by default.
#
# The chord diagram visualizes how much chromatin states change. In the diagram, width of each link represents the total
# width of regions in a certain chromatin state in group 1 that transite to other chromatin state in group 2. The width of 
# each grid represents total width of regions in a certain chromatin in group 1 that transite to all states in group 2.
#
# Chord diagram is implemented in base graphic system, which means, you can add titles or other graphics by base graphic 
# functions (e.g. `graphics::title`, `graphics::text`, ...)
#
# If you want to adjust order of states in the chord diagram, directly change row and column order of the matrix.
# 
# == value
# No value is returned.
#
# == seealso
# `make_transition_matrix_from_chromHMM` which generates transition matrix from chromHMM results.
#
# == author
# Zuguang Gu <z.gu@dkfz.de>
#
# == example
# set.seed(123)
# gr_list_1 = lapply(1:5, function(i) {
# 	pos = sort(c(0, sample(1:9999, 99), 10000))*200
# 	GRanges(seqnames = "chr1", ranges = IRanges(pos[-101] + 1, pos[-1]), 
# 	    states = paste0("state_", sample(1:9, 100, replace = TRUE)))
# })
# gr_list_2 = lapply(1:5, function(i) {
# 	pos = sort(c(0, sample(1:9999, 99), 10000))*200
# 	GRanges(seqnames = "chr1", ranges = IRanges(pos[-101] + 1, pos[-1]), 
# 	    states = paste0("state_", sample(1:9, 100, replace = TRUE)))
# })
# mat = make_transition_matrix_from_chromHMM(gr_list_1, gr_list_2)
# chromatin_states_transition_chord_diagram(mat, legend_position = "bottomleft")
chromatin_states_transition_chord_diagram = function(mat, max_mat = mat, 
	remove_unchanged_transition = TRUE, state_col = NULL, legend_position = NULL, ...) {

	op = par(no.readonly = TRUE)
	on.exit(par(op))
	par(xpd = NA)

	mat = as.matrix(mat)

	if(length(intersect(rownames(mat), colnames(mat))) == 0) {
		rownames(mat) = paste0("E", seq_len(nrow(mat)))
		colnames(mat) = paste0("E", seq_len(ncol(mat)))
	}

	all_states = union(rownames(mat), colnames(mat))
	n_states = length(all_states)
	mat2 = matrix(0, nrow = n_states, ncol = n_states)
	rownames(mat2) = all_states
	colnames(mat2) = all_states
	mat2[rownames(mat), colnames(mat)] = mat  

	mat = mat2

	if(is.null(state_col)) {
		col_fun = colorRamp2(0:10, brewer.pal(11, "Spectral"))
		x = (seq_along(all_states)-1)*10/(length(all_states)-1)
		state_col = col_fun(x)
		names(state_col) = all_states
	} else {
		if(is.null(names(state_col))) {
			names(state_col) = all_states
		}
	}

	if(remove_unchanged_transition) {
		for(i in all_states) {
			mat[i, i] = 0
		}
	}

	rownames(mat) = paste0("R_", seq_len(n_states))
	colnames(mat) = paste0("C_", seq_len(n_states))

	state_col2 = c(state_col, state_col)
	names(state_col2) = c(rownames(mat), colnames(mat))

	colmat = rep(state_col2[rownames(mat)], n_states)
	colmat = rgb(t(col2rgb(colmat)), maxColorValue = 255)
	
	# make thin links very light
	qati = quantile(mat, 0.7)
	colmat[mat > qati] = paste0(colmat[mat > qati], "A0")
	colmat[mat <= qati] = paste0(colmat[mat <= qati], "20")
	dim(colmat) = dim(mat)

	de = 360 - (360 - 20 - 30) * sum(mat)/sum(max_mat) - 30
	circos.par(start.degree = -de/4, gap.degree = c(rep(1, n_states-1), de/2, rep(1, n_states-1), de/2))

	chordDiagram(mat, col = colmat, grid.col = state_col2,
		directional = TRUE, annotationTrack = "grid", preAllocateTracks = list(track.height = 0.1), ...)

	for(sn in get.all.sector.index()) {
		if(abs(get.cell.meta.data("cell.start.degree", sector.index = sn) - get.cell.meta.data("cell.end.degree", sector.index = sn)) > 3) {
			xcenter = get.cell.meta.data("xcenter", sector.index = sn, track.index = 2)
			ycenter = get.cell.meta.data("ycenter", sector.index = sn, track.index = 2)
			i_state = as.numeric(gsub("(C|R)_", "", sn))
			circos.text(xcenter, ycenter, i_state, col = "white", font = 2, cex = 0.7, 
				sector.index = sn, track.index = 2, adj = c(0.5, 0.5), niceFacing = TRUE)
			circos.axis(sector.index = sn, track.index = 2, major.tick.percentage = 0.2, labels.away.percentage = 0.2, labels.cex = 0.5)
		}
	}

	if(length(legend_position) == 1) { 
		add_chord_diagram_legend(legend_position, 1:n_states, all_states, state_col[all_states])
	} else if(length(legend_position)== 2) {
		ib = ceiling(n_states/2)
		ind = 1:ib
		add_chord_diagram_legend(legend_position[1], ind, all_states[ind], state_col[all_states][ind])
		ind = (ib+1):n_states
		add_chord_diagram_legend(legend_position[2], ind, all_states[ind], state_col[all_states][ind])
	}

	circos.clear()
}


add_chord_diagram_legend = function(position = c("bottomleft", "bottomright", "topleft", "topright"), 
	index = seq_along(labels), labels, col) {
	
	position = match.arg(position)[1]
	if(length(index) == 0) {
		return(NULL)
	}

	coor = par("usr")
	n = length(labels)
	text_height = strheight("a")
	labels_max_width = max(strwidth(labels))
	legend_width = text_height*(1+0.5) + labels_max_width
	if(position == "bottomleft") {
		x1 = rep(coor[1], n)
		x2 = x1 + text_height
		y1 = coor[3] + (rev(seq_len(n))-1)*1.5*text_height
		y2 = y1 + text_height
		rect(x1, y1, x2, y2, col = col, border = col)
		text((x1+x2)/2, (y1+y2)/2, index, cex = 0.6, font = 2, col = "white")
		text(x2 + 0.5*text_height, (y1+y2)/2, labels, adj = c(0, 0.5), cex = 0.8)
	} else if(position == "bottomright") {
		x1 = rep(coor[2] - labels_max_width, n)
		x2 = x1 + text_height
		y1 = coor[3] + (rev(seq_len(n))-1)*1.5*text_height
		y2 = y1 + text_height
		rect(x1, y1, x2, y2, col = col, border = col)
		text((x1+x2)/2, (y1+y2)/2, index, cex = 0.6, font = 2, col = "white")
		text(x2 + 0.5*text_height, (y1+y2)/2, labels, adj = c(0, 0.5), cex = 0.8)
	} else if(position == "topleft") {
		x1 = rep(coor[1], n)
		x2 = x1 + text_height
		y1 = coor[4] - (seq_len(n)-1)*1.5*text_height
		y2 = y1 - text_height
		rect(x1, y1, x2, y2, col = col, border = col)
		text((x1+x2)/2, (y1+y2)/2, index, cex = 0.6, font = 2, col = "white")
		text(x2 + 0.5*text_height, (y1+y2)/2, labels, adj = c(0, 0.5), cex = 0.8)
	} else if(position == "topright") {
		x1 = rep(coor[2] - labels_max_width, n)
		x2 = x1 + text_height
		y1 = coor[4] - (seq_len(n)-1)*1.5*text_height
		y2 = y1 - text_height
		rect(x1, y1, x2, y2, col = col, border = col)
		text((x1+x2)/2, (y1+y2)/2, index, cex = 0.6, font = 2, col = "white")
		text(x2 + 0.5*text_height, (y1+y2)/2, labels, adj = c(0, 0.5), cex = 0.8)
	}
}
eilslabs/epic documentation built on May 16, 2019, 1:24 a.m.