# R/plotDataFrame.R

# == title
# Quickly visualize a data frame
#
# == param
# -df a data frame. A matrix is also accepted and is drawn as one single heatmap.
# -overlap how to group numeric columns. If the overlapping rate between the ranges in the
#          current column and previous numeric column is larger than this value, the two columns
#          are treated as under same measurement and should be grouped.
# -nlevel If the number of levels of a character column is larger than this value, the column will
#         be excluded, because it doesn't make any sense to visualize a character vector or matrix
#         that contains huge number of unique elements through a heatmap.
# -show_row_names whether to show row names after the last heatmap if there are row names.
# -show_column_names whether to show column names for all heatmaps.
# -group a list of column indices that defines the grouping. If it is not set, the grouping is
#        guessed from the column types and value ranges.
# -group_names names for each group.
# -main_heatmap which group is the main heatmap? Either the index or the name of the group.
# -km a value larger than 1 means applying k-means clustering on rows for the main heatmap.
# -split one or multiple variables that split the rows.
# -cluster_rows whether to perform clustering on rows of the main heatmap.
# -cluster_columns whether to perform clustering on columns for all heatmaps.
# -row_order order of rows, remember to turn off ``cluster_rows``
# -... pass to `draw,HeatmapList-method` or `make_layout,HeatmapList-method`
#
# == details
# The data frame contains heterogeneous information. The `plotDataFrame` function provides a simple and quick way to
# visualize information that is stored in a data frame.
#
# There are only a few settings in this function, so the heatmap generated by this function
# may look ugly (in most of the time). However, users can customize the style of the heatmaps by manually
# constructing a `HeatmapList` object.
# 
# == value
# A `HeatmapList` object.
#
# == author
# Zuguang Gu <z.gu@dkfz.de>
#
plotDataFrame = function(df, overlap = 0.25, nlevel = 30, show_row_names = TRUE, 
	show_column_names = TRUE, group = NULL, group_names = names(group), 
	main_heatmap = NULL, km = 1, split = NULL, cluster_rows = TRUE, 
	cluster_columns = TRUE, row_order = NULL, ...) {

	if(is.matrix(df)) {
		# a matrix gives exactly one heatmap, so that heatmap is the main one and
		# the row-related settings apply to it directly
		ht_list = Heatmap(df, cluster_rows = cluster_rows, cluster_columns = cluster_columns, 
			show_row_names = show_row_names, show_column_names = show_column_names, 
			km = km, split = split, row_order = row_order)
		if(is.null(main_heatmap)) {
			main_heatmap = 1
		}
	} else if(is.data.frame(df)) {
		
		nc = ncol(df)
		cn = colnames(df)

		if(is.null(group)) {
			# guess the grouping: adjacent numeric columns with sufficiently overlapping
			# value ranges go to the same group, every other kept column is its own group
			group = list()
			current_range = NULL
			current_group = 0

			for(i in seq_len(nc)) {
				x = df[[i]]
				if(is.numeric(x)) {
					# the 10%-90% quantiles are used as the range of the column
					range2 = quantile(x, c(0.1, 0.9), na.rm = TRUE)

					same_measurement = FALSE
					if(!is.null(current_range)) {
						# previous column is numeric as well (``current_range`` is reset to
						# NULL by every non-numeric column), compare the two ranges
						intersected_range = c(max(current_range[1], range2[1]), min(current_range[2], range2[2]))

						l = x >= intersected_range[1] & x <= intersected_range[2]
						l2 = df[[i-1]] >= intersected_range[1] & df[[i-1]] <= intersected_range[2]
						# missing values count as "outside of the intersected range";
						# isTRUE() guards against NA/NaN, e.g. for columns without values
						same_measurement = isTRUE(sum(l, na.rm = TRUE)/length(l) > overlap && 
							sum(l2, na.rm = TRUE)/length(l2) > overlap)
					}

					if(same_measurement) {
						group[[ current_group ]] = c(group[[ current_group ]], i)
					} else {
						# first numeric column after a non-numeric one, or the current column
						# is not under same measurement as previous columns
						current_range = range2
						current_group = current_group + 1
						group[[ current_group ]] = i
					}

				} else {
					current_range = NULL
					# columns with too many unique values are excluded
					if(length(unique(x)) < nlevel) {
						current_group = current_group + 1
						group[[ current_group ]] = i
					}
				}
			}
		}

		if(length(group) == 0) {
			stop("There is no column in `df` that can be visualized.")
		}

		if(is.null(group_names)) {
			# multi-column groups get a generic name, single-column groups take the column name
			group_names = rep(NA_character_, length(group))
			for(i in seq_along(group)) {
				ci = group[[i]]
				if(length(ci) > 1) {
					group_names[i] = paste0("matrix_", i)
				} else if(length(ci) == 1) {
					group_names[i] = if(is.character(ci)) ci else cn[ci]
				}
			}
		}

		if(is.null(main_heatmap)) {
			# by default the group with most columns is the main heatmap
			main_heatmap = which.max(vapply(group, length, integer(1)))
		} else if(!is.numeric(main_heatmap)) {
			main_heatmap_name = main_heatmap
			main_heatmap = which(group_names == main_heatmap_name)[1]
			if(is.na(main_heatmap)) {
				stop(paste0("Cannot find group '", main_heatmap_name, "' for `main_heatmap`."))
			}
		}

		n_group = length(group)
		for(i in seq_along(group)) {
			ci = group[[i]]
			is_main = i == main_heatmap

			# only groups with more than one column get a title, character(0) is passed
			# to `Heatmap` for single-column groups
			if(length(ci) > 1) {
				column_title = group_names[i]
			} else {
				column_title = character(0)
			}

			# row names are only shown next to the last heatmap, and k-means / row
			# splitting are only applied on the main heatmap
			ht = Heatmap(df[, ci, drop = FALSE], name = group_names[i], column_title = column_title, 
				cluster_rows = cluster_rows, cluster_columns = cluster_columns, 
				show_row_names = if(i == n_group) show_row_names else FALSE, 
				show_column_names = show_column_names, 
				km = if(is_main) km else 1, 
				split = if(is_main) split else NULL, 
				row_order = row_order)

			if(i == 1) {
				ht_list = ht
			} else {
				ht_list = ht_list + ht
			}
		}

	} else {
		stop("`df` can only be a matrix or a data frame.")
	}

	draw(ht_list, main_heatmap = main_heatmap, ...)
}
# eilslabs/ComplexHeatmap documentation built on May 16, 2019, 1:21 a.m.