nmatlist2heatmaps | R Documentation |
Make multiple coverage heatmaps
nmatlist2heatmaps(
nmatlist,
panel_groups = NULL,
title = NULL,
title_gp = grid::gpar(fontsize = 16),
caption = NULL,
upstream_length = NULL,
downstream_length = NULL,
k_clusters = 0,
min_rows_per_k = 100,
k_subset = NULL,
k_colors = NULL,
k_width = grid::unit(5, "mm"),
k_method = c("correlation", "euclidean", "pearson", "spearman"),
k_heatmap = main_heatmap,
partition = NULL,
row_title_rot = 0,
partition_counts = TRUE,
partition_count_template = "{partition_name}\n({counts} rows)",
rows = NULL,
row_order = NULL,
nmat_colors = NULL,
middle_color = "white",
nmat_names = NULL,
main_heatmap = NULL,
anno_df = NULL,
byCols = NULL,
color_sub = NULL,
anno_row_marks = NULL,
anno_row_labels = NULL,
anno_row_gp = grid::gpar(fontsize = 14),
recenter_heatmap = NULL,
recenter_range = NULL,
recenter_invert = FALSE,
restrand_heatmap = NULL,
restrand_range = NULL,
restrand_buffer = NULL,
restrand_invert = FALSE,
top_annotation = NULL,
top_anno_height = grid::unit(3, "cm"),
top_axis_side = c("right"),
legend_max_ncol = 2,
legend_base_nrow = 12,
legend_max_labels = 40,
show_heatmap_legend = TRUE,
heatmap_legend_param = NULL,
heatmap_legend_direction = "horizontal",
annotation_legend_param = NULL,
hm_nrow = 1,
transform = "none",
transform_label = NULL,
signal_ceiling = NULL,
axis_name = NULL,
axis_name_gp = grid::gpar(fontsize = 10),
axis_name_rot = 90,
column_title_gp = grid::gpar(fontsize = 14),
lens = -2,
anno_lens = 8,
pos_line = FALSE,
seed = 123,
ht_gap = grid::unit(4, "mm"),
row_anno_padding = grid::unit(4, "mm"),
column_anno_padding = grid::unit(4, "mm"),
legend_padding = grid::unit(1, "cm"),
profile_value = c("mean", "sum", "abs_mean", "abs_sum"),
profile_linetype = c(1, 5, 3),
profile_linewidth = 1.5,
ylims = NULL,
border = TRUE,
iter.max = 20,
use_raster = TRUE,
raster_quality = 1,
raster_by_magick = jamba::check_pkg_installed("magick"),
do_plot = TRUE,
do_caption = TRUE,
legend_fontsize = 10,
legend_width = grid::unit(3, "cm"),
trim_legend_title = TRUE,
padding = grid::unit(c(0.1, 0.1, 0.1, 0.1), "cm"),
return_type = c("heatmaplist", "grid"),
show_error = FALSE,
verbose = FALSE,
...
)
nmatlist |
|
panel_groups |
|
title , caption |
|
title_gp |
|
upstream_length , downstream_length |
|
k_clusters |
|
min_rows_per_k |
|
k_subset |
|
k_colors |
|
k_width |
|
k_method |
|
k_heatmap |
|
partition |
|
row_title_rot |
|
partition_counts |
|
partition_count_template |
|
rows |
optional vector to define subset rows, or specific row order:
Note that even when using a subset of |
row_order |
The
|
nmat_colors |
|
middle_color |
|
nmat_names |
|
main_heatmap |
|
anno_df |
|
byCols |
|
color_sub |
accepts input in two forms:
|
anno_row_marks |
|
anno_row_labels |
|
anno_row_gp |
|
recenter_heatmap , recenter_range , recenter_invert |
arguments
are passed to
|
restrand_heatmap , restrand_range , restrand_buffer , restrand_invert |
arguments are passed to
|
top_annotation |
|
top_anno_height |
|
top_axis_side |
|
legend_max_ncol |
|
legend_base_nrow |
|
legend_max_labels |
|
show_heatmap_legend |
|
heatmap_legend_param |
|
heatmap_legend_direction |
|
annotation_legend_param |
|
hm_nrow |
|
transform |
one of the following:
|
transform_label |
|
signal_ceiling |
Note that the ceiling is only applied to the color scale and
not to the underlying data. The row clustering and row ordering
steps use the full data range, after applying the appropriate transform.
To apply a numeric ceiling to the data itself, it should be done
at the level of |
axis_name |
|
axis_name_gp |
object of |
axis_name_rot |
|
column_title_gp |
object |
lens |
|
anno_lens |
|
seed |
|
ht_gap |
|
row_anno_padding , column_anno_padding , legend_padding |
|
profile_value |
|
profile_linetype |
|
profile_linewidth |
|
ylims |
|
border |
|
iter.max |
|
use_raster |
|
raster_quality |
|
raster_by_magick |
|
do_plot |
|
do_caption |
|
legend_fontsize |
|
padding |
|
return_type |
|
show_error |
|
verbose |
|
... |
additional arguments are passed to
|
This function takes a list of normalizedMatrix
objects,
usually the output of coverage_matrix2nmat()
, and
produces multiple heatmaps using
EnrichedHeatmap
.
This function is intended to be a convenient wrapper to help keep each data matrix in order, to apply consistent clustering and filtering across all data matrices, and to enable optional multi-row heatmap layout.
list
with heatmap components that can be reviewed, or
optionally rendered into a figure:
"AHM"
: annotation heatmap, when anno_df
is supplied
"PHM"
: partition heatmap, when partitioning and/or k-means clustering
is used
"EH_l"
: list
of ComplexHeatmap::Heatmap
objects
"MHM"
: marked heatmap, containing optional row labels
"HM_drawn"
: when hm_nrow=1
this is the output after drawing the
heatmap, in the form: ComplexHeatmap::HeatmapList
. This object can
be drawn again if needed, or used to determine exact row orders.
"fn_params"
: list
of useful function parameters, including
some calculated during processing such as panel_groups
, ylims
,
signal_ceiling
, etc.
"hm_caption"
: character
version of heatmap captions
"adjust_df"
: data.frame
when recenter_heatmap
or
restrand_heatmap
are defined, which contains a summary of each
row, with colnames:
"summit_name"
for recentering; and
"restrand"
for restranding.
When anno_df
is provided as a data.frame
the rows are synchronized
alongside the heatmap rows. Column values are color-coded, categorical
for character
columns, and using color gradient for numeric
columns.
Rows can optionally be split by argument partition
, which can be a vector
of group values associated with rows, or one or more columns in
colnames(anno_df)
whose values are used to sub-divide the rows.
Rows can be clustered using k-means clustering with argument k_clusters
.
By default it uses k_method="correlation"
, which applies a novel
and effective correlation metric, clustering row data by the profile shape.
The typical default, which is used when the amap
R package is not
installed, is to use "euclidean"
distance, which tends to cluster
based upon signal magnitude more so than the shape.
When k-means clustering k_clusters
and partition
are both enabled,
each partition is independently k-means clustered, which improves
results compared to applying global k-means before applying partitions.
Use min_rows_per_k
to adjust the relative number of k
clusters
based upon the number of observed rows.
Heatmaps are arranged in the following order, dependent upon the data provided:
Annotation heatmap, if anno_df
is provided.
Color assignment can be provided using color_sub
either as
a named vector of R colors whose names match values in each column,
or as a list
named by colnames(anno_df)
, with named color
assignments, or a color function
for numeric
columns.
Partition heatmap, if partition
is provided.
Enrichment heatmaps, one for each entry in nmatlist
.
Above each heatmap is the metaplot, drawn using
EnrichedHeatmap::anno_enriched()
.
When partition
and/or k_clusters
are defined,
the plot will include one profile line for each row grouping.
When show_error=TRUE
each line will also be shaded with an error region, drawn by
EnrichedHeatmap::anno_enriched() as one standard error around the mean.
The heatmap color gradient is applied starting at zero, extending to
signal_ceiling
for each heatmap. When signal_ceiling
is <=1 it
uses the quantile of non-zero values in the matrix data, otherwise
it applies a fixed numeric maximum. Numeric values above the
signal_ceiling
threshold are colored using the maximum color.
When there are negative values, the color key uses a divergent
color scale. When nmat_colors
value for the heatmap is a single color,
the complementary color is used for negative values; otherwise it is
assumed to define a divergent color scale.
The y-axis range on metaplots is defined by observed values, and
when panel_groups
is defined, the y-axis ylim
is shared among
all heatmaps in each panel group.
Marked row heatmap, if anno_row_marks
is provided. It uses an empty
heatmap, associated with row mark annotations for a subset of row labels,
in the same order as the coverage heatmaps.
Color legends are displayed in the same order:
annotation colors for each column in anno_df
partition/cluster colors
color gradients for each coverage heatmap in order, or
when panel_groups
is provided it displays the color key for
only the first heatmap in each panel group.
Other jam coverage heatmap functions:
coverage_matrix2nmat()
,
get_nmat_ceiling()
,
nmathm_row_order()
,
recenter_nmatlist()
,
restrand_nmatlist()
,
validate_heatmap_params()
,
zoom_nmatlist()
,
zoom_nmat()
## There is a small example file to use for testing
# library(jamba)
cov_file1 <- system.file("data", "tss_coverage.matrix", package="platjam");
cov_file2 <- system.file("data", "h3k4me1_coverage.matrix", package="platjam");
cov_files <- c(cov_file1, cov_file2);
# name each file by its basename, with the ".matrix" suffix removed
names(cov_files) <- gsub("[.]matrix",
   "",
   basename(cov_files));
# import both coverage files into a list of matrices
nmatlist <- coverage_matrix2nmat(cov_files, verbose=FALSE);
# review the signal name stored on each matrix;
# vapply() guarantees a character vector regardless of the list contents
vapply(nmatlist, function(nmat){attr(nmat, "signal_name")}, character(1))
nmatlist2heatmaps(nmatlist);
# sometimes data transform can be helpful
nmatlist2heatmaps(nmatlist,
   transform=c("log2signed", "sqrt"));
# k-means clusters, using the default k_method ("correlation");
# "euclidean" distance is used when the amap package is not installed
nmatlist2heatmaps(nmatlist, k_clusters=4,
   transform=c("log2signed", "sqrt"));
# k-means clusters, "correlation" or "pearson" sometimes works better
nmatlist2heatmaps(nmatlist,
   k_clusters=4,
   min_rows_per_k=20,
   k_method="pearson",
   transform=c("log2signed", "sqrt"));
# Demonstrate top_axis_side together with panel_groups, using three
# copies of each coverage matrix so each panel group has three members.
nmatlist2 <- nmatlist[rep(c(1, 2), each = 3)]
names(nmatlist2) <- jamba::makeNames(names(nmatlist2))
# store an abbreviated signal name ("coverage" becomes "cov") on each matrix
for (nmat_name in names(nmatlist2)) {
  attr(nmatlist2[[nmat_name]], "signal_name") <- gsub("coverage", "cov", nmat_name)
}

# top_axis_side = "left"
# assumes 12x7 figure size
nmatlist2heatmaps(
  nmatlist2,
  panel_groups = rep(c("tss", "h3k4me1"), each = 3),
  transform = rep(c("log2signed", "sqrt"), each = 3),
  nmat_colors = rep(c("firebrick", "tomato"), each = 3),
  signal_ceiling = 0.8,
  top_axis_side = "left",
  ht_gap = grid::unit(4, "mm")
)

# top_axis_side = "both"
nmatlist2heatmaps(
  nmatlist2,
  panel_groups = rep(c("tss", "h3k4me1"), each = 3),
  transform = rep(c("log2signed", "sqrt"), each = 3),
  top_axis_side = "both",
  ht_gap = grid::unit(6, "mm")
)

# heatmaps arranged across multiple rows (hm_nrow = 2)
nmatlist2heatmaps(
  nmatlist2,
  hm_nrow = 2,
  k_clusters = 4,
  k_method = "pearson",
  panel_groups = rep(c("tss", "h3k4me1"), each = 3),
  transform = rep(c("log2signed", "sqrt"), each = 3),
  top_axis_side = "both",
  top_anno_height = grid::unit(0.8, "cm"),
  ht_gap = grid::unit(6, "mm")
)
# invent anno_df data.frame of additional annotations:
# one enrichment score per coverage matrix, plus a random chromosome label
anno_df <- data.frame(
   tss_score=EnrichedHeatmap::enriched_score(jamba::log2signed(nmatlist[[1]])),
   h3k4me1_score=EnrichedHeatmap::enriched_score(jamba::log2signed(nmatlist[[2]])),
   chromosome=paste0("chr", sample(1:4, replace=TRUE, size=nrow(nmatlist[[1]])))
);
# rownames must match the matrix rownames so annotations stay in sync
rownames(anno_df) <- rownames(nmatlist[[1]]);
nmatlist2heatmaps(nmatlist,
   title="k-means clustering across both heatmaps",
   k_clusters=4,
   k_method="pearson",
   k_heatmap=c(1, 2),
   ht_gap=grid::unit(6, "mm"),
   top_axis_side="left",
   anno_df=anno_df,
   # one transform per matrix: nmatlist has two entries here,
   # unlike the six-entry nmatlist2 used in earlier examples
   transform=c("log2signed", "sqrt"));
# example showing k-means clustering together with annotation groups
anno_df <- data.frame(
   group=sample(c(1, -1, -1),
      size=nrow(nmatlist[[1]]),
      replace=TRUE),
   row.names=rownames(nmatlist[[1]]))
# note for this example the color legends are oriented vertically
# showing how the width is adjusted
# NOTE(review): k_clusters=0 appears to leave k-means disabled even though
# k_colors, k_method and k_heatmap are supplied - confirm the intended value.
# NOTE(review): color_sub names `A`/`B` do not match the values of
# anno_df$group (1, -1) - confirm the intended group labels or color names.
nmatlist2heatmaps(nmatlist,
   heatmap_legend_direction="vertical",
   k_clusters=0,
   color_sub=c(`A`="firebrick", `B`="darkorchid"),
   k_colors=c("firebrick", "dodgerblue"),
   min_rows_per_k=50,
   ht_gap=grid::unit(1, "cm"),
   k_method="correlation",
   k_heatmap=1:2,
   anno_df=anno_df,
   partition="group",
   row_title_rot=0,
   # one transform per matrix: nmatlist has two entries
   transform=c("log2signed", "sqrt"));
# same as above, partition and k_clusters together
# except uses multiple values for k_clusters
nmatlist2heatmaps(nmatlist,
   k_clusters=c(1, 4),
   min_rows_per_k=25,
   k_heatmap=1:2,
   k_method="correlation",
   anno_df=anno_df,
   partition="group",
   row_title_rot=0)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.