dittoHex: Show RNAseq data, grouped into hexagonal bins, on a scatter...
In dittoSeq: User Friendly Single-Cell and Bulk RNA Sequencing Visualization

Description Usage Arguments Details Value Functions Many characteristics of the plot can be adjusted using discrete inputs Additional Features Author(s) See Also Examples

Show RNAseq data, grouped into hexagonal bins, on a scatter or dimensionality reduction plot

dittoDimHex(
  object,
  color.var = NULL,
  bins = 30,
  color.method = NULL,
  reduction.use = .default_reduction(object),
  dim.1 = 1,
  dim.2 = 2,
  cells.use = NULL,
  color.panel = dittoColors(),
  colors = seq_along(color.panel),
  split.by = NULL,
  extra.vars = NULL,
  split.nrow = NULL,
  split.ncol = NULL,
  assay = .default_assay(object),
  slot = .default_slot(object),
  adjustment = NULL,
  assay.extra = assay,
  slot.extra = slot,
  adjustment.extra = adjustment,
  show.axes.numbers = TRUE,
  show.grid.lines = !grepl("umap|tsne", tolower(reduction.use)),
  main = "make",
  sub = NULL,
  xlab = "make",
  ylab = "make",
  theme = theme_bw(),
  do.contour = FALSE,
  contour.color = "black",
  contour.linetype = 1,
  min.density = NA,
  max.density = NA,
  min.color = "#F0E442",
  max.color = "#0072B2",
  min.opacity = 0.2,
  max.opacity = 1,
  min = NA,
  max = NA,
  rename.color.groups = NULL,
  do.ellipse = FALSE,
  do.label = FALSE,
  labels.size = 5,
  labels.highlight = TRUE,
  labels.repel = TRUE,
  labels.split.by = split.by,
  add.trajectory.lineages = NULL,
  add.trajectory.curves = NULL,
  trajectory.cluster.meta,
  trajectory.arrow.size = 0.15,
  data.out = FALSE,
  legend.show = TRUE,
  legend.color.title = "make",
  legend.color.breaks = waiver(),
  legend.color.breaks.labels = waiver(),
  legend.density.title = if (isBulk(object)) "Samples" else "Cells",
  legend.density.breaks = waiver(),
  legend.density.breaks.labels = waiver()
)

dittoScatterHex(
  object,
  x.var,
  y.var,
  color.var = NULL,
  bins = 30,
  color.method = NULL,
  split.by = NULL,
  extra.vars = NULL,
  cells.use = NULL,
  color.panel = dittoColors(),
  colors = seq_along(color.panel),
  split.nrow = NULL,
  split.ncol = NULL,
  assay.x = .default_assay(object),
  slot.x = .default_slot(object),
  adjustment.x = NULL,
  assay.y = .default_assay(object),
  slot.y = .default_slot(object),
  adjustment.y = NULL,
  assay.color = .default_assay(object),
  slot.color = .default_slot(object),
  adjustment.color = NULL,
  assay.extra = .default_assay(object),
  slot.extra = .default_slot(object),
  adjustment.extra = NULL,
  min.density = NA,
  max.density = NA,
  min.color = "#F0E442",
  max.color = "#0072B2",
  min.opacity = 0.2,
  max.opacity = 1,
  min = NA,
  max = NA,
  rename.color.groups = NULL,
  xlab = x.var,
  ylab = y.var,
  main = "make",
  sub = NULL,
  theme = theme_bw(),
  do.contour = FALSE,
  contour.color = "black",
  contour.linetype = 1,
  do.ellipse = FALSE,
  do.label = FALSE,
  labels.size = 5,
  labels.highlight = TRUE,
  labels.repel = TRUE,
  labels.split.by = split.by,
  add.trajectory.lineages = NULL,
  add.trajectory.curves = NULL,
  trajectory.cluster.meta,
  trajectory.arrow.size = 0.15,
  legend.show = TRUE,
  legend.color.title = "make",
  legend.color.breaks = waiver(),
  legend.color.breaks.labels = waiver(),
  legend.density.title = if (isBulk(object)) "Samples" else "Cells",
  legend.density.breaks = waiver(),
  legend.density.breaks.labels = waiver(),
  data.out = FALSE
)

`object`	A Seurat, SingleCellExperiment, or SummarizedExperiment object.
`color.var`	Single string giving a gene or metadata that will set the color of cells/samples in the plot. Alternatively, can be a directly supplied numeric or string vector or a factor of length equal to the total number of cells/samples in `object`.
`bins`	Numeric or numeric vector giving the number of hexagonal bins in the x and y directions. Set to 30 by default.
`color.method`	Works differently depending on whether the color.var is continuous versus discrete: Continuous: String signifying a function for how target data should be summarized for each bin. Can be any function that summarizes a numeric vector input with a single numeric output value. Default is `median`. Other useful options are `sum`, `mean`, `sd`, or `mad`. Discrete: A string signifying whether the color should (default) be simply based on the "max" grouping of the bin, or based on the "max.prop"ortion of cells/samples belonging to any grouping.
`reduction.use`	String, such as "pca", "tsne", "umap", or "PCA", etc, which is the name of a dimensionality reduction slot within the object, and which sets what dimensionality reduction space within the object to use. Default = the first dimensionality reduction slot inside the object with "umap", "tsne", or "pca" within its name, (priority: UMAP > t-SNE > PCA) or the first dimensionality reduction slot if none of those exist. Alternatively, a matrix (or data.frame) containing the dimensionality reduction embeddings themselves. The matrix should have as many rows as there are cells/samples in the `object`. Note that `dim.1` and `dim.2` will still be used to select which columns to pull from, and column names will serve as the default `xlab` & `ylab`.
`dim.1`	The component number to use on the x-axis. Default = 1
`dim.2`	The component number to use on the y-axis. Default = 2
`cells.use`	String vector of cells'/samples' names OR an integer vector specifying the indices of cells/samples which should be included. Alternatively, a Logical vector, the same length as the number of cells in the object, which sets which cells to include.
`color.panel`	String vector which sets the colors to draw from. `dittoColors()` by default, see `dittoColors` for contents.
`colors`	Integer vector, the indexes / order, of colors from color.panel to actually use
`split.by`	1 or 2 strings naming discrete metadata to use for splitting the cells/samples into multiple plots with ggplot faceting. When 2 metadatas are named, c(row,col), the first is used as rows and the second is used for columns of the resulting grid. When 1 metadata is named, shape control can be achieved with `split.nrow` and `split.ncol`
`extra.vars`	String vector providing names of any extra metadata to be stashed in the dataframe supplied to `ggplot(data)`. Useful for making custom alterations after dittoSeq plot generation.
`split.nrow, split.ncol`	Integers which set the dimensions of faceting/splitting when a single metadata is given to `split.by`.
`assay, slot, adjustment, assay.x, assay.y, assay.color, assay.extra, slot.x, slot.y, slot.color, slot.extra, adjustment.x, adjustment.y, adjustment.color, adjustment.extra`	assay, slot, and adjustment set which data to use when the axes, coloring, or `extra.vars` are based on expression/counts data. See `gene` for additional information.
`show.axes.numbers`	Logical which controls whether the axes values should be displayed.
`show.grid.lines`	Logical which sets whether gridlines of the plot should be shown. They are removed when set to FALSE. Default = FALSE for umap and tsne `reduction.use`, TRUE otherwise.
`main`	String, sets the plot title. The default title is either "Density", `color.var`, or NULL, depending on the identity of `color.var`. To remove, set to `NULL`.
`sub`	String, sets the plot subtitle.
`xlab, ylab`	Strings which set the labels for the axes. To remove, set to `NULL`.
`theme`	A ggplot theme which will be applied before dittoSeq adjustments. Default = `theme_bw()`. See https://ggplot2.tidyverse.org/reference/ggtheme.html for other options and ideas.
`do.contour`	Logical. Whether density-based contours should be displayed.
`contour.color`	String that sets the color(s) of the `do.contour` contours.
`contour.linetype`	String or numeric which sets the type of line used for `do.contour` contours. Defaults to "solid", but see `linetype` for other options.
`min.density, max.density`	Number which sets the min/max values used for the density scale. Used no matter whether density is represented through opacity or color.
`min.color, max.color`	color for the min/max values of the color scale.
`min.opacity, max.opacity`	Scalar between [0,1] which sets the minimum or maximum opacity used for the density legend (when color is used for `color.var` data and density is shown via opacity).
`min, max`	Number which sets the values associated with the minimum or maximum color for `color.var` data.
`rename.color.groups`	String vector containing new names for the identities of discrete color groups.
`do.ellipse`	Logical. Whether the groups should be surrounded by median-centered ellipses.
`do.label`	Logical. Whether to add text labels near the center (median) of clusters for grouping vars.
`labels.size`	Size of the labels' text
`labels.highlight`	Logical. Whether the labels should have a box behind them
`labels.repel`	Logical, that sets whether the labels' placements will be adjusted with ggrepel to avoid intersections between labels and plot bounds. TRUE by default.
`labels.split.by`	String of one or two metadata names which controls the facet-split calculations for label placements. Defaults to `split.by`, so generally there is no need to adjust this except when you are utilizing the `extra.vars` input to achieve manual faceting control.
`add.trajectory.lineages`	List of vectors representing trajectory paths, each from start-cluster to end-cluster, where vector contents are the names of clusters provided in the `trajectory.cluster.meta` input. If the `slingshot` package was used for trajectory analysis, you can provide `add.trajectory.lineages = slingLineages('object')`.
`add.trajectory.curves`	List of matrices, each representing coordinates for a trajectory path, from start to end, where matrix columns represent x (`dim.1`) and y (`dim.2`) coordinates of the paths. Alternatively, (for dittoDimHex only, but not dittoScatterHex) a list of lists(/princurve objects) can be provided. Thus, if the `slingshot` package was used for trajectory analysis, you can provide `add.trajectory.curves = slingCurves('object')`
`trajectory.cluster.meta`	String name of metadata containing the clusters that were used for generating trajectories. Required when plotting trajectories using the `add.trajectory.lineages` method. Names of clusters inside the metadata should be the same as the contents of `add.trajectory.lineages` vectors.
`trajectory.arrow.size`	Number representing the size of trajectory arrows, in inches. Default = 0.15.
`data.out`	Logical. When set to `TRUE`, changes the output from the plot alone to a list containing the plot ("plot"), and data.frame of the underlying data for target cells ("data").
`legend.show`	Logical. Whether any legend should be displayed. Default = `TRUE`.
`legend.density.title, legend.color.title`	Strings which set the title for the legends.
`legend.density.breaks, legend.color.breaks`	Numeric vector which sets the discrete values to label in the density and color.var legends.
`legend.density.breaks.labels, legend.color.breaks.labels`	String vector, with same length as `legend.*.breaks`, which sets the labels for the tick marks or hex icons of the associated legend.
`x.var, y.var`	Single string giving a gene or metadata that will be used for the x- and y-axis of the scatterplot. Note: must be continuous. Alternatively, can be a directly supplied numeric vector of length equal to the total number of cells/samples in `object`.

The functions create a dataframe with x and y coordinates for each cell/sample, determined by either x.var and y.var for dittoScatterHex, or reduction.use, dim.1 (x), and dim.2 (y) for dittoDimHex. Extra data requested by color.var for coloring, split.by for faceting, or extra.vars for manual external manipulations, are added to the dataframe as well. For expression/counts data, assay, slot, and adjustment inputs can be used to select which values to use, and if they should be adjusted in some way.

The dataframe is then subset to only target cells/samples based on the cells.use input.

Finally, a hex plot is created using this dataframe:

If color.var is not provided, coloring is based on the density of cells/samples within each hex bin. When color.var is provided, density is represented through opacity while coloring is based on a summarization, chosen with the color.method input, of the target color.var data.

If split.by was used, the plot will be split into a matrix of panels based on the associated groupings.

A ggplot object where colored hexagonal bins are used to summarize RNAseq data in a scatterplot or tSNE, PCA, UMAP.

Alternatively, if data.out=TRUE, a list containing two slots is output: the plot (named 'plot'), and a data.frame containing the underlying data for target cells (named 'data').

dittoDimHex: Show RNAseq data overlaid on a tsne, pca, or similar, grouped into hexagonal bins
dittoScatterHex: Make a scatter plot of RNAseq data, grouped into hexagonal bins

Colors: min.color and max.color adjust the colors for continuous data.
For discrete color.var plotting with color.method = "max", colors are instead adjusted with color.panel and/or colors & the labels of the groupings can be changed using rename.color.groups.
Titles and axes labels can be adjusted with main, sub, xlab, ylab, and legend.color.title and legend.density.title arguments.
Legends can also be adjusted in other ways, using variables that all start with "legend." for easy tab completion lookup.

Other tweaks and features can be added as well. Each is accessible through 'tab' autocompletion starting with "do."--- or "add."---, and if additional inputs are involved in implementing or tweaking these, the associated inputs will start with the "---.":

If do.contour is set to TRUE, density gradient contour lines will be overlaid with color and linetype adjustable via contour.color and contour.linetype.
If add.trajectory.lineages is provided a list of vectors (each vector being cluster names from start-cluster-name to end-cluster-name), and a metadata name pointing to the relevant clustering information is provided to trajectory.cluster.meta, then median centers of the clusters will be calculated and arrows will be overlaid to show trajectory inference paths in the current dimensionality reduction space.
If add.trajectory.curves is provided a list of matrices (each matrix containing x, y coordinates from start to end), paths and arrows will be overlaid to show trajectory inference curves in the current dimensionality reduction space. Arrow size is controlled with the trajectory.arrow.size input.

Daniel Bunis with some code adapted from Giuseppe D'Agostino

dittoDimPlot and dittoScatterPlot for making very similar data representations, but where each cell is represented individually. It is often best to investigate your data with both the individual and hex-bin methods, then pick whichever is the best representation for your particular goal.

getGenes and getMetas to see what the var, split.by, etc. options are of an object.

getReductions to see what the reduction.use options are of an object.

example(importDittoBulk, echo = FALSE)
myRNA

# Mock up some nCount_RNA and nFeature_RNA metadata
#  == the default way to extract
myRNA$nCount_RNA <- runif(60,200,1000)
myRNA$nFeature_RNA <- myRNA$nCount_RNA*runif(60,0.95,1.05)
# and also percent.mito metadata
myRNA$percent.mito <- sample(c(runif(50,0,0.05),runif(10,0.05,0.2)))

dittoScatterHex(
    myRNA, x.var = "nCount_RNA", y.var = "nFeature_RNA")
dittoDimHex(myRNA)

# We don't have too many samples here, so let's increase the bin size.
dittoDimHex(myRNA, bins = 10)

# x and y bins can be set separately, useful for non-square plots
dittoDimHex(myRNA, bins = c(20, 10))

### Coloring
# Default coloring, as above, is by cell/sample density in the region, but
# 'color.var' can be used to color the data by another metric.
# Density will then be represented via bin opacity.
dittoDimHex(myRNA, color.var = "clustering", bins = 10)
dittoDimHex(myRNA, color.var = "gene1", bins = 10)

# 'color.method' is then used to adjust how the target data is summarized
dittoDimHex(myRNA, color.var = "groups", bins = 10,
    color.method = "max.prop")
dittoDimHex(myRNA, color.var = "gene1", bins = 10,
    color.method = "mean")

### Additional Features:

# Faceting with 'split.by'
dittoDimHex(myRNA, bins = 10, split.by = "groups")
dittoDimHex(myRNA, bins = 10, split.by = c("groups", "clustering"))

# Underlying data output with 'data.out = TRUE'
dittoDimHex(myRNA, data.out = TRUE)

# Contour lines can be added with 'do.contour = TRUE'
dittoDimHex(myRNA, bins = 10,
    do.contour = TRUE,
    contour.color = "lightblue", # Optional, black by default
    contour.linetype = "dashed") # Optional, solid by default

# Trajectories can be added to dittoDimHex plots (see above for details)
dittoDimHex(myRNA, bins = 10,
    add.trajectory.lineages = list(c(1,2,4), c(1,4), c(1,3)),
    trajectory.cluster.meta = "clustering")