ggjammaplot | R Documentation |
Produce MA-plot of omics data, where jammaplot()
uses base R graphics, and
ggjammaplot()
uses ggplot2 graphics.
ggjammaplot(
x,
detail_factor = 1,
nbin_factor = 1,
bw_factor = 1,
assay_name = 1,
useMedian = FALSE,
controlSamples = NULL,
centerGroups = NULL,
controlFloor = NA,
naControlAction = c("row", "floor", "min", "na"),
naControlFloor = 0,
colramp = c("transparent", "lightblue", "blue", "navy", "orange", "orangered2"),
groupedX = TRUE,
grouped_mad = TRUE,
outlierMAD = 5,
mad_row_min = 4,
displayMAD = FALSE,
noise_floor = 0,
noise_floor_value = NA,
naValue = NA,
centerFunc = centerGeneData,
whichSamples = NULL,
useRank = FALSE,
apply_transform_limit = 40,
titleBoxColor = "lightgoldenrod1",
titleCex = 1,
outlierColor = "lemonchiffon",
fillBackground = TRUE,
maintitle = NULL,
subtitle = NULL,
summary = "Mean",
difference = "Difference",
transFactor = 0.25,
doPlot = TRUE,
highlightPoints = NULL,
highlightPch = 21,
highlightCex = 1.5,
highlightColor = NULL,
doHighlightLegend = TRUE,
ablineH = c(-2, 0, 2),
base_size = 12,
panel.grid.major.colour = "grey90",
panel.grid.minor.colour = "grey95",
axis.text.x.angle = 90,
return_type = c("ggplot", "data"),
xlim = NULL,
ylim = c(-6, 6),
ncol = NULL,
nrow = NULL,
blankPlotPos = NULL,
verbose = FALSE,
...
)
jammaplot(
x,
assay_name = NULL,
maintitle = NULL,
titleBoxColor = "#DDBB9977",
subtitleBoxColor = titleBoxColor,
centerGroups = NULL,
controlSamples = colnames(x),
controlFloor = NA,
naControlAction = c("row", "floor", "min", "na"),
naControlFloor = 0,
controlIndicator = c("labelstar", "titlestar", "none"),
sample_labels = NULL,
useMedian = FALSE,
useMean = NULL,
ylim = c(-4, 4),
xlim = NULL,
highlightPoints = NULL,
outlierMAD = 5,
outlierRowMin = 5,
displayMAD = FALSE,
groupedMAD = TRUE,
colramp = c("white", "lightblue", "blue", "navy", "orange", "orangered2"),
colrampOutlier = NULL,
outlierColor = "lemonchiffon",
whichSamples = NULL,
maintitleCex = 1.8,
subtitle = NULL,
subtitlePreset = "bottomleft",
subtitleAdjPreset = "topright",
titleCexFactor = 1,
titleCex = NULL,
doTitleBox = TRUE,
titleColor = "black",
titleFont = 2,
titlePreset = "top",
titleAdjPreset = "top",
xlab = "",
xlabline = 2,
ylab = "",
ylabline = 1.5,
groupSuffix = NULL,
highlightPch = 21,
highlightCex = 1.5,
highlightColor = "#00AAAA66",
doHighlightPolygon = FALSE,
highlightPolygonAlpha = 0.3,
doHighlightLegend = TRUE,
smoothPtCol = "#00000055",
margins = c(2.5, 0.5, 2, 0.2),
outer_margins = c(0, 1.5, 0, 0.2),
useRaster = TRUE,
ncol = NULL,
nrow = NULL,
doPar = TRUE,
las = 2,
groupedX = TRUE,
customFunc = NULL,
filterNA = TRUE,
filterNAreplacement = NA,
filterNeg = FALSE,
noise_floor = 0,
noise_floor_value = NA,
filterFloor = NULL,
filterFloorReplacement = NULL,
transFactor = 0.18,
nrpoints = 0,
smoothScatterFunc = jamba::plotSmoothScatter,
applyRangeCeiling = TRUE,
doTxtplot = FALSE,
ablineV = 0,
ablineH = c(-2, 0, 2),
blankPlotPos = NULL,
fillBackground = TRUE,
useRank = FALSE,
apply_transform_limit = 40,
panel_hook_function = NULL,
doPlot = TRUE,
verbose = FALSE,
...
)
x |
|
detail_factor |
|
nbin_factor |
|
bw_factor |
|
assay_name |
|
useMedian |
|
controlSamples |
|
centerGroups |
|
colramp |
one of several inputs recognized by jamba::getColorRamp()
|
groupedX |
|
grouped_mad |
|
outlierMAD |
|
mad_row_min |
|
displayMAD |
|
noise_floor , noise_floor_value |
|
naValue |
|
centerFunc |
|
whichSamples |
|
useRank |
|
titleBoxColor |
|
outlierColor |
|
fillBackground |
|
maintitle |
|
subtitle |
|
transFactor |
|
doPlot |
|
highlightPoints |
optional set of rows to highlight on each MA-plot panel, drawn as a set of points on top of the smooth scatter plot, supplied as a vector or a named list of vectors containing rownames(x).
|
highlightCex |
|
highlightColor |
|
doHighlightLegend |
|
ablineH , ablineV |
|
xlim |
|
ylim , xlim |
|
ncol , nrow |
|
blankPlotPos |
|
verbose |
logical indicating whether to print verbose output. |
... |
additional parameters sent to downstream functions,
|
useMean |
(deprecated) |
outlierRowMin |
|
groupedMAD |
|
colrampOutlier |
one of several inputs recognized by jamba::getColorRamp()
|
maintitleCex |
|
subtitlePreset |
|
subtitleAdjPreset |
|
doTitleBox |
|
titleColor |
|
titleFont |
|
titlePreset |
|
titleAdjPreset |
|
xlab , ylab |
|
xlabline , ylabline |
|
groupSuffix |
(deprecated) |
doHighlightPolygon |
|
highlightPolygonAlpha |
|
smoothPtCol |
|
margins |
|
outer_margins |
|
useRaster |
|
doPar |
|
las |
|
customFunc |
optional |
filterNA , filterNAreplacement |
|
filterNeg |
(deprecated) |
filterFloor , filterFloorReplacement |
(deprecated) in favor of noise_floor and noise_floor_value , respectively.
|
nrpoints |
|
smoothScatterFunc |
|
applyRangeCeiling |
|
doTxtplot |
|
panel_hook_function |
optional custom
|
titleBoxColor , subtitleBoxColor |
|
jammaplot
takes a numeric matrix, typically of gene expression data,
and produces an MA-plot (Bland-Altman plot), also known as a
median-difference plot. One panel is created for each column of
data. Within each panel, the x-axis represents the mean or median
expression of each row; the y-axis represents the difference from
mean or median for that column.
By default, the plot uses jamba::plotSmoothScatter()
, with optional
highlighted points drawn using points()
.
The function will determine an appropriate layout of plot panels,
which can be overridden using ncol
and nrow
to specify the
number of columns and rows of plot panels, respectively. For now,
this function uses base R graphics instead of ggplot2, in order
to accommodate some custom features.
This function uses "useRaster=TRUE" by default, which causes
jamba::plotSmoothScatter()
to render a rasterized image as opposed
to a composite of colored rectangles. This process substantially
reduces the render time in all cases, and reduces the image size
when saving as PDF or SVG.
Specific points can be highlighted with argument highlightPoints
which can be a vector or named list of vectors, containing rownames(x)
.
When using a list, point colors are assigned to each element in the
list in order, using the argument highlightColor
.
Typical MA-plots are "global-centered", which calculates the
mean/median across all columns in x
, and this value is subtracted
from each individual value per row.
By specifying controlSamples
the mean/median is calculated using only the colnames(x)
which match
controlSamples
, thus representing "difference from control."
It may also be useful to center data by known high-quality samples, so the effect of potential outlier samples is avoided.
By specifying centerGroups
as a vector of group names,
the centering is calculated within each group of colnames(x)
.
In this way, subsets of samples can be treated independently in
the MA-plots. A good example might be producing MA-plots for
"kidney" samples, and "muscle" samples, which may have
fundamentally different signal distributions. A good rule
of thumb is to apply centerGroups
to represent separate
groups of samples where you do not intend to apply direct
statistical comparisons across those samples, without at
least applying a two-way contrast, a fold change of fold
changes.
Another informative technique is to center by sample group,
for example centerGroups=sample_group
.
This technique produces MA-plots that depict the
"difference from group" for each sample replicate of a sample
group, and is very useful for identifying sample replicates
with markedly higher variability than the other replicates in
the same sample group. In general, the variability within sample group
should be substantially lower than variability across
sample groups. Use displayMAD=TRUE
and outlierMAD=2
as a recommended starting point for this technique.
The argument noise_floor
provides a numeric lower threshold,
where individual values at or below this threshold are
set to a defined value, defined by argument noise_floor_value
.
The default was updated in version 0.0.21.900
to
noise_floor=0
and noise_floor_value=NA
.
With these defaults, values at or below 0
are set to NA
and therefore are not included
in the MA-plot calculations. Only points above zero are included
as points in each MA-plot panel.
Another useful alternative is to define noise_floor_value=noise_floor
which sets any measurement at or below the noise_floor
to
this value. This option has the effect of reducing random noise from
points that are already below the noise threshold and therefore
are unreliable for this purpose.
Panels are drawn using the order of colnames(x)
by row,
from left-to-right, then top-to-bottom.
The argument blankPlotPos
is intended to insert an empty panel
at a particular panel position, to help customize the alignment
of sample panels.
This option is typically used with ncol
and nrow
to define
a fixed layout of panel columns and rows. blankPlotPos
refers
to panels numbered as drawn per row of panels, from left-to-right, then top-to-bottom.
Use argument displayMAD=TRUE
to display the per-sample MAD factor
relative to its centerGroups
value, if provided. The MAD value
for each MA-plot panel is calculated using rows whose mean
is at or above outlierRowMin
. The median MAD value is calculated
for each centerGroups
grouping when groupedMAD=TRUE
, by default.
Finally, each MA-plot panel MAD factor is the ratio of its MAD value
to the relevant median MAD value. MA-plot panels with MAD factor
above outlierMAD
are considered outliers, and the color ramp
uses colrampOutlier
or outlierColor
as a visual cue.
Putative outlier samples should usually not be determined when:
controlSamples
are defined to include only a subset
of sample groups,
centerGroups
is not defined, or represents more than one
set of sample groups that are not intended to be statistically
compared directly to one another.
Putative outlier samples may be defined when:
centerGroups
represents a set of sample groups that are
intended to be involved in direct comparisons
centerGroups
represents each sample group
Potential sample outliers may be identified by setting a threshold
with outlierMAD
, by default 5xMAD. For a sample to be considered
an outlier, its median difference from mean/median needs to be
five times higher than the median across samples.
We typically recommend an outlierMAD=2
when centering
by sample groups, or when centering within experiment subsets.
For one sample to have 2xMAD factor, its variance needs
to be uniquely twice as high as the majority of other samples, which
is typically symptomatic of possible technical failure.
There are exceptions to this suggested guideline, which includes scenarios where a batch effect may be involved.
To do:
Accept other object types as input, including Bioconductor
classes: ExpressionSet
, SummarizedExperiment
,
MultiExperimentSet
Make it efficient to convey group information, for example
define titleBoxColor
with group colors, allow centerByGroup=TRUE
which would re-use known sample group information.
Adjust the suffix to indicate when centerGroups
are being
used. For example indicate 'sampleID vs groupA'
instead of
'sampleID vs median'
.
list
of numeric
matrix
objects, one for each MA-plot,
with colnames "x"
and "y"
. This list
is sufficient input
to jammaplot()
to re-create the full set of MA-plots.
ggjammaplot()
:
jamba::plotSmoothScatter()
Other jam plot functions:
volcano_plot()
Other jam plot functions:
volcano_plot()
# Examples for ggjammaplot(). The whole block runs only when both optional
# packages, SummarizedExperiment and farrisdata, are installed.
if (jamba::check_pkg_installed("SummarizedExperiment") &&
jamba::check_pkg_installed("farrisdata")) {
suppressPackageStartupMessages(require(SummarizedExperiment));
# Example dataset shipped with the farrisdata package.
GeneSE <- farrisdata::farrisGeneSE;
# One color per sample, looked up by each sample's groupName in
# farrisdata::colorSub, and named by colnames(GeneSE).
titleBoxColor <- jamba::nameVector(
farrisdata::colorSub[as.character(
SummarizedExperiment::colData(GeneSE)$groupName)],
colnames(GeneSE));
# NOTE(review): this changes the global "warn" option and never restores it;
# the logical FALSE is presumably coerced to 0 -- confirm the intended level.
options("warn"=FALSE);
# MA-plots of the "raw_counts" assay, laid out with 6 panel columns.
gg <- ggjammaplot(GeneSE,
ncol=6,
base_size=12,
maintitle="Farris raw RNAseq data",
titleBoxColor=jamba::rgb2col(col2rgb(titleBoxColor)),
assay_name="raw_counts")
# Same layout using ranks (useRank=TRUE); ylim is widened to the rank scale.
gg <- ggjammaplot(GeneSE,
ncol=6,
assay_name="counts",
useRank=TRUE,
ylim=c(-11000, 11000),
maintitle="MA-plots by rank and rank difference",
titleBoxColor=titleBoxColor)
# Display the per-panel MAD factor (displayMAD=TRUE).
gg <- ggjammaplot(GeneSE,
ncol=6,
assay_name="counts",
titleBoxColor=titleBoxColor,
base_size=10,
maintitle="MA-plots showing MAD factor",
displayMAD=TRUE)
# Omit sample 22 via whichSamples, then leave panel position 22 empty
# with blankPlotPos.
gg <- ggjammaplot(GeneSE,
ncol=6,
assay_name="counts",
titleBoxColor=titleBoxColor,
maintitle="MA-plot omitting one panel, then using blankPlotPos",
whichSamples=colnames(GeneSE)[c(1:21, 23:24)],
blankPlotPos=22,
displayMAD=TRUE)
# The remaining examples are wrapped in if (FALSE) and are never executed.
if (FALSE) {
# return_type="data" returns the plot data instead of a ggplot object.
ggdf <- ggjammaplot(GeneSE,
assay_name="counts",
whichSamples=c(1:3, 7:9),
return_type="data",
titleBoxColor=titleBoxColor)
# Build two sets of items to highlight by filtering the returned data on its
# mean, difference, name, and item columns.
highlightPoints1 <- names(jamba::tcount(subset(ggdf, mean > 15 & difference < -1)$item, 2))
highlightPoints2 <- subset(ggdf, name %in% "CA1CB492" &
difference < -4.5)$item;
# A named list supplies one highlight group per list element.
highlightPoints <- list(
divergent=highlightPoints1,
low_CA1CB492=highlightPoints2);
ggdf_h <- ggjammaplot(GeneSE,
assay_name="counts",
highlightPoints=highlightPoints,
whichSamples=c(1:3, 7:9),
return_type="data",
titleBoxColor=titleBoxColor)
# you can use output from `jammaplot()` as input to `ggjammaplot()`:
# doPlot=FALSE is passed so that only the returned data is kept in jp2.
jp2 <- jammaplot(GeneSE,
outlierMAD=2,
doPlot=FALSE,
assay_name="raw_counts",
filterFloor=1e-10,
filterFloorReplacement=NA,
centerGroups=colData(GeneSE)$Compartment,
subtitleBoxColor=farrisdata::colorSub[as.character(colData(GeneSE)$Compartment)],
useRank=FALSE);
gg1 <- ggjammaplot(jp2,
ncol=6,
titleBoxColor=titleBoxColor);
print(gg1);
}
}
# Note the example data requires the affydata Bioconductor package
# The block is skipped when require(affydata) returns FALSE.
if (suppressPackageStartupMessages(require(affydata))) {
data(Dilution);
# log2(1 + x) transform of the expression values from the Dilution dataset.
edata <- log2(1+exprs(Dilution));
# Default MA-plots, one panel per column of edata.
jammaplot(edata);
# Plot only the first two samples.
jammaplot(edata,
whichSamples=c(1, 2));
# Custom panel labels.
jammaplot(edata,
sample_labels=paste("Sample", colnames(edata)));
# Alternative settings for the control-sample indicator.
jammaplot(edata,
controlIndicator="titlestar");
jammaplot(edata,
controlIndicator="none");
# Custom per-panel hook; this one draws a box around the figure region.
jammaplot(edata,
panel_hook_function=function(i,...){box("figure")});
# Rank-based MA-plots.
jammaplot(edata,
useRank=TRUE,
maintitle="Rank MA-plots");
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.