# draft code to make plots that show mean rarity on a balanced "rarity scale."
# Goal is to make a flexible function that will take a vector of abundances and
# return a ggplot object that can be easily incorporated into figures.
# library(tidyverse)
# library(ggthemes)
# library(scales) # trans_new() is in the scales library
## TODO
## OPTIONAL: color the species (so that stacking becomes clear)
## get text and axis widths to scale with device/viewport. currently base_size
## argument can control manually
## think about squeezing when rarities aren't equal but are close s.t. boxes
## overlap... smart option that can calculate whether overlap occurs? Currently,
## have the lines=T option to plot species as lines if overlap is a problem (set
## by user)
#' Sums duplicate values, for determining \[species\] weights
#'
#' Intended for use in constructing rarity plots, this function takes a numeric
#' vector of integers representing species abundances, and returns the weights
#' for each, unique rarity value. This is to solve overplotting and convey the
#' total "weight" of all species of a given rarity.
#'
#' @param x numeric vector
#'
#' @return A vector of weights, where only unique quantities have weights. The
#' length of the new vector could be much shorter than \code{length(x)}.
#' @export
#' @examples
#' combfun(c(1, 2, 3, 3, 3, 4, 5, 9))
combfun <- function(x){
x = x[order(x)]
y = unique(x) * as.numeric(table(x))
return(y)}
#' Round numbers, more aggressively the larger they are
#'
#' @param breaks is a numeric vector
#' intended for the breaks returned by \code{scales::transbreaks}
#'
#' @return numeric vector of same length as input, rounded nicely
#'
#' @noRd
#' @examples prettify(sqrt(2:7))
#' @examples prettify(sqrt(seq(2e3, 2e5, 4739)))
#'
prettify <- function(breaks){
digits <- -floor(log10(abs(breaks))) + 1
digits[breaks == 0] <- 0
raw <- round(breaks, digits = digits)
raw[raw == -Inf] <- 1e9
# raw[is.na(raw)]<-1e11
# raw[raw==0]<-1e-11
return(raw) #klugey fix to -Inf
}
#' Power scale transformation
#'
#' This is a function for nice scale transformations for \code{ggplot2},
#' including when \code{pow < 0}.
#'
#' This function is for power transformations (i.e. raising to a power and
#' inverse) This is useful for visualizing generalized means.
#'
#' @param pow Exponent of power transformation, scalar.
#' @param nb Number of desired breaks (approximate), scalar.
#'
#' @return A scale transformation object for plotting in ggplot.
#' @seealso \code{\link[scales]{trans_new}}, \code{\link[scales]{trans_breaks}},
#' \code{\link{pfun}}, \code{\link{ipfun}}
#' @export
power_trans = function(pow, nb) scales::trans_new(name = "power"
, transform = function(x){pfun(x, pow)}
, inverse = function(x){ipfun(x, pow)}
, breaks = function(x){ prettify(
scales::trans_breaks(
trans = function(x){pfun(x, pow)}
, inv = function(x) {ipfun(x, pow)}
, n = nb)(x * 1.1)
)
}
, domain = c(1, 10000) #this is to deal with -Inf
)
# Note that this function requires integer abundances. Overall, no great reason
# not to adjust rarity_plot to include fractions with the lines = TRUE
#' Replicate abundance vector to make stacks with geom_point
#'
#' @param df data.frame of abundances and rarities.
#'
#' @return data.frame with 1 row per individual for each species for plotting.
#'
#' @noRd
fancy_rep <- function(df){
if(!all.equal(df$ab, as.integer(df$ab))){stop("rarity plot requires integer abundances")}
mydf = data.frame(df[rep(1:nrow(df), df$ab), ])
return(mydf %>%
dplyr::group_by(.data$ab) %>%
dplyr::mutate(gr = 1:length(.data$ab)
, inds = rep(length(.data$ab)
, length(.data$ab)))
)
}
#' Base plot onto which balance plot is printed
#'
#' Takes abundance and various scale/dimension arguments to craft a ggplot
#' object for rarity plots.
#'
#' @template ab_template
#' @param fill_col Color for filling each of the boxes representing a single
#' individual.
#' @param y_extent Scalar, how tall to draw y-axis.
#' @param x_max Scalar, approximately how far right should x-axis extend.
#' @param x_min Scalar, approximately how far left should x-axis extend.
#' @param base_size Typeface size for ggplot text (scalar).
#' @param noco Scalar, shrinks text and points if plotting multiple balance
#' plots in a single plotting window
#' @param lines Logical, should each individual be plotted as a "box" or should
#' individuals be summarized simply as the height of a line segment
#' @param verbose Logical, should the function return a pile of text
#'
#' @return ggplot object with some elements of a balance plot
#'
#' @noRd
base_plot <- function(ab
, pointScale
, fill_col = "lightgrey" #can set to match communities
, y_extent = max(max(ab), 15) #how tall to draw y
, x_max = sum(ab)/min(ab) #plots a point to extend x
, x_min = sum(ab)/max(ab) # point to extend x
, base_size = 24 #controls text size, default for 7" sq plotting device
, noco = 1 #number of columns, shrinks text and point size proportional to number of panels
, lines = F
, verbose = T
){
#make plotting data
rf <- tibble::tibble(names = as.factor(1:length(ab))
, ab
, rarities = sum(ab)/ab
)
rfrepeated <- fancy_rep(rf)
y_extent <- max(y_extent, max(combfun(ab)))
#14 is empirically derived scaling factor; but isn't quite right.
# Seems like stuff below axis is about 2.5* height of 1 line of text
typeface_size = 14
# typeface_size = 15 # quick experiment
pointScale <- (typeface_size *
(min(grDevices::dev.size("cm")) /
noco - (2.5 * 1/ggplot2::.pt/10 * base_size)
)
)
pointsize <- pointScale / (y_extent * 1.1)
#0.5; shape is centered on x,y; offset so it rests upon x, y-1
goff <- 0.5
#ggplot command to generate basic plot object
base <- (rfrepeated %>%
ggplot2::ggplot(ggplot2::aes(x = .data$rarities, y = ab)) +
(if(lines == T){
rfdull <- rf %>%
dplyr::group_by(.data$rarities) %>%
dplyr::summarize(inds = sum(.data$ab))
#line segments
ggplot2::geom_segment(data = rfdull,
ggplot2::aes(x = .data$rarities
, xend = .data$rarities
, y = .data$inds
, yend = 0)
, color = grDevices::rgb(0,0,0,0.4)
, size = 1
)
} else{
#bricks
ggplot2::geom_point(ggplot2::aes(y = .data$gr - goff, alpha = 0.2)
, size = pointsize
, fill = fill_col
, shape = 22
, color = "black"
, stroke = 0.5/noco)
})
# plank
+ ggplot2::geom_segment(
ggplot2::aes(x = .data$x, y = .data$y
, xend = .data$xend, yend = .data$yend)
, data = data.frame(
x = c(min(rf$rarities))
, y = c(0)
, xend = c(max(rf$rarities))
, yend = c(0)
)
)
#fix plank location and add space above and below data range
+ggplot2::scale_y_continuous(
expand = c(0,0)
, limits = c(y_extent-1.05 * y_extent, 1.05 * y_extent)
)
+ ggplot2::labs(y = "individuals")
)
#calls the function theme plot to generate basic figure
return(theme_plot(base, base_size = base_size, noco = noco))
}
#' Set preferences for axes etc. for balance plots
#'
#' @param p ggplot object
#' @param base_size Typeface size for ggplot text (scalar).
#' @param noco Scalar, shrinks text and points if plotting multiple balance plots
#' in a single plotting window
#' @param ... Additional arguments passed to other functions.
#'
#' @return A ggplot object, with some theme elements specified
#'
#' @noRd
theme_plot <- function(p, base_size = 24, noco = 1, ...){
return(p
+ ggthemes::theme_tufte(base_family = "sans", base_size=base_size/noco)
+ ggplot2::theme(legend.position = "none")
+ ggplot2::theme(
axis.text = ggplot2::element_text(color = "black") # better than Wickham's grey?
, axis.line.x = ggplot2::element_line(colour = 'black'
, linewidth = 0.2
, linetype = 'solid')
, axis.line.y = ggplot2::element_line(colour = 'black'
, linewidth = 0.2
, linetype = 'solid')
)
)
}
#' Rescales baseplot x-axis, adds space to allow matching scales between comms
#'
#' @template l_template
#' @param fill_col Color for filling each of the boxes representing a single
#' individual.
#' @param y_extent Scalar, how tall to draw y-axis.
#' @param x_max Scalar, approximately how far right should x-axis extend.
#' @param x_min Scalar, approximately how far left should x-axis extend.
#' @param noco Scalar, shrinks text and points if plotting multiple balance
#' plots in a single plotting window
#' @param lines Logical, should each individual be plotted as a "box" or should
#' individuals be summarized simply as the height of a line segment
#' @param nbreaks Integer, approximate number of x-axis tick marks
#' @param ... Additional arguments passed to other functions.
#'
#' @return a gpplot object with some of the theme items set
#'
#' @noRd
scale_plot <- function(
ab
, l
, fill_col = "lightgrey"
, y_extent = max(max(ab), 15)
, x_max = sum(ab)/min(ab)
, x_min = sum(ab)/max(ab)
, noco = 1
, lines = F
, nbreaks = 5
, ...
){
return(base_plot(ab
, fill_col = fill_col
, y_extent = y_extent
, x_max = x_max
, x_min = x_min
, noco = noco
, lines = lines
, ...)
+ ggplot2::scale_x_continuous(trans = power_trans(
pow = l, nb = nbreaks), labels = signif)
+ ggplot2::geom_point(ggplot2::aes(.data$x, .data$y) #allows for x min and max points to determine axes
, data = tibble::tibble(x = c(x_max, x_min), y = c(0,0))
, color = "white", alpha=0)
)
}
#' Reference points at means with power \code{l}
#'
#' @template ab_template
#' @template l_template
#' @param noco Scalar, shrinks text and points if plotting multiple balance
#' plots in a single plotting window.
#'
#' @return geom object to add to a ggplot object to construct balance plot
#'
#' @noRd
mean_points <- function(ab, l, noco = 1){
ab <- ab[ab != 0]
div <- Vectorize(rarity, vectorize.args = ("l"))(ab, l)
pointDat = tibble::tibble(x = div, y = 0*div, clr = 1:length(div))
return(ggplot2::geom_point(data = pointDat
, ggplot2::aes(x = .data$x, y = .data$y, color = as.factor(.data$clr))
, size = 0.2 * min(grDevices::dev.size("cm")) / noco
))
}
#' Plot the fulcrum
#'
#' @template l_template
#' @param fill_col Color for filling each of the boxes representing a single
#' individual.
#' @param y_extent Scalar, how tall to draw y-axis.
#' @param x_max Scalar, approximately how far right should x-axis extend.
#' @param x_min Scalar, approximately how far left should x-axis extend.
#' @param noco Scalar, shrinks text and points if plotting multiple balance
#' plots in a single plotting window.
#' @param nbreaks Integer, approximate number of x-axis tick marks.
#' @param verbose Logical, should the function return a pile of text.
#'
#'
#' @noRd
fulcrum <- function(ab, l
, y_extent = max(max(combfun(ab)), 15)
, x_max = 1
, x_min = 1
, fill_col = "light_grey"
, base_size = 24
, noco = 1
, nbreaks = 5
, verbose = T){
ab <- ab[ab != 0]
div <- rarity(ab, l)
if(verbose == T){
print(c(paste("diversity =", div), paste("community size =", sum(ab))
, paste("max observed rarity =", sum(ab) / min(ab))
, paste("min observed rarity =", sum(ab) / max(ab))))}
# y_off = -0.03 # small amount to offset the y axis.
y_off = -1/ggplot2::.pt/10
size_adjust = 0.5 # this shrinks the fulcrum given the size of other
# points for shape = 17 to have its apex at y = 0.
fulcDat = tibble::tibble(x = div, y = y_off * y_extent)
return( ggplot2::geom_point(data = fulcDat
, size = (size_adjust * min(grDevices::dev.size("cm")) -
(2.5 * 1/ggplot2::.pt/10 * base_size)) /
noco # scales with plotting device and number of columns
# , size=rel(0.3)
, shape = 17
, ggplot2::aes(.data$x, .data$y)
)
)
}
#' Construct rarity balance plot
#'
#' This function takes the abundance vector, scaling exponent, and target means
#' (Default the Pythagorean means), and returns a formatted 1-panel ggplot
#' object.
#'
#' Hill diversity, or "mean rarity," is the balance point for the community
#' along the rarity scale. The image produced by \code{rarity_plot} illustrates
#' this balance. Each block represents an individual: because Hill
#' diversities are weighted by abundance, the “mass” of each “block” is the same
#' regardless of species identity. Each individual’s x-axis value is given its
#' species’s "rarity," which is the reciprocal of its relative abundance. The
#' parameter \code{l} controls how rarity is scaled. A community’s balance
#' point along the rarity scale, pictured as a triangular fulcrum, is the mean
#' rarity, or diversity, of the community.
#'
#' To ease comparison across scales, by default the Pythagorean means
#' (\url{https://en.wikipedia.org/wiki/Mean}) are marked
#' with reference points: the arithmetic mean with a rose dot, the geometric
#' mean with a blue dot, and the harmonic mean with a green dot. The arithmetic
#' scale provides high leverage to very rare species; although they carry little
#' weight (few individuals), these species influence the mean a great deal
#' because they sit far to the right of the rarity scale. The arithmetic mean
#' rarity of the community is the Hill diversity when \eqn{\ell = 1}{ℓ = 1}, and is equal to
#' species richness. The logarithmic scale provides less leverage to very rare
#' species. Thus, the geometric mean rarity of the community is lower. The
#' geometric mean rarity is also known as the Hill-Shannon diversity, or the
#' Hill diversity when \eqn{\ell = 0}{ℓ = 0}. The reciprocal scale accords more leverage to low
#' rarity values. Thus, the harmonic mean rarity, also known as the Hill-Simpson
#' diversity, or Hill diversity when \eqn{\ell = -1}{ℓ = -1}, is much lower still. An
#' interactive online application that enables users to specify species
#' abundances and the scaling parameter is available at
#' \url{https://mean-rarity.shinyapps.io/rshiny_app1/}
#'
#' The scaling of various plot elements depends on the plotting device.
#' Rstudio's seems especially touchy based on window size. The defaults here
#' pertain to the standard 7"x7" plotting window given by \code{quartz()} or
#' \code{pdf()}. They also seem to play nice on the shiny app
#' \url{https://mean-rarity.shinyapps.io/rshiny_app1/}. Other window sizes or
#' devices may require tweaking.
#'
#' @template ab_template
#' @template l_template
#' @template q_template
#' @param noco Scalar, shrinks text and points if plotting multiple balance
#' plots in a single plotting window.
#' @param lines Logical, should each individual be plotted as a "box" or should
#' individuals be summarized simply as the height of a line segment.
#' @param means Numeric vector of scaling exponent values corresponding to
#' reference (by default, Pythagorean) means.
#' @param ... Additional arguments passed to other functions.
#'
#'
#' @seealso This function depends on internal functions in the
#' \code{\link{MeanRarity}} package which can be accessed with \code{:::} e.g.
#' \code{MeanRarity:::scale_plot}.
#'
#' @concept Visualization
#'
#' @export
#' @examples
#' ab<-c(20,8,5,4,2,1)
#'
#' # experiment with other abundance vectors!
#' # ab <- c(20, 15, 9, 3, 2, 1, 1)
#' # ab <- c(100, 20, 15, 9, 3, 2, 1, 1)
#' # ab <- c(50,30,20,0,0,0)
#' # ab <- c(4,3,2)
#' # ab <- c(20, 15, 9, 3, 2, 1, 1, 0, 0)
#' # ab <- c(200,100, 20, 15, 9, 3, 2, 1, 1)
#' # ab <- floor(exp(rnorm(50, 4, 1.5)))
#'
#' richness <- rarity_plot(ab, 1)
#' Hill_Shannon <- rarity_plot(ab, 0)
#' Hill_Simpson <- rarity_plot(ab, -1)
#'
#' richness
#' Hill_Shannon
#' Hill_Simpson
#'
#' # richness + Hill_Shannon + Hill_Simpson # plot with patchwork
rarity_plot <- function(ab
, l
, q = NULL
, means = -1:1
, noco = 1
, lines = FALSE
# , l_banner = TRUE
# , title = NULL
, ...){
message(strwrap("\`rarity_plot()\` expects a square viewport (likely issues in
the RStudio plotting device) and resizes points based on
\`min(dev.size()\` and `\`noco\` (for number of columns).
\nSelecting `lines = TRUE` will plot stacks of individuals as a
line element, which tends to be more robust to window size.
\nSetting `lines = TRUE` may be the best way to deal with overplotting, which
results from several species with similar but not identical
rarities.", prefix = "\n", initial = "\n", indent = 5, exdent = 5, width = 80))
if(!is.null(q)){
l = 1-q
warning("l has been set to 1-q")
}
ab = ab[ab != 0]
myp = (scale_plot(ab
, l
, noco= noco
, lines = lines
,...)
+ mean_points(ab
, means
, noco = noco)
+ fulcrum(ab
, l
, noco = noco
, ...)
+ ggplot2::scale_color_brewer(type = "qual", palette = "Set1"))
return(myp
)
}
#' ggplot sugar for rarity plots
#'
#' @inheritParams ellnotate
#'
#' @return ggplot objects to add to rarity plot
#' @concept Visualization
#' @export
#'
#' @examples
#' p1 <- rarity_plot(1:10, l = 0)
#' p1
#' p1 + seesaw_sugar(title = "Hill-Shannon diversity"
#' , l = 0)
#'
seesaw_sugar <- function(title = NULL
, l
, unicode_in_title = TRUE
, ...
){
list(ggplot2::scale_color_brewer(type = "qual", palette = "Dark2")
, ellnotate(title = title
, l = l
, unicode_in_title = TRUE
, ...)
)
}
#' Convenience function to label rarity plots with scaling exponent
#'
#' @template l_template
#' @param unicode_in_title Logical, include unicode ell and its value.
#' @param title Character string, or NULL.
#' @param ... Additional arguments passed to other functions.
#'
#' @concept Visualization
#' @return ggplot object
#' @export
#'
#' @examples
#' myp <- rarity_plot(1:10, 0)
#' myp +
#' ellnotate("Hill-Shannon diversity", l = 0)
#'
ellnotate <- function(title = NULL
, l
, unicode_in_title = TRUE
, ...
){
annotation <- ggplot2::annotation_custom(grid::textGrob(
label = paste0("\u2113 = ", l, "\n", title)
, vjust = 1)
, xmin = -Inf
, xmax = Inf
, ymin = Inf
, ymax = Inf
)
return(annotation)
}
#' Conveniently plot for l = -1:1, with reference points in each fig
#'
#' Convenience function for plotting balance plots for the arithmetic,
#' geometric, and harmonic mean rarities with only one line of code.
#'
#' @return Prints 3 ggplots into graphics device.
#'
#' @template l_template
#' @inheritParams rarity_plot
#' @param lrange Numeric vector of scaling exponent values
#' @param unicode_in_title Logical, include label for \ell value
#' @param title Character string
#'
#'
#' @noRd
#'
rarity_series <- function(ab
, lrange = -1:1
, means = lrange
, unicode_in_title = TRUE
, title = NULL
, ...){
for(l in lrange){
rarity_plot(ab, l, means, ...)
}
}
#' Convenience function to blank y-axis elements for constructing multi-panel
#' plots
#'
#' This function whites out, but does not remove, y-axis elements for a
#' consistent plot size that can be included in a multi-panel plot
#'
#' @return A ggplot.
#'
#' @param p A ggplot.
#'
#' @export
white_y<-function(p){
return(p
+ ggplot2::theme(axis.text.y = ggplot2::element_text(color="white")
, axis.title.y = ggplot2::element_text(color="white")
, axis.ticks.y = ggplot2::element_line(color="white")
, axis.line.y = ggplot2::element_line(color="white")
, axis.line.x = ggplot2::element_line(
colour = 'black', linewidth = 0.5, linetype = 'solid'
)
)
)
}
#' Convenience function to omit y-axis elements for constructing multi-panel
#' plots
#'
#' This function removes y-axis elements for plots intended to be
#' included in a multi-panel plot. When the y-axis is omitted (rather than
#' whited-out as in \code{white_y}), the plotted area typically expands to fill
#' unused space.
#'
#' @seealso \code{\link{white_y}}
#'
#' @return A ggplot.
#'
#' @param p A ggplot.
#'
#' @export
omit_y<-function(p){
return(p
+ ggplot2::theme(axis.text.y = ggplot2::element_blank()
, axis.title.y = ggplot2::element_blank()
, axis.ticks.y = ggplot2::element_blank()
, axis.line.y = ggplot2::element_blank()
, axis.line.x = ggplot2::element_line(
colour = 'black', linewidth = 0.2, linetype = 'solid'
)
)
)
}
#' Plot a rank-abundance distribution
#'
#' Take a vector of species \[relative\] abundances and plot a rank-abundance
#' distribution or Whittaker plot.
#'
#' @template ab_template
#' @param maxrich Scalar, how many species to include space for
#' @param maxab Scalar, how big is the largest abundance value
#' @param fill Character string specifying a color, could be extended to
#' permit name for community ID variable
#' @param shape Character string for point shape, could be extended to
#' permit name for community ID variable
#' @param size Scalar, adjusts point size in ggplot object
#' @param Whittaker Logical, if \code{TRUE} log-transform abundances
#'
#' @return ggplot object with a rank-abundance plot
#'
#' @concept Visualization
#' @export
#' @examples radplot(c(20,8,5,4,2,1))
#' radplot(c(20,8,5,4,2,1), Whittaker = TRUE)
radplot <- function(ab
, maxrich = length(comm)
, maxab = max(comm)
, fill = "red"
, shape = 16
, size = 2.5
, Whittaker = FALSE
){
comm = ab[ab!=0]
if(Whittaker){comm = log(comm)}
rawrnk = tibble::tibble(abund = comm, rnk = dplyr::row_number(comm))
toplot = rawrnk %>%
dplyr::mutate(x = -.data$rnk - maxrich + max(.data$rnk))
f <- (toplot %>% ggplot2::ggplot(ggplot2::aes(.data$x, .data$abund, .data$size))
+ ggplot2::geom_point(shape = shape, color = fill, size = size)
+ ggplot2::geom_line(color = fill)
+ ggplot2::scale_x_continuous(limits = c(-maxrich, 0))
+ ggplot2::scale_y_continuous(limits = c(0, maxab))
+ ggplot2::theme_classic()
+ ggplot2::ggtitle(ifelse(Whittaker
, " rank-log(abundance) [Whittaker] plot"
, " rank-abundance plot"
)
)
+ ggplot2::theme(axis.text.x = ggplot2::element_text(color = "white")
, axis.text.y = ggplot2::element_text(colour = "black")
, legend.position = "none"
, text = ggplot2::element_text(size = 12)
)
+ ggplot2::labs(x = "abundance rank", y = ifelse(Whittaker
, "ln(individuals)"
, "individuals")
)
)
return(f)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.