# Miscellaneous function to support working with iec functions.
#
# Authors: Nicholas G. Walton and Robert W. Howe
# Created 14 Oct 2014
# Last modified: 25 Sept 2015
## Nonlinear R^2 ----
#' Nonlinear R-squared
#'
#' \code{nlr2} calculates a nonlinear R-squared for the observed values.
#'
#' This function calculates the non-linear R-squared. The equation used is:
#' \deqn{R^{2} = 1 - \frac{SS_{reg}}{SS_{tot}}}{R^2 = 1 - (SS_reg / SS_tot)}
#' (see p. 34-35
#' \href{http://www.mcb5068.wustl.edu/MCB/Lecturers/Baranski/Articles/RegressionBook.pdf}{here}).
#' Note that this nonlinear R-squared is not anything squared so it can result
#' a negative value. Negative values indicate poor model fit.
#'
#' @param observed a numeric vector of observations.
#' @param expected a numeric vector of expected values.
#' @return The nonlinear R-squared value (numeric scalar).
nlr2 <- function(observed, expected) {
# This function returns the nonlinear R^2 for the solutions found by
# nlminb(). The nonlinear R^2 is not really anything squared so it can
# result a negative value. Negative values indicate very poor model fit.
# Sum of squared deviances from the nonlinear regression line.
ss_reg <- sum((observed - expected) ^ 2)
# Sum of squared deviances from the mean of the observed values.
ss_tot <- sum((observed - mean(observed)) ^ 2)
# Return the nonlinear R^2.
1 - (ss_reg / ss_tot)
}
## sensitivity function ----
#' Sensitivity of taxa to an environmental gradient.
#'
#' \code{get_sens} returns the sensitivity of each taxon in a community data
#' frame (\code{sp}) to an environmental gradient (\code{env_grad}).
#'
#' Sensitivity is defined here by first finding how many detections were made
#' above and below the midpoint (\code{mid}) of the gradient, taking their
#' difference, and finally dividing this by the number of detections. Taxa with
#' more detections below (\code{mid}) than above will have a negative
#' sensitivity and vice versa. Note that while function \code{\link{est_brc}}
#' requires that a reference gradient scaled to 0-10, \code{get_sens} does not
#' require this.
#'
#' @param mid numeric scalar taken as the gradient's midpoint for sensitivity
#' (default is 5).
#' @inheritParams scale10
#' @inheritParams est_brc
#' @return A data frame containing each taxon's sensitivity (\code{Sens}) to the
#' gradient, and the number of sites the they were detected at (\code{n}).
#' @examples
#' x <- function() sample(0:1, 10, replace = TRUE)
#' sp <- data.frame(sp1 = x(), sp2 = x(), sp3 = x())
#' grad <- runif(10, 0, 10)
#' get_sens(sp, grad)
get_sens <- function(sp, env_grad, mid = 5) {
sens <- function(sp_vec, grad) {
pres <- sp_vec > 0
n <- sum(pres)
low <- sum(pres & grad < mid)
high <- n - low
s <- (high - low) / n
c(Sens = s, n = n)
}
res_matrix <- t(apply(sp, 2, sens, env_grad))
data.frame(res_matrix)
}
## scale function ----
#' Scale environmental gradient to 0-10
#'
#' \code{scale10} scales an environmental gradient to the range 0-10.
#' Optionally, it can invert the scale. This is essential for use with
#' \code{\link{est_brc}}.
#'
#' \code{scale10} is used to scale an environmental gradient for use in
#' generating Biotic Response Curves with \code{\link{est_brc}}. The reference
#' gradient returned by \code{scale10} has minimum 0 and maximum 10. If the
#' input environmental gradient has larger values for less desirable sites, use
#' \code{invert = TRUE} to invert the gradient so that 10 represents the most
#' desirable condition.
#'
#' @param env_grad numeric vector of environmental gradient scores, one for each
#' site.
#' @param invert logical indicating if \code{env_grad} should be inverted
#' (default is \code{FALSE}).
#' @return A numeric vector with \code{env_grad} scaled to 0-10.
#' @examples
#' grad <- runif(10)
#' scale10(grad, TRUE)
scale10 <- function(env_grad, invert = FALSE) {
ref_grad <- 10 * ((env_grad - min(env_grad)) / (max(env_grad) - min(env_grad)))
if (invert) ref_grad <- 10 - ref_grad
ref_grad
}
## Bin function ----
#' Bin observations
#'
#' \code{bin} combines species observations based on input environmental
#' gradient. Number of bins or number of records can be specified. Resulting
#' bins can also be examined using the \code{summary} option.
#'
#' \code{bin} is used to "bin" (i.e., combine by taking the mean) species
#' species observations for taxa used to create Biotic Response functions
#' (\code{\link{est_brc}}). Unlike other functions in package \code{iec},
#' function \code{bin} takes a data frame that contains the environmental
#' gradient as the first column.
#'
#' @param sp data frame containing species observations as columns with an
#' environmental gradient as the first column (needs work).
#' @param n scalar value indicating the number of bins or number of
#' records per bin (see \code{n_bins}).
#' @param n_bins logical indicating if \code{n} is the number of bins
#' (\code{TURE}; default) or records per bin.
#' @param summary logical indicating if \code{bin} should return the binned
#' records (\code{FALSE}; default), or a summary bins
#' @return If \code{summary = FALSE} (default), \code{bin} returns data frame
#' containing the binned species records and environmental gradient. Both of
#' these are means. If presence/absence data are used (codes as 0/1), the
#' mean is also a probability. If \code{summary = TRUE}, \code{bin} returns
#' a table of the number of records per bin.
#' @examples
#' x <- data.frame(gradient = round(abs(rnorm(107)), 2),
#' sp1 = rpois(107, 0.7),
#' sp2 = rpois(107, 0.5))
#'
#' bin(x, n = 7, summary = TRUE)
#' x_bin <- bin(x, n = 7)
bin <- function(sp, n, n_bins = TRUE, summary = FALSE) {
# assumes the first column in sp contains the environmental gradient
# note that the returned species values will be probabilities if the input
# is pres/abs
# setting n_bins = TRUE allows setting the number of bins used
# setting n_bins = FALSE allows setting the number of records per bin
# order records by gradient scores
sp <- sp[order(sp[, 1]), ]
# if n_bins was set to FALSE, adjust n
if (!n_bins) {
n <- floor(nrow(sp) / n)
}
bin_levels <- cut(1:nrow(sp), n, labels = FALSE, include.lowest = TRUE)
if (summary) {
# just return the number of records per bin
return(table(bin_levels))
} else {
# "[, -1]" because column 1 contains bin levels
aggregate(sp, list(bin_levels), mean)[, -1]
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.