#' @title NGNN, Nonlinear Gradient Nearest Neighbors
#'
#' @description Predict community composition based on individualistic
#' but possibly coordinated species responses
#'
#' @param spe species dataframe, rows = sample units and columns =
#' species
#'
#' @param idi in-sample predictor dataframe, rows must match 'spe'
#'
#' @param ido out-of-sample predictor dataframe, where rows = new
#' sample units
#'
#' @param nm string vector specifying predictors to include (max 2)
#'
#' @param nmulti number of random starts in nonparametric regression
#'
#' @param pa logical, convert to presence/absence?
#'
#' @param pr logical, use 'beals' for probs of joint occurrence?
#'
#' @param method distance measure for all ordinations
#'
#' @param thresh numeric threshold for stepacross dissimilarities
#'
#' @param neighb number of adjacent distances considered in NCOpredict
#'
#' @param maxits number of NCOpredict iterations
#'
#' @param k the maximum number of nearest neighbors to find in NCO
#' gradient space
#'
#' @param type either 'points' or 'text' for plotting
#'
#' @param cexn expansion factor for points and text
#'
#' @param ocol color value or vector for out-of-sample points
#'
#' @param obj object of class 'ngnn' from call to \code{ngnn}
#'
#' @param pick variable to query
#'
#' @param zlim vector of length 2, giving vertical limits for plots
#'
#' @param ... additional arguments passed to function
#'
#' @return
#' List of class 'ngnn' with elements:
#' \itemize{
#' \item spe = original species matrix
#' \item id_i = in-sample predictors used in NPMR
#' \item nm = which predictors were used
#' \item nm_len = their length
#' \item iYhat = in-sample fitted values from NPMR
#' \item oYhat = out-of-sample fitted values from NPMR
#' \item np_stat = fit, tolerances and results of signif tests
#' \item np_mods = list of NPMR regression models for every species
#' \item np_bw = list of NPMR bandwidths for every species,
#' \item scr_i = environmentally constrained site scores from NCO
#' \item NCO_model = the NCO ordination model
#' \item R2_internal = squared correlation of Dhat and Dz from NCO
#' \item R2_enviro = squared correlation of D and Dz from NCO
#' \item R2_partial = squared correlation of Dz and each predictor
#' from NCO
#' \item Axis_tau = rank correlation of each axis and predictor from
#' NCO
#' \item nmsp = a list of 5 items describing predicted NCO scores
#' (see \code{\link{nco_predict}})
#' \item flagax1,flagax2 = flags which nco_predict values were off
#' axes
#' \item nn = identifies the gradient nearest neighbor for predicted
#' vals
#' \item spp_imputed = inferred out-of-sample species compositions
#' }
#'
#' @details
#' When given a set of sample units where species abundances and
#' corresponding predictor values are both known, how does one infer
#' which species should appear in 'new' sample units where only the
#' predictors are known? NGNN (nonlinear gradient nearest neighbors)
#' approaches the problem of species imputation in the following way:
#'
#' Regress species individualistically on predictors -> \cr Feed
#' fitted values to NMS ordination -> \cr Find nearest neighbors in
#' ordination space, and assign species. \cr
#'
#' A more detailed description: \cr
#' First, define an \emph{in-sample} set of sample units where species
#' abundances and corresponding predictor values are both known, as
#' well as an \emph{out-of-sample} set where only predictor values are
#' known. Second, use \code{\link{npmr}} to perform NPMR regression
#' (McCune 2006) of both in-sample and out-of-sample sample units; use
#' \code{\link{nco}} to feed NPMR fitted values to NMS ordination
#' (Kruskal 1964); this is nonparametric constrained ordination (NCO;
#' McCune and Root 2012; McCune and Root 2017). A follow-up step with
#' \code{\link{nco_predict}} allows calculating predicted NCO scores
#' for the \emph{out-of-sample} set even though species compositions
#' are not strictly known. Finally, use \code{gnn} to identify
#' the \emph{in-sample} Euclidean nearest neighbor of each
#' \emph{out-of-sample} point in the NCO ordination space, and assign
#' the (possibly averaged) species composition of that neighbor to the
#' point in question. This retains realistic communities of
#' co-occurring species, since they've already been observed in at
#' least one other sample unit. The entire process is summarized in
#' the wrapper function \code{\link{ngnn}}.
#'
#' Function \code{\link{ngnn}} finds the \emph{k} nearest nighbors in
#' the original ordination space; higher values of \emph{k} probably
#' work better with many original points, and with points more evenly
#' distributed in ordination space.
#'
#' @examples
#' # set up
#' set.seed(978)
#' require(vegan)
#' data(varespec, varechem)
#' spe <- varespec ; id <- varechem
#' i <- sample(1:nrow(spe), size=floor(0.75*nrow(spe))) # sample
#' spe <- spe[i, ] # in-sample species
#' idi <- id[i, ] # in-sample predictors
#' ido <- id[-i, ] # out-of-sample predictors
#' nm <- c('Al', 'K') # select 1 or 2 gradients of interest
#'
#' # basic usage
#' res <- ngnn(spe, idi, ido, nm, nmulti=5, method='bray',
#' thresh=0.90, neighb=5, maxits=999, k=1)
#' summary(res)
#' str(res, 1)
#'
#' # plot the species response curves
#' ngnn_plot_spp(res, pick=1:9, nm=nm)
#'
#' # plot the NCO gradient space
#' ngnn_plot_nco(res)
#'
#' # predicted (imputed) species composition for out-of-sample sites
#' ngnn_get_spp(res)
#'
#' # how close were predicted species composition to 'true' values?
#' spe_append <- rbind(spe, res$spp_imputed) # append to existing
#' heatmap(t(as.matrix(spe_append)), Rowv=NA, Colv=NA)
#'
#' # check composition of 'hold-out' data
#' heatmap(t(as.matrix(varespec[-i,])), Rowv=NA, Colv=NA)
#' # ... vs new species from NGNN
#' heatmap(t(as.matrix(res$spp_imputed)), Rowv=NA, Colv=NA)
#'
#' # Prediction error: Root Mean Square Error
#' `rmse` <- function(y, ypred, ...){
#' sqrt(mean((y-ypred)^2, ...))
#' }
#' rmse(varespec[-i,], res$spp_imputed)
#'
#'
#' ## can do entire process manually, avoiding the wrapper function:
#' # NPMR
#' res_npmr <- npmr(spe, idi, ido, nm, nmulti=5)
#' # NCO (NMS)
#' res_nco <- nco(res_npmr, method='bray', thresh=0.90)
#' # NCOpredict (NMSpredict)
#' res_nmsp <- nco_predict(res_nco, method='bray', neighb=5,
#' maxits=999)
#' # GNN
#' res_gnn <- gnn(obj=res_nmsp, k=1)
#' summary(res_gnn)
#'
#'
#' @references
#' Kruskal, J. B. 1964. Multidimensional scaling by optimizing
#' goodness of fit to a nonmetric hypothesis. Psychometrika 29:
#' 1-27.
#'
#' McCune, B. 2006. Non-parametric habitat models with automatic
#' interactions. Journal of Vegetation Science 17(6):819-830.
#'
#' McCune, B., and H. T. Root. 2012. Nonparametric constrained
#' ordination. 97th ESA Annual Meeting. Ecological Society of
#' America, Portland, OR.
#'
#' McCune, B., and H. T. Root. 2017. Nonparametric constrained
#' ordination to describe community and species relationships to
#' environment. Unpublished ms.
#'
#' Ohmann, J.L., and M.J. Gregory. 2002. Predictive mapping of forest
#' composition and structure with direct gradient analysis and
#' nearest-neighbor imputation in coastal Oregon, U.S.A. Canadian
#' Journal of Forest Research 32:725-741.
#'
#' @family ngnn functions
#' @seealso \code{\link{npmr}} for NPMR, \code{\link{nco}} for NCO,
#' \code{\link{nco_predict}} for predictive NCO, and
#' \code{gnn} for the core function of NGNN.
#'
#' @export
#' @rdname ngnn
### final wrapper function
`ngnn` <- function(spe, idi, ido, nm, nmulti=5, pa=FALSE, pr=FALSE,
method='bray', thresh=0.90,
neighb=5, maxits=999, k=1, ...){
res_npmr <- npmr(spe, idi, ido, nm, nmulti, pa, pr, ...)
# NCO (NMS)
res_nco <- nco(obj=res_npmr, method, thresh, ...)
# NCOpredict (NMSpredict)
res_nmsp <- nco_predict(obj=res_nco, method, neighb, maxits, ...)
# GNN
res_gnn <- gnn(obj=res_nmsp, k, ...)
class(res_gnn) <- 'ngnn'
res_gnn
}
#' @export
#' @rdname ngnn
### GNN core function
`gnn` <- function(obj, k, ...){
stopifnot(class(obj)=='ncopredict')
cat('Finding nearest neighbor(s) in NCO space...\n\n')
nn <- FNN::get.knnx(data=obj$scr_i, query=obj$nmsp$scr_o,
k=k, algo='kd_tree')$nn.index
rowseq <- 1:nrow(nn)
tmp <- obj$spe[rowseq, ]
tmp[] <- NA
for (r in rowseq){ # for every new plot
# avg neighbor abundances:
tmp[r,] <- colMeans(obj$spe[nn[r,],])
}
# TODO: could use weighted mean abundance, weighted by distance
cat('Assigning spp composition to out-of-sample SUs...\n\n')
row.names(tmp) <- row.names(obj$nmsp$scr_o)
out <- c(obj, list(nn = nn, spp_imputed = tmp))
class(out) <- 'ngnn'
out
}
#' @export
#' @rdname ngnn
# summary for NGNN
`summary.ngnn` <- function(obj, ...){
stopifnot(class(obj)=='ngnn')
out <- list(np_stat = obj$np_stat,
R2_internal = obj$R2_internal,
R2_enviro = obj$R2_enviro,
R2_partial = obj$R2_partial,
Axis_tau = obj$Axis_tau,
flagax1 = obj$flagax1,
flagax2 = obj$flagax2)
isnum <- sapply(out, is.numeric)
out[isnum] <- lapply(out[isnum], round, digits=3)
out
}
#' @export
#' @rdname ngnn
# plot both old and new scores
`ngnn_plot_nco` <- function(obj, type='points', ocol=2, cexn=NULL,
...){
stopifnot(class(obj)=='ngnn')
type <- match.arg(type, c('points', 'text', 'none'))
ylim <- c(min(obj$scr_i[,2],obj$nmsp$scr_o[,2])*1.1,
max(obj$scr_i[,2],obj$nmsp$scr_o[,2])*1.1)
xlim <- c(min(obj$scr_i[,1],obj$nmsp$scr_o[,1])*1.1,
max(obj$scr_i[,1],obj$nmsp$scr_o[,1])*1.1)
if(!is.null(cexn)){
cexn <- normalize(obj$id_i[cexn])
} else { cexn <- 0.7 }
vegan::ordiplot(obj$scr_i, type=type, display='sites',
xlim=xlim, ylim=ylim, cex=cexn, las=1)
if(type=='points'){
graphics::points(obj$nmsp$scr_o, col=ocol, pch=16, cex=0.7)
}
if(type=='text'){
graphics::text(obj$nmsp$scr_o,
labels=row.names(obj$nmsp$scr_o),
col=ocol, cex=0.7)
}
}
#' @export
#' @rdname ngnn
# extractor function for species composition in predicted target sites
`ngnn_get_spp` <- function(obj, ...){
obj$spp_imputed
}
#' @export
#' @rdname ngnn
# inspect species response curves from NPMR
`ngnn_plot_spp` <- function(obj, pick=NULL, zlim, nm, ...){
stopifnot(class(obj)=='ngnn')
spe <- obj$spe
obj <- obj$np_mods
ev <- 30
wasmissing <- missing(zlim)
if(is.null(pick)) pick <- c(1:ncol(spe))
if(is.character(pick)) pick <- which(names(spe) %in% pick)
`fn5` <- function (nn=length(pick)) {
if (nn <= 3) c(1, nn)
else if (nn <= 6) c(2, (nn + 1)%/%2)
else if (nn <= 12) c(3, (nn + 2)%/%3)
else c(ceiling( nn / (nr <- ceiling(sqrt(nn)))), nr)
}
x1 <- seq(min(obj[[1]]$eval[[1]]),max(obj[[1]]$eval[[1]]),len=ev)
x2 <- seq(min(obj[[1]]$eval[[2]]),max(obj[[1]]$eval[[2]]),len=ev)
dd <- expand.grid(x1, x2)
names(dd) <- nm
graphics::par(mfrow=c(fn5()[1], fn5()[2]),
oma=c(0,0,0,0), mar=c(0,0,.9,0))
cat('Plotting species response curves, just a moment...\n')
for (i in pick){
f <- matrix(stats::predict(obj[[i]], newdata=dd), ev, ev)
if(wasmissing) {
if(identical(min(f), max(f))) mn <- 0 else mn <- min(f)
zlim <- c(mn*0.9, max(f)*1.1)
}
graphics::persp(x1, x2, f, col='lightblue',
main=names(obj)[[i]],
xlab=nm[[1]], ylab=nm[[2]], zlab='',
theta=125, phi=35, d=1.5, ltheta = -30,
lphi = 55, shade=0.9, ticktype='d',
expand=0.7, zlim=zlim)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.