#' @title Out-of-sample prediction for Nonparametric Constrained
#' Ordination
#'
#' @description
#' Calculates new scores for out-of-sample observations in a
#' multivariate NCO gradient space.
#'
#' @param obj object of class 'nco' from call to \code{\link{nco}}
#'
#' @param method distance measure for all ordinations
#'
#' @param neighb number of adjacent distances considered in prediction
#'
#' @param maxits number of iterations
#'
#' @param type either 'points' or 'text' for plotting
#'
#' @param cexn expansion factor for points and text
#'
#' @param ocol color value or vector for out-of-sample points
#'
#' @param ... additional arguments passed to function
#'
#' @return
#' List of class 'ncopredict' including new out-of-sample predicted
#' scores, and flags indicating which (if any) were beyond the range
#' of original scores. Specifically, the first item \code{nmsp} is a
#' list of 5 items describing predicted NCO scores:
#' \itemize{
#' \item scr_both = combined new and old NCO scores
#' \item scr_o = new NCO scores only
#' \item stress = overall stress of the solution, with new scores
#' \item iters = number of iterations performed
#' \item cause = ICAUSE from FORTRAN, reason for termination of
#' iterations: 1 = max iterations used, 2 = stress fell below
#' STRMIN, 3 = stress ratio exceeded SRATMX, 4 = scale factor of
#' gradient fell below SFGRMN
#' \item R2_enviro = squared correlation of D and Dz
#' \item R2_partial = squared correlation of Dz and each predictor
#' \item Axis_tau = rank correlation of each axis and predictor
#' }
#'
#' @details
#' Combines existing algorithms in multivariate workflow:
#'
#' NPMR + NMS = NCO[in-sample] \cr
#' NCO + NCOpredict = NCO[out-of-sample]
#'
#' The main function is a wrapper for \code{add.points()}, written by
#' \href{http://ecology.msu.montana.edu/labdsv/R/labs/lab9/lab9.html}{
#' Dave Roberts}, who deserves all credit. The intellectual pedigree
#' of the NMS prediction concept includes Dave Roberts, Peter Minchin,
#' Jari Oksanen, and Bruce McCune, although they each differ in
#' algorithmic approach. McCune et al. (1997a, 1997b) gave its first
#' use in the literature, while McMurray et al. (2015) gave the first
#' results from the R implementation. PC-ORD (McCune and Mefford
#' 2016) seems to be the only other software that offers NMS
#' prediction, and does so with a different algorithm that also flags
#' poorly fit new points.
#'
#' @examples
#' # set up
#' set.seed(978)
#' require(vegan)
#' data(varespec, varechem)
#' spe <- varespec ; id <- varechem
#' i <- sample(1:nrow(spe), size=floor(0.75*nrow(spe))) # sample
#' spe <- spe[i, ] # in-sample species
#' idi <- id[i, ] # in-sample predictors
#' ido <- id[-i, ] # out-of-sample predictors
#' nm <- c('Al', 'K') # select 1 or 2 gradients of interest
#'
#' # NPMR
#' res_npmr <- npmr(spe, idi, ido, nm, nmulti=5)
#' summary(res_npmr)
#'
#' # NCO (NMS)
#' res_nco <- nco(res_npmr, method='bray', thresh=0.90)
#' summary(res_nco)
#'
#' # NCOpredict (NMSpredict)
#' res_nmsp <- nco_predict(res_nco, method='bray', neighb=5,
#' maxits=999)
#' summary(res_nmsp)
#'
#' # plot the NCO gradient space
#' par(mfrow=c(1,2))
#' plot(res_nco) # original in-sample points
#' plot(res_nmsp) # add out-of-sample points
#' par(mfrow=c(1,1))
#'
#' @family ncopredict functions
#'
#' @references
#' McCune, B., J.P. Dey, J.E. Peck, K. Heiman, and S. Will-Wolf. 1997a.
#' Regional gradients in lichen communities of the southeast United
#' States. Bryologist 100:145-158.
#'
#' McCune, B., J.P. Dey, J.E. Peck, D. Cassell, K. Heiman, S.
#' Will-Wolf, and P.N. Neitlich. 1997b. Repeatability of community
#' data: species richness versus gradient scores in large-scale lichen
#' studies. Bryologist 100:40-46.
#'
#' McCune, B., and M. J. Mefford. 2016. PC-ORD. Multivariate Analysis
#' of Ecological Data. Version 7. MjM Software Design, Gleneden
#' Beach, OR.
#'
#' McMurray, J.A., D.W. Roberts, and L.H. Geiser. 2015. Epiphytic
#' lichen indication of nitrogen deposition and climate in the
#' northern rocky mountains, USA. Ecological Indicators 49:154-161.
#'
#' Roberts, D.W. 2017. LabDSV: Non-metric Multidimensional Scaling.
#' URL http://ecology.msu.montana.edu/labdsv/R/labs/lab9/lab9.html
#' [original code, as \code{add.points()}]
#'
#' @export
#' @rdname nco_predict
### wrapper for NMSpredict, get scores for out-of-sample sample units
`nco_predict` <- function(obj, method, neighb, maxits, ...){
stopifnot(class(obj)=='nco')
if(!identical(names(obj$iYhat), names(obj$oYhat))){
stop('Species must agree between old and new Yhat')
}
cat('Getting distances of combined in/out-of-sample data...\n\n')
D0 <- vegan::vegdist(rbind(obj$iYhat, obj$oYhat), method)
cat('Predicting new NCO scores...\n\n')
nmsp <- NMSpredict(scr=obj$scr_i, dis=D0, neighb, maxits)
names(nmsp)[names(nmsp)=='points'] <- 'scr_both'
names(nmsp)[names(nmsp)=='newpoints'] <- 'scr_o'
# flag if beyond range of existing scores
flagax1 <- nmsp$scr_o[
which(nmsp$scr_o[,1] > max(obj$scr_i[,1])|
nmsp$scr_o[,1] < min(obj$scr_i[,1])),]
flagax2 <- nmsp$scr_o[
which(nmsp$scr_o[,2] > max(obj$scr_i[,2]) |
nmsp$scr_o[,2] < min(obj$scr_i[,2])),]
out <- c(obj,
list(nmsp=nmsp,
flagax1=flagax1,
flagax2=flagax2))
class(out) <- 'ncopredict'
out
}
# #' @useDynLib vegan
### NMSpredict core algorithm lightly adapted from add.points()
`NMSpredict` <- function(scr, dis, neighb, maxits){
if (!requireNamespace("vegan", quietly = FALSE)) {
stop("Package package `vegan` required", call.=FALSE)
}
# convert original scores to local matrix, and get sizes
points <- list(points=scr)
class(points) <- 'nmds'
points <- points$points
oldn <- nrow(points)
ndim <- ncol(points)
totn <- attr(dis,'Size')
newn <- totn - oldn
# TODO: allow 1 point instead of many - until then, force > 1
if (newn < 1) stop('come on, try more than just 1 point!')
# test correspondence
if (!identical(dimnames(points)[[1]],attr(dis,'Labels')[1:oldn]))
stop('ordination and dissimilarity matrix do not match')
# decompose dissimilarity object to isolate new values
diss <- as.matrix(dis)[1:oldn,(oldn+1):totn]
# set up initial conditions
ndis <- oldn * newn
tmp <- matrix(rep(0,newn*ndim),ncol=ndim)
for (i in 1:newn) {
pnt <- seq(1,oldn)[order(diss[,i])][1:neighb]
weight <- 1-diss[pnt,i]
for (j in 1:ncol(points)) {
tmp[i,j] <-stats::weighted.mean(points[pnt,j],w=weight)
}
}
xinit <- rbind(points,tmp)
dimnames(xinit)[[1]] <- attr(dis,'Labels')
# set up indices
iidx <- rep((1:oldn),newn)
jidx <- NULL
for (i in (oldn+1):totn) jidx <- c(jidx,rep(i,oldn))
# set up ordination
nfix <- oldn
ngrp <- istart <- 1
isform <- 2
ities <- 1
iregn <- 1
iscal <- 0
sratmx <- 0.99999
strmin <- 1e-07
sfgrmn <- 1e-05
dist <- rep(0,ndis)
dhat <- rep(0,ndis)
x <- matrix(0,nrow=totn,ncol=ndim)
stress <- 1
strs <- ngrp
iters <- 1
icause <- 1
maxits <- maxits
iwork <- rep(0,ndis)
grad <- matrix(0,nrow=totn,ncol=ndim)
grlast <- matrix(0,nrow=totn,ncol=ndim)
out <- .Fortran('monoMDS',
nobj=as.integer(totn),
nfix=as.integer(nfix),
ndim=as.integer(ndim),
ndis=as.integer(ndis),
ngrp=as.integer(ngrp),
diss=as.double(diss),
iidx=as.integer(iidx),
jidx=as.integer(jidx),
xinit=as.double(xinit),
istart=as.integer(istart),
isform=as.integer(isform),
ities=as.integer(ities),
iregn=as.integer(iregn),
iscal=as.integer(iscal),
maxits=as.integer(maxits),
sratmx=as.double(sratmx),
strmin=as.double(strmin),
sfgrmn=as.double(sfgrmn),
dist=as.double(dist),
dhat=as.double(dhat),
points=as.double(x),
stress=as.double(stress),
strs=as.double(strs),
iters=as.integer(iters),
cause=as.integer(icause),
##################
PACKAGE='vegan'
##################
)
res <- list(points=matrix(out$points,ncol=ndim),
newpoints=matrix(out$points,ncol=ndim)[(oldn+1):totn,],
stress=out$stress,
iters=out$iters,
cause=out$cause)
dimnames(res$points)[[1]] <- attr(dis,'Labels')
dimnames(res$newpoints)[[1]] <- attr(dis,'Labels')[(oldn+1):totn]
class(res) <- 'nmds'
res
}
#' @export
#' @rdname nco
# summary method
`summary.ncopredict` <- function(obj, ...){
stopifnot(class(obj)=='ncopredict')
out <- list(nmsp = obj$nmsp,
flagax1 = obj$flagax1,
flagax2 = obj$flagax2)
out
}
#' @export
#' @rdname nco_predict
# plot both old and new scores
`plot.ncopredict` <- function(obj, type='points', ocol=2, cexn=NULL,
...){
stopifnot(class(obj)=='ncopredict')
type <- match.arg(type, c('points', 'text', 'none'))
ylim <- c(min(obj$scr_i[,2],obj$nmsp$scr_o[,2])*1.1,
max(obj$scr_i[,2],obj$nmsp$scr_o[,2])*1.1)
xlim <- c(min(obj$scr_i[,1],obj$nmsp$scr_o[,1])*1.1,
max(obj$scr_i[,1],obj$nmsp$scr_o[,1])*1.1)
if(!is.null(cexn)){
cexn <- normalize(obj$id_i[cexn])
} else { cexn <- 0.7 }
# par(oma=rep(0,4), mar=c(4,4,1,1))
vegan::ordiplot(obj$scr_i, type=type, display='sites', xlim=xlim,
ylim=ylim, cex=cexn, las=1)
if(type=='points'){
graphics::points(obj$nmsp$scr_o, col=ocol, pch=16, cex=0.7)
}
if(type=='text'){
graphics::text(obj$nmsp$scr_o,
labels=row.names(obj$nmsp$scr_o),
col=ocol, cex=0.7)
}
}
### end ###
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.