Using_pMEM.R
In pMEM: Predictive Moran's Eigenvector Maps

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
hook_output <- knitr::knit_hooks$get("output")
knitr::knit_hooks$set(output = function(x, options) {
  if (!is.null(n <- options$out.lines)) {
    x <- xfun::split_lines(x)
    if (length(x) > n) {
      # truncate the output
      x <- c(head(x, n), "....\n")
    }
    x <- paste(x, collapse = "\n")
  }
  hook_output(x, options)
})
##
### Load packages here:
##
### Figure counter:
(
  function() {
    log <- list(
      labels = character(),
      captions = character()
    )
    list(
      register = function(label, caption) {
        log$labels <<- c(log$labels, label)
        log$captions <<- c(log$captions, caption)
        invisible(NULL)
      },
      getNumber = function(label) {
        which(log$labels == label)
      },
      getCaption = function(label) {
        a <- which(log$labels == label)
        cap <- log$captions[a]
        cat(sprintf("Fig. %d. %s\n\n---\n",a,cap))
        invisible(NULL)
      }
    )
  }
)() -> fc
fc$register(
  "sef1",
  paste(
    "Examples of pMEM spatial eigenfunctions of order 1 with descriptor (back",
    "markers) and prediction scores (red markers). The black continuous line",
    "is calculated for 1-m intervals to show the continuity of the spatial",
    "eigenfunctions."
  )
)
fc$register(
  "depth",
  paste(
    "Spatially-explicit predictions of channel depth (black solid line) with",
    "observations used training (black markers) and testing (red markers) the",
    "model."
  )
)
fc$register(
  "velocity",
  paste(
    "Spatially-explicit predictions of current velocity (black solid line)",
    "with observations used training (black markers) and testing (red markers)",
    "the model."
  )
)
fc$register(
  "substrate",
  paste(
    "Spatially-explicit predictions of mean substrate grain size (black solid",
    "line) with observations used training (black markers) and testing (red",
    "markers) the model."
  )
)

## -----------------------------------------------------------------------------
data("salmon", package = "pMEM")

## -----------------------------------------------------------------------------
library("pMEM")         ## To calculate pMEM
library("magrittr")     ## For its very handy pipe operateur (%>%)
library("glmnet")       ## To calculate elastic net regression

## -----------------------------------------------------------------------------
set.seed(1234567890)                     ## For the drawing to be repeatable.
sort(sample(nrow(salmon),25)) -> test    ## Drawing the testing set (N = 25).
(1:nrow(salmon))[-test] -> train         ## The training set is the remainder.

## -----------------------------------------------------------------------------
genSEF(
  x = salmon$Position[train],   ## The set of locations.
  m = genDistMetric(),          ## The distance metric function.
  f = genDWF("linear", 500)     ## The distance weighting function.
) -> sef0

sef0                            ## Show the resulting object.

## ----fig.width=7.25, fig.height=10--------------------------------------------
## A regular transect of points 1 m apart:
salmon$Position %>%
  {seq(min(.) - 20, max(.) + 20, 1)} -> xx

## Custom plotting function:
plotSEF <- function(sef, xTrain, xTest, xx, wh, ...) {
  plot(x = xx, y = predict(sef, xx, wh), type = "l", ...)
  points(x = xTrain, y = as.matrix(sef, wh), pch=21, bg="black")
  points(x = xTest, y = predict(sef, xTest)[,wh], pch=21, bg="red")
  invisible(NULL)
}

## Storing the graphical parameters:
p <- par(no.readonly = TRUE)

## Changing the graphical parameters:
par(mfrow=c(3,2), mar=c(4.6,4.6,3,1))

## Generate a six-inset plot:
for(fun in c("power","hyperbolic","spherical","exponential","Gaussian",
             "hole_effect"))
  genSEF(
    x = salmon$Position[train],
    m = genDistMetric(),
    f = genDWF(fun, 250, 0.75)
  ) %>%
    plotSEF(salmon$Position[train], salmon$Position[test], xx, 1,
            xlab="Location (m)", ylab="pMEM score", main=fun, lwd=2)

## ----echo=FALSE, results='asis'-----------------------------------------------
fc$getCaption("sef1")

## -----------------------------------------------------------------------------
## Restoring the graphical parameters:
par(p)

## -----------------------------------------------------------------------------
getMinMSE(
  U = as.matrix(sef0),
  y = salmon$Depth[train],
  Up = predict(sef0, salmon$Position[test]),
  yy = salmon$Depth[test],
  complete = FALSE
)

## -----------------------------------------------------------------------------
objf <- function(par, m, fun, x, xx, y, yy, lb, ub) {
  
  ## Bound the parameter values within the lb -> ub intervals:
  par <- (ub - lb) * (1 + exp(-par))^(-1) + lb
  ## This step is necessary to prevent pitfalls during optimization.
  
  ## Calculate the SEF under the conditions requested
  if(fun %in% c("power","hyperbolic")) {
    sef <- genSEF(x, m, genDWF(fun, range = par[1L], shape = par[2L]))
  } else
    sef <- genSEF(x, m, genDWF(fun, range = par[1L]))
  
  ## Calculate the minMSE model
  res <- getMinMSE(as.matrix(sef), y, predict(sef, xx), yy, FALSE)
  
  ## The objective criterion is the out of the sample mean squared error:
  res$mse
}

## -----------------------------------------------------------------------------
objf(
  par = c(0),
  m = genDistMetric(),
  fun = "linear",
  x = salmon$Position[train],
  xx = salmon$Position[test],
  y = salmon[["Depth"]][train],
  yy = salmon[["Depth"]][test],
  lb = c(10),
  ub = c(1000)
) -> res

res

## ----eval=FALSE---------------------------------------------------------------
#  sefTrain <- list()  ## For storing the "SEMap" objects.
#  mseRes <- list()    ## For storing results from function getMinMSE().
#  sel <- list()       ## For storing selected pMEM eigenfunctions.
#  lm <- list()        ## For storing the linear models.
#  prd <- list()       ## For storing the predictions.

## ----echo=FALSE---------------------------------------------------------------
load(file = "Using_pMEM.rda")
mseRes <- list()
sel <- list()
lm <- list()
prd <- list()

## -----------------------------------------------------------------------------
estimateSEF <- function(x, xx, y, yy, lower, upper) {
  
  res <- list(optim = list())  ## A list to contain the results.
  
  ## This loop tries the seven DWF one by one, estimating 'dmax' (and, when
  ## necessary, 'shape') using simulated annealing.
  for(fun in c("linear","power","hyperbolic","spherical","exponential",
               "Gaussian","hole_effect")) {
    optim(
      par = c(0,if(fun %in% c("power","hyperbolic")) 0), fn = objf,
      method = "SANN", m = genDistMetric(), fun = fun,
      x = x, xx = xx, y = y, yy = yy,
      lb = c(lower[1],if(fun %in% c("power","hyperbolic")) lower[2]),
      ub = c(upper[1],if(fun %in% c("power","hyperbolic")) upper[2])
      
    ) -> res$optim[[fun]]
  }
  
  ## Extract the minimum values from the list of optimization:
  unlist(
    lapply(
      res$optim,
      function(x) x$value
    )
  ) -> res$bestval
  
  ## Find which DWF had the minimum objective criterion value:
  names(
    which.min(
      res$bestval
    )
  ) -> res$fun
  
  ## Back-transform the parameter values:
  res %>%
    {.$optim[[.$fun]]$par} %>%
    {(upper - lower) * (1 + exp(-.))^(-1) + lower} -> res$par
  
  ## Calculate the SEF using the optimized DWF parameters:
  res %>%
    {genSEF(
      x = x,
      m = genDistMetric(),
      f = genDWF(.$fun, .$par[1L], if(length(.$par) > 1) .$par[1L])
    )} -> res$sef
  
  ## Return the result list:
  res
}

## ----eval=FALSE---------------------------------------------------------------
#  estimateSEF(
#    x = salmon$Position[train],
#    xx = salmon$Position[test],
#    y = salmon[["Depth"]][train],
#    yy = salmon[["Depth"]][test],
#    lower = c(20,0.25),
#    upper = c(1000,1.75)
#  ) -> sefTrain[["Depth"]]

## -----------------------------------------------------------------------------
## Calculate the channel depth model:
sefTrain[["Depth"]]$sef %>%
  {getMinMSE(
    U = as.matrix(.),
    y = salmon[["Depth"]][train],
    Up = predict(., salmon$Position[test]),
    yy = salmon[["Depth"]][test]
  )} -> mseRes[["Depth"]]

## Extract the selected SEF:
mseRes[["Depth"]] %>% {sort(.$ord[1:.$wh])} -> sel[["Depth"]]

## -----------------------------------------------------------------------------
## Calculate a linear model from the selected SEF:
lm(
  formula = y~.,
  data = cbind(
    y = salmon[["Depth"]][train],
    as.data.frame(sefTrain[["Depth"]]$sef, wh=sel[["Depth"]])
  )
) -> lm[["Depth"]]

## Calculate the predictions:
predict(
  lm[["Depth"]],
  newdata = as.data.frame(
    predict(
      object = sefTrain[["Depth"]]$sef,
      newdata = xx,
      wh = sel[["Depth"]]
    )
  )
) -> prd[["Depth"]]

## ----fig.width=6, fig.height=6------------------------------------------------
plot(x=xx, y=prd[["Depth"]], type="l",
     ylim=range(salmon[["Depth"]], prd[["Depth"]]), las=1,
     ylab="Channel depth (m)", xlab="Location along the transect (m)")
points(x = salmon$Position[train], y = salmon[["Depth"]][train], pch=21,
       bg="black")
points(x = salmon$Position[test], y = salmon[["Depth"]][test], pch=21, bg="red")

## ----echo=FALSE, results='asis'-----------------------------------------------
fc$getCaption("depth")

## ----eval=FALSE---------------------------------------------------------------
#  ## Estimate the most adequate predictive Moran's eigenvector map:
#  estimateSEF(
#    x = salmon$Position[train],
#    xx = salmon$Position[test],
#    y = salmon[["Velocity"]][train],
#    yy = salmon[["Velocity"]][test],
#    lower = c(20,0.25),
#    upper = c(1000,1.75)
#  ) -> sefTrain[["Velocity"]]

## -----------------------------------------------------------------------------
## Calculate the current velocity model:
sefTrain[["Velocity"]]$sef %>%
  {getMinMSE(
    U = as.matrix(.),
    y = salmon[["Velocity"]][train],
    Up = predict(., salmon$Position[test]),
    yy = salmon[["Velocity"]][test]
  )} -> mseRes[["Velocity"]]

## Extract the selected SEF:
mseRes[["Velocity"]] %>% {sort(.$ord[1:.$wh])} -> sel[["Velocity"]]

## Calculate a linear model from the selected SEF:
lm(
  formula = y~.,
  data = cbind(
    y = salmon[["Velocity"]][train],
    as.data.frame(sefTrain[["Velocity"]]$sef, wh=sel[["Velocity"]])
  )
) -> lm[["Velocity"]]

## Calculate the predictions:
predict(
  lm[["Velocity"]],
  newdata = as.data.frame(
    predict(
      object = sefTrain[["Velocity"]]$sef,
      newdata = xx,
      wh = sel[["Velocity"]]
    )
  )
) -> prd[["Velocity"]]

## ----fig.width=6, fig.height=6------------------------------------------------
plot(x=xx, y=prd[["Velocity"]], type="l",
     ylim=range(salmon[["Velocity"]], prd[["Velocity"]]), las=1,
     ylab="Velocity (m/s)", xlab="Location along the transect (m)")
points(x = salmon$Position[train], y = salmon[["Velocity"]][train], pch=21,
       bg="black")
points(x = salmon$Position[test], y = salmon[["Velocity"]][test], pch=21,
       bg="red")

## ----echo=FALSE, results='asis'-----------------------------------------------
fc$getCaption("velocity")

## ----eval=FALSE---------------------------------------------------------------
#  ## Estimate the most adequate predictive Moran's eigenvector map:
#  estimateSEF(
#    x = salmon$Position[train],
#    xx = salmon$Position[test],
#    y = salmon[["Substrate"]][train],
#    yy = salmon[["Substrate"]][test],
#    lower = c(20,0.25),
#    upper = c(1000,1.75)
#  ) -> sefTrain[["Substrate"]]

## -----------------------------------------------------------------------------
## Calculate the mean substrate grain size model:
sefTrain[["Substrate"]]$sef %>%
  {getMinMSE(
    U = as.matrix(.),
    y = salmon[["Substrate"]][train],
    Up = predict(., salmon$Position[test]),
    yy = salmon[["Substrate"]][test]
  )} -> mseRes[["Substrate"]]

## Extract the selected SEF:
mseRes[["Substrate"]] %>% {sort(.$ord[1:.$wh])} -> sel[["Substrate"]]

## Calculate a linear model from the selected SEF:
lm(
  formula = y~.,
  data = cbind(
    y = salmon[["Substrate"]][train],
    as.data.frame(sefTrain[["Substrate"]]$sef, wh=sel[["Substrate"]])
  )
) -> lm[["Substrate"]]

## Calculate the predictions:
predict(
  lm[["Substrate"]],
  newdata = as.data.frame(
    predict(
      object = sefTrain[["Substrate"]]$sef,
      newdata = xx,
      wh = sel[["Substrate"]]
    )
  )
) -> prd[["Substrate"]]

## ----echo=FALSE, fig.width=6, fig.height=6------------------------------------
plot(x=xx, y=prd[["Substrate"]], type="l",
     ylim=range(salmon[["Substrate"]], prd[["Substrate"]]), las=1,
     ylab="Mean grain size (mm)",
     xlab="Location along the transect (m)")
points(x = salmon$Position[train], y = salmon[["Substrate"]][train], pch=21,
       bg="black")
points(x = salmon$Position[test], y = salmon[["Substrate"]][test], pch=21,
       bg="red")

## ----echo=FALSE, results='asis'-----------------------------------------------
fc$getCaption("substrate")

## -----------------------------------------------------------------------------
objf2 <- function(par, m, fun, x, xx, y, yy, w, ww, lb, ub) {
  par <- (ub - lb) * (1 + exp(-par))^(-1) + lb
  if(fun %in% c("power","hyperbolic")) {
    sef <- genSEF(x, m, genDWF(fun, range = par[3L], shape = par[4L]))
  } else
    sef <- genSEF(x, m, genDWF(fun, range = par[3L]))
  glm1 <- glmnet(x = cbind(w, as.matrix(sef)), y = y, family = "poisson",
                 alpha = par[1L], lambda = par[2L])
  pp <- predict(glm1, newx = cbind(ww, predict(sef, xx)), type="response")
  -2*sum(dpois(yy, pp, log = TRUE))
}

## -----------------------------------------------------------------------------
## Implement a list of orthogonal polynomial objects:
plist <- list()
plist[["Depth"]] <- poly(salmon[train,"Depth"],2)
plist[["Velocity"]] <- poly(salmon[train,"Velocity"],2)
plist[["Substrate"]] <- poly(salmon[train,"Substrate"],2)

## The matrix of auxiliary descriptor for the training set:
cbind(
  as.matrix(plist[["Depth"]]),
  as.matrix(plist[["Velocity"]]),
  as.matrix(plist[["Substrate"]])
) -> w

## Generate suitable column names:
c("Depth^1","Depth^2",
  "Velocity^1","Velocity^2",
  "Substrate^1","Substrate^2") -> colnames(w)

## The matrix of auxiliary descriptor for the testing set:
cbind(
  predict(plist[["Depth"]], newdata=salmon[test,"Depth"]),
  predict(plist[["Velocity"]], newdata=salmon[test,"Velocity"]),
  predict(plist[["Substrate"]], newdata=salmon[test,"Substrate"])
) -> ww

## Copying the column names:
colnames(ww) <- colnames(w)

## -----------------------------------------------------------------------------
objf2(
  par = c(0, 0, 0, 0),
  m = genDistMetric(),
  fun = "Gaussian",
  x = salmon$Position[train],
  xx = salmon$Position[test],
  y = salmon[["Abundance"]][train],
  yy = salmon[["Abundance"]][test],
  w = w,
  ww = ww,
  lb = c(0,0,20,0.25),
  ub=c(1,1,1000,1.75)
) -> res2

res2

## -----------------------------------------------------------------------------
estimateSEF2 <- function(x, xx, y, yy, w, ww, lower, upper) {
  
  res <- list(optim = list())  ## A list to contain the results.
  
  ## This loop tries the seven DWF one by one, estimating 'dmax' (and, when
  ## necessary, 'shape') using simulated annealing.
  for(fun in c("linear","power","hyperbolic","spherical","exponential",
               "Gaussian","hole_effect")) {
    optim(
      par = c(0,0,0,if(fun %in% c("power","hyperbolic")) 0), fn = objf2,
      method = "SANN", m = genDistMetric(), fun = fun,
      x = x, xx = xx, y = y, yy = yy, w = w, ww = ww,
      lb = c(lower[1:3],if(fun %in% c("power","hyperbolic")) lower[4]),
      ub = c(upper[1:3],if(fun %in% c("power","hyperbolic")) upper[4])
    ) -> res$optim[[fun]]
  }
  
  ## Extract the minimum values from the list of optimization:
  unlist(
    lapply(
      res$optim,
      function(x) x$value
    )
  ) -> res$bestval
  
  ## Find which DWF had the minimum objective criterion value:
  names(
    which.min(
      res$bestval
    )
  ) -> res$fun
  
  ## Back-transform the parameter values:
  res %>%
    {.$optim[[.$fun]]$par} %>%
    {(upper[1:length(.)] - lower[1:length(.)]) * (1 + exp(-.))^(-1) +
        lower[1:length(.)]} -> res$par
  
  ## Calculate the SEF using the optimized DWF parameters:
  res %>%
    {genSEF(
      x = x,
      m = genDistMetric(),
      f = genDWF(.$fun, .$par[3], if(length(.$par) > 3) .$par[4])
    )} -> res$sef
  
  ## Return the result list:
  res
}

## ----eval=FALSE---------------------------------------------------------------
#  estimateSEF2(
#    x = salmon$Position[train],
#    xx = salmon$Position[test],
#    y = salmon[["Abundance"]][train],
#    yy = salmon[["Abundance"]][test],
#    w = w,
#    ww = ww,
#    lower = c(0,0,20,0.25),
#    upper = c(1,1,1000,1.75)
#  ) -> sefTrain[["Abundance"]]

## -----------------------------------------------------------------------------
cbind(w, as.matrix(sefTrain[["Abundance"]]$sef)) %>%
  glmnet(
    y = salmon$Abundance[train], family = "poisson",
    alpha = sefTrain[["Abundance"]]$par[1L],
    lambda = sefTrain[["Abundance"]]$par[2L]) -> lm[["Abundance"]]

## Model coefficients:
coef(lm[["Abundance"]])

## -----------------------------------------------------------------------------
lm[["Abundance"]] %>%
  predict(
    cbind(
      predict(plist[["Depth"]], prd[["Depth"]]),
      predict(plist[["Velocity"]], prd[["Velocity"]]),
      predict(plist[["Substrate"]], prd[["Substrate"]]),
      predict(sefTrain[["Abundance"]]$sef, xx)
    ),
    type="response"
  ) ->  prd[["Abundance"]]

## ----fig.width=6, fig.height=6------------------------------------------------
plot(x=xx, y=prd[["Abundance"]], type="l",
     ylim=range(salmon[["Abundance"]], prd[["Abundance"]]), las=1,
     ylab="Parr abundance (fish)",
     xlab="Location along the transect (m)")
points(x = salmon$Position[train], y = salmon[["Abundance"]][train], pch=21,
       bg="black")
points(x = salmon$Position[test], y = salmon[["Abundance"]][test], pch=21,
       bg="red")