R/spatOLE.R

#' Spatial OLE Interpolation
#'
# "It's raining dead birds." --Ned, \emph{Pushing Daisies}
#'
#' @description
#' This function interpolates the optimal linear estimator (OLE) in a spatial format using a base grid and a sighting data frame. Defaults set by sensitivity analysis presented in Carlson \emph{et al.} 2018
#'
#' @param sightings Data frame of sightings with columns "year", "longitude", "latitude", and "sighting" which gives quality. Long/lats must be given in decimal coordinates. Year records the date of a sighting (though it could be any continuous units of time) and sighting records the quality of the sighting. A "1" is a certain sighting (guaranteed valid) while a "2" or anything higher is uncertain. Uncertain sightings are not necessarily invalid, and researchers may want to differentiate within uncertain sightings by quality. We typically use three categories: 1 is physical evidence or certain expert sightings, 2 are plausible expert sightings, and 3 are dubious or novice sightings without strong evidence.
#' @param grid Blank raster onto which to interpolate your results.
#' @param k.nn The k nearest neighbors to pull (nonrandom method) or the k points to sample in every iteration from the N nearest neighbors (random method)
#' @param N.nn The N nearest neighbors to pull from in the randomized method. N must be greater than k.
#' @param randomize Implement the random method
#' @param reps How many iterations are used in the randomized method
#' @param parallel Do you want to parallelize the function?
#' @param setCores Do you want to manually set the number of cores to run the function on? Defaults to FALSE, and runs the function on all but one core detected automatically. Adaptive sampling and parallel processing cannot be turned on at the same time, and parallel supercedes adaptive.
#' @param cores Manually specify how many cores.
#' @param adaptive Do you want to run the model with adaptive estimation? Uses epsilon argument to set a convergence threshold, and after the first 10 runs in a cell, waits for the difference in the running mean before and after adding an iteration (delta) to drop below the convergence threshold (epsilon). Adaptive sampling and parallel processing cannot be turned on at the same time, and parallel supercedes adaptive.
#' @param epsilon Convergence threshold for adaptive estimation.
#' @param qualitycontrol Eliminates all uncertain sightings from the dataset ("sighting">1).
#' @param verbose Incredibly stupid, don't turn on (dead dove do not eat).
#'
#' @keywords Optimal linear estimator, OLE, spatial extinction date estimator
#'
#' @export
#'
#' @examples  
#' # EXAMPLE 1: NO INVALID SIGHTINGS
#' 
#' x <- makeSims(10,2)
#' ole <- spat.OLE(x$sightings,x$grid$blank)
#' rbPal <- colorRampPalette(c('blue','red'))
#' x$sightings$Col <- rbPal(10)[as.numeric(cut(x$sightings[,3],breaks = 10))]
#' par(mfrow=c(2,2))
#' plot(x$grid$blank,main='Sightings')
#' points(x$sightings[,c(1:2)], col=x$sightings$Col,pch=16,cex=1.5)
#' plot(x$grid$real,main='True Extinction Date')
#' plot(ole[[1]], main='Estimate')
#' plot(ole[[2]], main='Variance')
#' 
#' # EXAMPLE 2: MIXED VALIDITY SIGHTINGS
#' 
#' x <- errorSims(n=10,pts=2,ipts=5,p=0.1)
#' ole <- spat.OLE(x$sightings,x$grid$layer.2,qualitycontrol=TRUE)
#' rbPal <- colorRampPalette(c('blue','red'))
#' x$sightings$Col <- rbPal(10)[as.numeric(cut(x$sightings[,3],breaks = 10))]
#' par(mfrow=c(2,2))
#' plot(x$grid$layer.2,main='Sightings')
#' points(x$sightings[x$sightings$real==1,c(1:2)], col=x$sightings$Col,pch=16,cex=1.5)
#' points(x$sightings[x$sightings$real==0,c(1:2)], col='black',pch=19,cex=1.5)
#' plot(x$grid$layer.1,main='True Extinction Date')
#' plot(ole[[1]], main='Estimate')
#' plot(ole[[2]], main='Variance')
#' 
#' # EXAMPLE 3: ADAPTIVE RESAMPLING
#' 
#' x <- makeSims(10,2)
#' ole <- spat.OLE(x$sightings,x$grid$blank,parallel=TRUE,setCores=TRUE,cores=4)
#' ole2 <- spat.OLE(x$sightings,x$grid$blank,adaptive=TRUE,epsilon=0.005)
#' rbPal <- colorRampPalette(c('blue','red'))
#' x$sightings$Col <- rbPal(10)[as.numeric(cut(x$sightings[,3],breaks = 10))]
#' par(mfrow=c(3,2))
#' plot(x$grid$blank,main='Sightings')
#' points(x$sightings[,c(1:2)], col=x$sightings$Col,pch=16,cex=1.5)
#' plot(x$grid$real,main='True Extinction Date')
#' plot(ole[[1]], main='100 Reps')
#' plot(ole[[2]], main='Variance')
#' plot(ole2[[1]], main='Adaptive')
#' plot(ole2[[2]], main='Variance')
#' 

spat.OLE <- function(sightings, grid,  k.nn=7, N.nn=12, 
                     randomize=TRUE, reps=100,
                     parallel=FALSE,setCores=TRUE,cores=1,
                     adaptive=FALSE, epsilon=0.001,
                     qualitycontrol=FALSE, verbose=FALSE) {

  if(qualitycontrol==TRUE) {
    sightings <- sightings[sightings$sighting < 2,]
  }
  
  if(class(grid)=='RasterStack') {grid <- grid[[1]]}
  

  pts <- rasterToPoints(grid, spatial=TRUE)
  pts@data <- data.frame(pts@data, long=coordinates(pts)[,1],
                         lat=coordinates(pts)[,2])
  pts@data$extyear <- 0
  pts@data$var <- 0

  # Your k nearest neighbors gets set here
  nearest <- nn2(sightings[,c('longitude','latitude')],pts@data[,2:3],k=N.nn)
  
  # nearest <- nn2(sightings[,c('longitude','latitude')],pts@data[,2:3],k=nrow(sightings))
  
  # The 2:3 breaks when there's multiple elements in the RasterStack, though I'm somewhat unclear on how that produces the pattern it does. 
  # I think it randomizes along one axis but inherits real values along the other. It's because all the real values are zeroes, and that becomes one lat
  # It's not a real concern unless you break it. If you're reading this don't break it I beg you
  
  
  idx <- nearest$nn.idx
  dists <- nearest$nn.dists
  dists <- 1/dists
  for (i in 1:nrow(dists)){
    d.vector <- dists[i,]
    d.vector[!(d.vector==Inf)] <- d.vector[!(d.vector==Inf)]/sum(d.vector[!(d.vector==Inf)])
    d.vector[(d.vector==Inf)] <- 0.99999
    dists[i,] <- d.vector
  }

  pb <- txtProgressBar(style=3, min=1, max=nrow(idx))

  if (randomize==FALSE) {
    
    if (parallel==TRUE) {
      
      if(setCores==FALSE) {
        num_cores <- detectCores()-1
      } else {
        num_cores <- cores
      }
      
      cl<-makeCluster(num_cores)
      registerDoParallel(cl)
      
        pts@data$extyear <- foreach(i=1:nrow(idx), .combine='c') %dopar%  {
          row.list <- idx[i,c(1:k.nn)]
          sub <- sightings[row.list,]
          sub <- sub[,c('year','sighting')]
          sub.2 <- data.frame(table(sub$year))
          sub.2$Var1 <- as.numeric(as.character(sub.2$Var1))
          sExtinct::OLE.fun(sub.2[order(sub.2$Var1),], alpha=0.05)$Estimate
        }
        
      } else {
        
        for (i in 1:nrow(idx)) {
          row.list <- idx[i,c(1:k.nn)]
          sub <- sightings[row.list,]
          sub <- sub[,c('year','sighting')]
          sub.2 <- data.frame(table(sub$year))
          sub.2$Var1 <- as.numeric(as.character(sub.2$Var1))
          pts@data$extyear[i] <- suppressWarnings(OLE.fun(sub.2[order(sub.2$Var1),], alpha=0.05)$Estimate)
          setTxtProgressBar(pb, i)
        }
        
        
    }
    
    } else {
      
      if (parallel==TRUE) {
        
        if(setCores==FALSE) {
          num_cores <- detectCores()-1
        } else {
          num_cores <- cores
        }
        
        cl<-makeCluster(num_cores)
        registerDoParallel(cl)
        
        df.res <- foreach(i=1:length(pts@data$extyear), .combine=data.frame,
                          .export=ls(.GlobalEnv), .packages = c("spatExtinct")) %dopar%  {
                            oles <- c()
                            # nreps = j
                            for (j in 1:reps) {
                              row.list <- sample(idx[i,],k.nn,prob=dists[i,],replace=FALSE)
                              sub <- sightings[row.list,]
                              sub <- sub[,c('year','sighting')]
                              sub.2 <- data.frame(table(sub$year))
                              sub.2$Var1 <- as.numeric(as.character(sub.2$Var1))
                              oles <- c(oles,sExtinct::OLE.fun(sub.2[order(sub.2$Var1),], alpha=0.05)$Estimate)
                            }
                            if (verbose==FALSE) {print.noquote(i)}
                            data.frame(run=c(mean(oles[!is.na(oles)]),var(oles[!is.na(oles)])))
                          }
        
        pts@data$extyear <- t(df.res[1,])
        pts@data$var <- t(df.res[2,])
        
        
        
        
      } else { for (i in 1:nrow(idx)) {

    oles <- c()

    if (adaptive==TRUE) {

      for (j in 1:10) {

        est <- NA
        while(is.na(est)){
          row.list <- sample(idx[i,],k.nn,prob=dists[i,],replace=FALSE)
          sub <- sightings[row.list,]
          sub <- sub[,c('year','sighting')]
          sub.2 <- data.frame(table(sub$year))
          sub.2$Var1 <- as.numeric(as.character(sub.2$Var1))
          est <- suppressWarnings(sExtinct::OLE.fun(sub.2[order(sub.2$Var1),], alpha=0.05)$Estimate)
        }

        oles <- c(oles,est)

      }

      mean1 <- mean(oles)
      delta <- 1

      while (delta > epsilon) {


        est <- NA
        while(is.na(est)){
          row.list <- sample(idx[i,],k.nn,prob=dists[i,],replace=FALSE)
          sub <- sightings[row.list,]
          sub <- sub[,c('year','sighting')]
          sub.2 <- data.frame(table(sub$year))
          sub.2$Var1 <- as.numeric(as.character(sub.2$Var1))
          est <- suppressWarnings(sExtinct::OLE.fun(sub.2[order(sub.2$Var1),], alpha=0.05)$Estimate)
          }

        mean1 <- mean(na.omit(oles))
        oles <- c(oles,est)
        mean2 <- mean(na.omit(oles))        
        
        if(mean1==min(oles)) {
          delta <- 1
        } else {
          delta <- abs(mean2-mean1)/(mean1-min(oles))
        }
      }


    } else {
        for (j in 1:reps) {
          
          est <- NA
          
          while(is.na(est)){
            row.list <- sample(idx[i,],k.nn,prob=dists[i,],replace=FALSE)
            sub <- sightings[row.list,]
            sub <- sub[,c('year','sighting')]
            sub.2 <- data.frame(table(sub$year))
            sub.2$Var1 <- as.numeric(as.character(sub.2$Var1))
            est <- suppressWarnings(sExtinct::OLE.fun(sub.2[order(sub.2$Var1),], alpha=0.05)$Estimate)
          }
          
          oles <- c(oles,est)
        }
        
      }
    
    pts@data$extyear[i] <- mean(na.omit(oles))
    pts@data$var[i] <- var(oles)
    setTxtProgressBar(pb, i)

    }

}}

  ole.grid <- rasterize(pts, grid, pts$extyear, fun = mean)

  if(randomize==FALSE) {
    return(ole.grid)
  } else {
    ole.var.grid <- rasterize(pts, grid, pts$var, fun = mean)
    stack <- stack(ole.grid,ole.var.grid)
    names(stack) <- c('Mean OLE', 'Variogram')
    return(stack)
  }
  
  stopCluster()

}
cjcarlson/spatExtinct documentation built on May 25, 2019, 3:26 p.m.