R/bootshtrap.R

# a bootstrapping algorithm for the modern era
# solves issue of duplicate years producing heterogeneous sample sizes, where uncertainty is high where points have clustered years
# also solves the issue of all years same 
# DOES slow runtime substantially.

bsplus <- function(sightings, k.nn, idx, dists, i) {
  
  # use cast to combine probabilities across unique years
  # rescale to 1 again
  # sample unique years
  
  distrow <- dists[i,]
  centerpoints <- sum(distrow==1) 
  
  if(centerpoints>k.nn) {
    row.list <- idx[i,][which(distrow==1)]
    sub <- sightings[row.list,]
    sub <- sub[,c('year','sighting')]
    sub.2 <- data.frame(table(sub$year))
    sub.2$Var1 <- as.numeric(as.character(sub.2$Var1))
    
    if(length(unique(sub.2$Var1))>=k.nn) {
      L <- length(order(sub.2$Var1))
      dates <- unique(sub.2$Var1)[(L-k.nn+1):L]
      needed <- 0
    } else {
      part.dates <-  sort(sub.2$Var1)
      needed <- k.nn-length(part.dates)
    }
  } else {
    if (centerpoints>0) {
      row.list <- idx[i,][which(distrow==1)]
      
      
      sub <- sightings[row.list,]
      sub <- sub[,c('year','sighting')]
      sub.2 <- data.frame(table(sub$year))
      sub.2$Var1 <- as.numeric(as.character(sub.2$Var1))
      
      part.dates <-  sort(sub.2$Var1)
      needed <- k.nn-length(part.dates)
    } else {
      needed <- k.nn
    }
  }
  
  if(needed>0) {
    
    idx.new<-idx[i,][which(!(dists[i,]==1))]
    dists.new<-dists[i,][which(!(dists[i,]==1))]
    
    row.list <- sample(idx.new,needed,prob=dists.new,replace=FALSE)
    sub <- sightings[row.list,]
    sub <- sub[,c('year','sighting')]
    sub.2 <- data.frame(table(sub$year))
    sub.2$Var1 <- as.numeric(as.character(sub.2$Var1))
    sub.2 <- sub.2[order(sub.2$Var1),]
    part.dates.2 <- sub.2$Var1
    
    part.dates.2<-unique(part.dates.2)
    if(exists('part.dates')) {
      part.dates<- c(part.dates,part.dates.2) 
    } else {
      part.dates <- part.dates.2
    }
    part.dates <- unique(part.dates)
    
    while(length(part.dates)<k.nn) {
      
      needed <- k.nn-length(part.dates)
      
      row.list <- sample(idx.new,needed,prob=dists.new,replace=FALSE)
      sub <- sightings[row.list,]
      sub <- sub[,c('year','sighting')]
      sub.2 <- data.frame(table(sub$year))
      sub.2$Var1 <- as.numeric(as.character(sub.2$Var1))
      sub.2 <- sub.2[order(sub.2$Var1),]
      part.dates.2 <- sub.2$Var1
      
    part.dates.2<-unique(part.dates.2)
    part.dates <- unique(c(part.dates,part.dates.2))
    }
    
    dates <- part.dates
    
  }
  
  dates <- sort(dates)
  
  return(dates)
}
cjcarlson/spatExtinct documentation built on May 25, 2019, 3:26 p.m.