## scripts/old/CalculateNicheSubspeciesDifferences_old.R
## (original file path — commented out so the script parses)

#' Print Labeled Points to a PNG
#'
#' Plots one map panel per assigned subspecies, overlaying the occurrence
#' points colored/symbolized by their original subspecies label, and writes
#' the multi-panel figure to "Subspecies_assignment_<species>.png" in the
#' current working directory.
#'
#' @param species Character scalar; species name used in the output filename.
#' @param subspecies Character vector of subspecies names; one panel is drawn
#'   per element.
#' @param bg A Raster* layer used as the background map.
#' @param loc_good Data frame of vetted localities. Columns 2:3 must be
#'   longitude/latitude; must also contain a factor column `subspecies` (may
#'   include level "unknown") and a column `assigned`.
#'
#' @export
#' @examples
#' \dontrun{
#' printPointsPng(species, subspecies, bg, loc_good)
#' }
printPointsPng = function(species, subspecies, bg, loc_good){
  print("PrintPointsPNG")
  png(paste0("Subspecies_assignment_", species, ".png"))
  ## near-square panel layout, kept wider than tall
  mf = n2mfrow(length(subspecies))
  if (mf[[1]] > mf[[2]]) { mf = rev(mf) }
  par(mfrow = mf)

  ## reorder factor levels so "unknown" is plotted first (and thus underneath)
  unkInd = which(levels(loc_good$subspecies) == "unknown")
  notUnkInd = which(levels(loc_good$subspecies) != "unknown")
  loc_good$subspecies = factor(loc_good$subspecies,
                               levels(loc_good$subspecies)[c(unkInd, notUnkInd)])

  for (sub in subspecies) {
    print(sub)
    raster::plot(bg, col = "grey", colNA = "darkgrey",
                 main = paste("assigned", sub), legend = FALSE)
    temp = loc_good[loc_good$assigned == sub, ]
    if (nrow(temp) > 0) {
      ## one color/symbol per original subspecies label
      for (assignNum in seq_along(levels(temp$subspecies))) {
        assignSub = levels(temp$subspecies)[assignNum]
        mycol = palette()[assignNum]
        points(temp[temp$subspecies == assignSub, 2:3],
               col = mycol, pch = assignNum)
      }
      legend("top", legend = as.factor(unique(temp$subspecies)),
             pch = unique(as.numeric(as.factor(temp$subspecies))),
             bty = "n",
             col = as.factor(unique(temp$subspecies)))
    } else {
      print("NO POINTS")
    }
  }
  dev.off()
}

#' Print Labeled Good Points to a PDF
#'
#' Plots one page per assigned subspecies, overlaying the vetted ("good")
#' occurrence points symbolized by their original subspecies label, and writes
#' "Subspecies_assignment_goodSubspp_<species>.pdf" to the current working
#' directory.
#'
#' @param species Character scalar; species name used in the output filename.
#' @param subspecies Character vector of subspecies names; one page each.
#' @param bg A Raster* layer used as the background map.
#' @param loc_good Data frame of vetted localities. Columns 2:3 must be
#'   longitude/latitude; must also contain a factor column `subspecies` (may
#'   include level "unknown") and a column `assigned`.
#'
#' @export
#' @examples
#' \dontrun{
#' printPointsPdfGood(species, subspecies, bg, loc_good)
#' }
printPointsPdfGood = function(species, subspecies, bg, loc_good){
  print("PrintPointsPdfGood")
  pdf(paste0("Subspecies_assignment_goodSubspp_", species, ".pdf"))

  ## reorder factor levels so "unknown" is plotted first (underneath)
  unkInd = which(levels(loc_good$subspecies) == "unknown")
  notUnkInd = which(levels(loc_good$subspecies) != "unknown")
  loc_good$subspecies = factor(loc_good$subspecies,
                               levels(loc_good$subspecies)[c(unkInd, notUnkInd)])

  for (sub in subspecies) {
    print(sub)
    raster::plot(bg, col = "grey", colNA = "darkgrey",
                 main = paste("assigned", sub), legend = FALSE)
    temp = loc_good[loc_good$assigned == sub, ]
    if (nrow(temp) > 0) {
      ## one color/symbol per original subspecies label
      for (assignNum in seq_along(levels(temp$subspecies))) {
        assignSub = levels(temp$subspecies)[assignNum]
        mycol = palette()[assignNum]
        points(temp[temp$subspecies == assignSub, 2:3],
               col = mycol, pch = assignNum)
      }
      legend("top", legend = as.factor(unique(temp$subspecies)),
             pch = unique(as.numeric(as.factor(temp$subspecies))),
             bty = "n",
             col = as.factor(unique(temp$subspecies)))
    } else {
      print("NO POINTS")
    }
  }
  dev.off()
}

#' Print Labeled Suspect Points to a PDF
#'
#' Plots one page per assigned subspecies, overlaying the suspect occurrence
#' points symbolized by their original subspecies label, and writes
#' "Subspecies_assignment_suspectSubspp_<species>.pdf" to the current working
#' directory.
#'
#' @param species Character scalar; species name used in the output filename.
#' @param subspecies Character vector of subspecies names; one page each.
#' @param bg A Raster* layer used as the background map.
#' @param loc_suspect Data frame of suspect localities. Columns 2:3 must be
#'   longitude/latitude; must also contain a factor column `subspecies` (may
#'   include level "unknown") and a column `assigned`.
#'
#' @export
#' @examples
#' \dontrun{
#' printPointsPdfSuspect(species, subspecies, bg, loc_suspect)
#' }
printPointsPdfSuspect = function(species, subspecies, bg, loc_suspect){
  print("PrintPointsSuspect")
  pdf(paste0("Subspecies_assignment_suspectSubspp_", species, ".pdf"))

  ## BUG FIX: this releveling previously operated on the global `loc_good`
  ## instead of the `loc_suspect` argument, so the "unknown plots first"
  ## ordering never applied to the data actually plotted here.
  unkInd = which(levels(loc_suspect$subspecies) == "unknown")
  notUnkInd = which(levels(loc_suspect$subspecies) != "unknown")
  loc_suspect$subspecies = factor(loc_suspect$subspecies,
                                  levels(loc_suspect$subspecies)[c(unkInd, notUnkInd)])

  for (sub in subspecies) {
    print(sub)
    raster::plot(bg, col = "grey", colNA = "darkgrey",
                 main = paste("assigned", sub), legend = FALSE)
    temp = loc_suspect[loc_suspect$assigned == sub, ]
    if (nrow(temp) > 0) {
      ## one color/symbol per original subspecies label
      for (assignNum in seq_along(levels(temp$subspecies))) {
        assignSub = levels(temp$subspecies)[assignNum]
        mycol = palette()[assignNum]
        points(temp[temp$subspecies == assignSub, 2:3],
               col = mycol, pch = assignNum)
      }
      legend("top", legend = as.factor(unique(temp$subspecies)),
             pch = unique(as.numeric(as.factor(temp$subspecies))),
             bty = "n",
             col = as.factor(unique(temp$subspecies)))
    } else {
      print("NO POINTS")
    }
  }
  dev.off()
}

#' Wrapper function that prints to pdf and png
#'
#' Writes the suspect/good locality tables, saves the subspecies range
#' polygons as ESRI shapefiles, and renders the PNG/PDF point maps. All
#' output goes to the current working directory.
#'
#' @param processedSpecies List returned by databaseToAssignedSubspecies()
#'   with elements `loc_suspect`, `loc_good`, and `pol` (named polygon list).
#' @param outputDir NOTE(review): currently unused — output lands in getwd();
#'   confirm whether this should setwd(outputDir) first.
#' @param species Species name used in output filenames.
#' @param subspecies Character vector of subspecies names.
#' @param bg Raster layer used as the plotting background.
#'
#' @export
#' @examples
#' \dontrun{
#' outputProcessedSpecies(processed, outputDir, species, subspecies, bg)
#' }
outputProcessedSpecies = function(processedSpecies, outputDir, species, subspecies, bg) {

  ## make the first palette color translucent so overplotted points show through
  palette(c(adjustcolor(palette()[1], alpha.f = 0.5), palette()[2:length(palette())]))

  loc_suspect = processedSpecies$loc_suspect
  loc_good = processedSpecies$loc_good
  pol = processedSpecies$pol

  print("Writing Tables")
  write.table(loc_suspect,
              paste0(paste("SuspectSubspp", species, paste(subspecies, collapse = " "), sep = "_"), ".txt"),
              quote = FALSE, sep = "\t", row.names = FALSE)
  write.table(loc_good,
              paste0(paste("GoodSubspp", species, paste(subspecies, collapse = " "), sep = "_"), ".txt"),
              quote = FALSE, sep = "\t", row.names = FALSE)

  print("Saving polygons")
  ## seq_along avoids the 1:0 loop if pol is ever empty
  for (i in seq_along(pol)) {
    obj = pol[[i]]
    name = names(pol)[i]
    obj2 = sp::SpatialPolygonsDataFrame(obj, data = as.data.frame(rep(1, length(obj))),
                                        match.ID = FALSE)
    ## NOTE(review): rgdal was retired in 2023; sf::st_write is the modern
    ## replacement — flagged, not changed, to avoid a new dependency.
    rgdal::writeOGR(obj = obj2, dsn = paste0(paste("Polygon", species, name, sep = "_"), ".shp"),
                    layer = name, driver = "ESRI Shapefile")
  }

  print("Printing PNG and PDF files")
  printPointsPng(species = species, subspecies = subspecies, bg = bg, loc_good = loc_good)
  printPointsPdfGood(species = species, subspecies = subspecies, bg = bg, loc_good = loc_good)
  printPointsPdfSuspect(species = species, subspecies = subspecies, bg = bg, loc_suspect = loc_suspect)

  ## restore the default palette so later plots are unaffected
  palette("default")

}

#' Cleaning points by environmental variables
#'
#' NOTE(review): orphaned roxygen block — no function definition follows it
#' here. The matching cleanByEnvironment() definition appears further down,
#' inside the THIN_BEYOND section; the stray example previously attached here
#' referenced an unrelated function and has been removed.
#'
#' @param Env Environmental raster stack.
#' @param loc Data frame of localities.
#' Pull out the model with the min OR and max AUC
#'
#' Identifies the model from an ENMeval results table with the maximum AUC,
#' using the minimum omission rate as the primary sort (order() is stable, so
#' among equal-AUC models the lowest-OR one wins) as a simple model-selection
#' framework.
#'
#' @param res An ENMevaluation object; its `res@results` table is used.
#' @param or Column name of the omission-rate statistic to minimize.
#' @param auc Column name of the AUC statistic to maximize.
#' @return Integer row index of the best model within `res@results`.
#'
#' @export
#' @examples
#' \dontrun{
#' minORmaxAUCmodel(res)
#' }
minORmaxAUCmodel = function(res, or = "Mean.OR10", auc = "Mean.AUC"){
  ## sort ascending by omission rate, then (stably) descending by AUC,
  ## so row 1 of setsort2 is the max-AUC model with min-OR tiebreak
  setsort = res@results[order(res@results[, or]), ]
  setsort2 = setsort[order(setsort[, auc], decreasing = TRUE), ]
  ## locate the winning model's row in the original (unsorted) table by its
  ## settings label in column 1
  best = which(as.character(res@results[, 1]) == as.character(setsort2[1, 1]))
  return(best)
}

#' Get the best raster
#'
#' Selects the AICc-best model from an ENMeval results table (dropping
#' zero-parameter models), refits it with java MaxEnt using the same feature
#' classes and beta multiplier, predicts over the environmental stack, and
#' writes the prediction to an ASCII raster.
#'
#' @param best Index of the best model. NOTE(review): currently unused — model
#'   selection inside this function is redone via delta.AICc; confirm intent.
#' @param res An ENMevaluation object; its `res@results` table is used.
#' @param wdToPrint Output directory; the function setwd()s here.
#' @param prefix,suffix Strings pasted into the output filename.
#' @param species Species name pasted into the output filename.
#' @param Env Environmental raster stack used for fitting and prediction.
#' @param locs Occurrence localities passed to maxent().
#'
#' @export
#' @examples
#' \dontrun{
#' getBestRaster(best, res, prefix = "p", suffix = "s",
#'               species = sp, Env = Env, locs = locs)
#' }
getBestRaster = function(best, res, wdToPrint = paste0(getwd(), "/"),
                         prefix, suffix, species, Env, locs){
  setwd(wdToPrint)  # NOTE(review): changes the working directory permanently
  mod.table <- res@results
  ## drop degenerate (zero-parameter) models, then take the lowest delta.AICc
  no.zero.param <- mod.table[mod.table$nparam != 0, ]
  ordered <- no.zero.param[with(no.zero.param, order(delta.AICc)), ]
  opt.mod <- ordered[1, ]

  ## beta (regularization) multiplier of the chosen model
  b.m <- opt.mod$rm
  beta.mulr <- paste0('betamultiplier=', b.m)
  ## java maxent needs every feature switched off first,
  ## then the wanted ones switched back on
  false.args <- c('noautofeature','noproduct','nothreshold','noquadratic','nohinge','nolinear')
  ## map the feature-class code (e.g. "LQH") to maxent feature names;
  ## BUG FIX: previously an unmatched code left `feats` undefined and the
  ## loop below failed with a confusing error — now fails fast and clearly
  feat <- strsplit(as.vector(opt.mod[, 2]), ',')[[1]]
  feats <- switch(feat,
                  'L' = 'linear',
                  'LQ' = c('quadratic', 'linear'),
                  'H' = 'hinge',
                  'P' = 'product',
                  'T' = 'threshold',
                  'LQH' = c('linear', 'quadratic', 'hinge'),
                  'LQHP' = c('linear', 'quadratic', 'hinge', 'product'),
                  'LQHPT' = c('linear', 'quadratic', 'hinge', 'product', 'threshold'),
                  stop("Unrecognized feature class: ", feat))
  ## turn the selected features "on" by overwriting their 'no...' flags
  for (j in seq_along(feats)) {
    false.args[which(sub('no', '', false.args) == feats[j])] = feats[j]
  }
  m <- maxent(Env, locs, args = c(false.args, beta.mulr,
                                  'noremoveduplicates', 'noaddsamplestobackground'))
  pred.raw <- predict(object = m, x = Env, na.rm = TRUE, format = 'GTiff',
                      overwrite = TRUE, progress = 'text', args = 'logistic')
  writeRaster(pred.raw,
              filename = paste0(wdToPrint, prefix, "_BestModel_", species, " ", suffix, ".asc"),
              format = "ascii", overwrite = TRUE)
}



{
  ##TODO: consider lumping?
  ## for now testing with phainopeplaNitens
  ##TODO: change all the lapply to function if possible
  ##TODO: update plotting here (and in other file) so that everything plots iteratively, esp if pairwise
  ##TODO: add in species name not just subspp name

}
## Reinstall the development version of subsppLabelR from GitHub and reload it.
## NOTE(review): detach() errors if the package is not currently attached, and
## install_github() hits the network on every run — confirm this is intended.
detach("package:subsppLabelR", unload=TRUE)
devtools::install_github('kaiyaprovost/subsppLabelR')
library(subsppLabelR,verbose=T)
## need to add a check in here -- remove unknown points with exact same lat/long as a labeled point

## SETUP
{
  setwd("~/Documents/Classes/Finished Classes/Spatial Bioinformatics/project/")

  ## When TRUE, the THIN_BEYOND section further below cleans localities
  ## against the environmental layers and spatially thins them with spThin.
  THIN_BEYOND = FALSE

  ## also split these into subspecies assignments with $assigned
  ## this is a list, can access with $x or [[i]]

  ## if you want you can check through the points that suck to add in more localities
  ## but for now not doing that

  ## now we have groups that are assigned to single areas and don't mismatch
  ## let's split into two different groupings and make some niche models!
  ## then test overlap

  # step 1 -- import environmental variables
  ##TODO: add in other data than Worldclim
  ##TODO: water layer, how often water there is at that spot? 30m layer!!!
  ## Stack every .bil layer (WorldClim 2.5-arcmin download) in this directory
  Env = raster::stack(list.files(
    path='/Users/kprovost/Documents/Classes/Finished Classes/Spatial Bioinformatics/spatial_bioinformatics-master/ENM/wc2-5/',
    pattern="\\.bil$",
    full.names=T))
  ## crop to lon -125..-60, lat 10..50 (roughly North/Central America)
  ext = raster::extent(c(-125,-60,10,50)) ## make sure this will play nice with your points
  Env = raster::crop(Env, ext)
  bg = Env[[1]] ## just for plotting

  ## get locs
  #species = "Phainopepla nitens"
  #subspecies = c("nitens","lepida")

  #species = "Cardinalis sinuatus"
  #subspecies = c("sinuatus","fulvescens","peninsulae")
  #species = "Cardinalis cardinalis"
  #subspecies = c("affinis","canicaudus","cardinalis","carneus",
  #                "clintoni","coccineus","flammiger","floridanus",
  #                "igneus","littoralis","magnirostris","mariae",
  #                "phillipsi","saturatus","seftoni","sinaloensis",
  #                "superbus","townsendi","yucatanicus")

  #subspecies_igne = c("affinis","clintoni","igneus","seftoni","sinaloensis","superbus","townsendi")
  #subspecies_card = c("canicaudus","cardinalis","floridanus","magnirostris")
  #subspecies_cocc = c("coccineus","flammiger","littoralis","phillipsi","yucatanicus")
  #subspecies_rest = c("carneus","mariae","saturatus")
  #test = c("clintoni","affinis")

  ## there is a bug -- if one subspp range is entirely subsumed within another polygon,
  ## will delete that subspecies. no bueno

  #alllocs = "/Users/kprovost/Documents/Classes/Finished Classes/Spatial Bioinformatics/project/big_sinuatus_testrun_NOTWORKING/AllSubspp_Cardinalis sinuatus_sinuatus fulvescens peninsulae_clean.txt"
  #labeledLoc = read.csv(alllocs,sep="\t")
  #locs = labeledLoc
  ## NOTE(review): redundant reload of subsppLabelR (already loaded above);
  ## detach() errors if the package is not attached at this point.
  detach("package:subsppLabelR", unload=TRUE)
  library(subsppLabelR,verbose=T)

  par(ask=F)

}

## GENERATE FUNCTIONS
## ALL OF THESE FUNCTIONS LAYER UNKNOWN POINTS ON LAST
{
  ## Duplicate of the top-level printPointsPng(): writes one PNG with a panel
  ## per assigned subspecies, points symbolized by their original label.
  printPointsPng = function(species, subspecies, bg, loc_good){
    print("PrintPointsPNG")
    png(paste0("Subspecies_assignment_", species, ".png"))
    ## near-square panel layout, kept wider than tall
    mf = n2mfrow(length(subspecies))
    if (mf[[1]] > mf[[2]]) { mf = rev(mf) }
    par(mfrow = mf)

    ## plot "unknown" first so labeled points draw on top
    unkInd = which(levels(loc_good$subspecies) == "unknown")
    notUnkInd = which(levels(loc_good$subspecies) != "unknown")
    loc_good$subspecies = factor(loc_good$subspecies,
                                 levels(loc_good$subspecies)[c(unkInd, notUnkInd)])

    for (sub in subspecies) {
      print(sub)
      raster::plot(bg, col = "grey", colNA = "darkgrey",
                   main = paste("assigned", sub), legend = FALSE)
      temp = loc_good[loc_good$assigned == sub, ]
      if (nrow(temp) > 0) {
        for (assignNum in seq_along(levels(temp$subspecies))) {
          assignSub = levels(temp$subspecies)[assignNum]
          mycol = palette()[assignNum]
          points(temp[temp$subspecies == assignSub, 2:3],
                 col = mycol, pch = assignNum)
        }
        legend("top", legend = as.factor(unique(temp$subspecies)),
               pch = unique(as.numeric(as.factor(temp$subspecies))),
               bty = "n",
               col = as.factor(unique(temp$subspecies)))
      } else {
        print("NO POINTS")
      }
    }
    dev.off()
  }

  ## Duplicate of the top-level printPointsPdfGood(): writes one PDF page per
  ## assigned subspecies for the vetted ("good") localities.
  printPointsPdfGood = function(species, subspecies, bg, loc_good){
    print("PrintPointsPdfGood")
    pdf(paste0("Subspecies_assignment_goodSubspp_", species, ".pdf"))

    ## plot "unknown" first so labeled points draw on top
    unkInd = which(levels(loc_good$subspecies) == "unknown")
    notUnkInd = which(levels(loc_good$subspecies) != "unknown")
    loc_good$subspecies = factor(loc_good$subspecies,
                                 levels(loc_good$subspecies)[c(unkInd, notUnkInd)])

    for (sub in subspecies) {
      print(sub)

      raster::plot(bg, col = "grey", colNA = "darkgrey",
                   main = paste("assigned", sub), legend = FALSE)
      temp = loc_good[loc_good$assigned == sub, ]
      if (nrow(temp) > 0) {
        for (assignNum in seq_along(levels(temp$subspecies))) {
          assignSub = levels(temp$subspecies)[assignNum]
          mycol = palette()[assignNum]
          points(temp[temp$subspecies == assignSub, 2:3],
                 col = mycol, pch = assignNum)
        }
        legend("top", legend = as.factor(unique(temp$subspecies)),
               pch = unique(as.numeric(as.factor(temp$subspecies))),
               bty = "n",
               col = as.factor(unique(temp$subspecies)))
      } else {
        print("NO POINTS")
      }
    }
    dev.off()
  }

  ## Duplicate of the top-level printPointsPdfSuspect(): writes one PDF page
  ## per assigned subspecies for the suspect localities.
  printPointsPdfSuspect = function(species, subspecies, bg, loc_suspect){
    print("PrintPointsSuspect")
    pdf(paste0("Subspecies_assignment_suspectSubspp_", species, ".pdf"))

    ## BUG FIX: this releveling previously operated on the global `loc_good`
    ## instead of the `loc_suspect` argument, so the "unknown plots first"
    ## ordering never applied to the data actually plotted here.
    unkInd = which(levels(loc_suspect$subspecies) == "unknown")
    notUnkInd = which(levels(loc_suspect$subspecies) != "unknown")
    loc_suspect$subspecies = factor(loc_suspect$subspecies,
                                    levels(loc_suspect$subspecies)[c(unkInd, notUnkInd)])

    for (sub in subspecies) {
      print(sub)
      raster::plot(bg, col = "grey", colNA = "darkgrey",
                   main = paste("assigned", sub), legend = FALSE)
      temp = loc_suspect[loc_suspect$assigned == sub, ]
      if (nrow(temp) > 0) {
        for (assignNum in seq_along(levels(temp$subspecies))) {
          assignSub = levels(temp$subspecies)[assignNum]
          mycol = palette()[assignNum]
          points(temp[temp$subspecies == assignSub, 2:3],
                 col = mycol, pch = assignNum)
        }
        legend("top", legend = as.factor(unique(temp$subspecies)),
               pch = unique(as.numeric(as.factor(temp$subspecies))),
               bty = "n",
               col = as.factor(unique(temp$subspecies)))
      } else {
        print("NO POINTS")
      }
    }
    dev.off()
  }

  ## Duplicate of the top-level outputProcessedSpecies(): writes locality
  ## tables, polygon shapefiles, and the PNG/PDF maps to getwd().
  ## NOTE(review): `outputDir` is unused here too — confirm intent.
  outputProcessedSpecies = function(processedSpecies, outputDir, species, subspecies, bg) {

    ## make the first palette color translucent so overplotted points show
    palette(c(adjustcolor(palette()[1], alpha.f = 0.5), palette()[2:length(palette())]))

    loc_suspect = processedSpecies$loc_suspect
    loc_good = processedSpecies$loc_good
    pol = processedSpecies$pol

    print("Writing Tables")
    write.table(loc_suspect,
                paste0(paste("SuspectSubspp", species, paste(subspecies, collapse = " "), sep = "_"), ".txt"),
                quote = FALSE, sep = "\t", row.names = FALSE)
    write.table(loc_good,
                paste0(paste("GoodSubspp", species, paste(subspecies, collapse = " "), sep = "_"), ".txt"),
                quote = FALSE, sep = "\t", row.names = FALSE)

    print("Saving polygons")
    ## seq_along avoids the 1:0 loop if pol is ever empty
    for (i in seq_along(pol)) {
      obj = pol[[i]]
      name = names(pol)[i]
      obj2 = sp::SpatialPolygonsDataFrame(obj, data = as.data.frame(rep(1, length(obj))),
                                          match.ID = FALSE)
      rgdal::writeOGR(obj = obj2, dsn = paste0(paste("Polygon", species, name, sep = "_"), ".shp"),
                      layer = name, driver = "ESRI Shapefile")
    }

    print("Printing PNG and PDF files")
    printPointsPng(species = species, subspecies = subspecies, bg = bg, loc_good = loc_good)
    printPointsPdfGood(species = species, subspecies = subspecies, bg = bg, loc_good = loc_good)
    printPointsPdfSuspect(species = species, subspecies = subspecies, bg = bg, loc_suspect = loc_suspect)

    ## restore the default palette so later plots are unaffected
    palette("default")

  }
}



## Load the list of target taxa (tab-separated columns: genus, species,
## subspecies) and process each unique genus+species combination.
list_of_taxa = "/Users/kprovost/Documents/Dissertation/CHAPTER1_REVIEW/southwestern_subspecies.txt"
taxa = read.csv(list_of_taxa, sep = "\t", header = FALSE)
colnames(taxa) = c("GEN","SPP","SUB")

unique_species = unique(taxa[, 1:2])

## NOTE(review): loop starts at row 2 — presumably row 1 was already run
## (see the commented-out `1:1` line in the original); confirm intent.
for (rownum in 2:nrow(unique_species)) {
  ## all subspecies rows for this genus+species
  temp = taxa[taxa$GEN == unique_species$GEN[rownum] &
                taxa$SPP == unique_species$SPP[rownum], ]
  ## BUG FIX: since R 4.0 read.csv returns character columns, and calling
  ## droplevels.factor() on a character vector errors; coerce to factor first
  subspp = droplevels(as.factor(unlist(temp$SUB)))
  spp = paste(unlist(temp[1, 1:2]), sep = " ", collapse = " ")
  print(spp)
  print(subspp)

  processed = databaseToAssignedSubspecies(spp = spp,
                                           subsppList = subspp,
                                           pointLimit = 1000,
                                           dbToQuery = c("gbif","bison","ecoengine","vertnet"), ## removed everything besides these
                                           quantile = 0.95,
                                           xmin = -125, xmax = -60, ymin = 10, ymax = 50,
                                           plotIt = TRUE, bgLayer = bg,
                                           outputDir = paste0("/Users/kprovost/Documents/Classes/Finished Classes/Spatial Bioinformatics/project/", spp),
                                           epsilon = 1e-6)

  outputProcessedSpecies(processedSpecies = processed,
                         outputDir = paste0("/Users/kprovost/Documents/Classes/Finished Classes/Spatial Bioinformatics/project/", spp),
                         species = spp,
                         subspecies = subspp,
                         bg = bg)

}


## NOTE(review): `species`, `subspecies`, and `alllocs` are only assigned in
## commented-out code above, so this section errors if the script is run
## top-to-bottom — presumably kept for interactive/manual use; confirm.
processedSpecies = databaseToAssignedSubspecies(spp=species,
                                                subsppList=subspecies,
                                                pointLimit=10,dbToQuery=c("gbif","bison","inat","ebird","ecoengine","vertnet"),
                                                quantile=0.95,xmin=-125,xmax=-60,ymin=10,ymax=50,plotIt=T,bgLayer=bg,
                                                outputDir="~/Documents/Classes/Finished Classes/Spatial Bioinformatics/project/",
                                                datafile=alllocs,
                                                epsilon=1e-6)

## Write tables, shapefiles, and figures for the processed species
outputProcessedSpecies(processedSpecies=processedSpecies,outputDir="~/Documents/Classes/Finished Classes/Spatial Bioinformatics/project/",
                       species=species,subspecies=subspecies,bg=bg)

# loc_suspect = rbind(processedSpecies_card$loc_suspect,
#                     processedSpecies_cocc$loc_suspect,
#                     processedSpecies_rest$loc_suspect,
#                     processedSpecies_igne$loc_suspect)
# loc_good = rbind(processedSpecies_card$loc_good,
#                  processedSpecies_cocc$loc_good,
#                  processedSpecies_rest$loc_good,
#                  processedSpecies_igne$loc_good)
# loc_good = rbind(processedSpecies_card$loc_good,
#                  processedSpecies_cocc$loc_good,
#                  processedSpecies_rest$loc_good,
#                  processedSpecies_igne$loc_good)

## build png of the points used


## TESTING A NEURAL NETWORK WITH CARET
## SOURCE: http://www.cmap.polytechnique.fr/~lepennec/R/Learning/Learning.html
{
  library("plyr")
  library("dplyr")
  library("ggplot2")
  library("grid")
  library("gridExtra")
  library("caret")
  library("h2o")
  library("doFuture")
  registerDoFuture()
  ## NOTE(review): plan(multiprocess) is deprecated in the future package;
  ## plan(multisession) is the modern equivalent — confirm before changing.
  plan(multiprocess)
  library(AppliedPredictiveModeling)
  library(RColorBrewer)
  ## toy two-class data set (predictors + classes) from AppliedPredictiveModeling
  data(twoClassData)
  twoClass = cbind(as.data.frame(predictors), classes)
  twoClassColor <- brewer.pal(3, 'Set1')[1:2]
  names(twoClassColor) <- c('Class1', 'Class2')
  ## scatterplot of the raw two-class data
  ggplot(data = twoClass, aes(x = PredictorA, y = PredictorB)) +
    geom_point(aes(color = classes), size = 6, alpha = .5) +
    scale_colour_manual(name = 'classes', values = twoClassColor) +
    scale_x_continuous(expand = c(0,0)) +
    scale_y_continuous(expand = c(0,0))
  ## dense grid over predictor space for the decision-surface plots
  nbp <- 250
  PredA <- seq(min(twoClass$PredictorA), max(twoClass$PredictorA), length = nbp)
  PredB <- seq(min(twoClass$PredictorB), max(twoClass$PredictorB), length = nbp)
  Grid <- expand.grid(PredictorA = PredA, PredictorB = PredB)

  ## Draw the decision region (filled tiles) and the decision boundary
  ## (contour at 1.5 between the two numeric class codes) side by side.
  PlotGrid <- function(pred, title) {
    surf <- (ggplot(data = twoClass, aes(x = PredictorA, y = PredictorB,
                                         color = classes)) +
               geom_tile(data = cbind(Grid, classes = pred), aes(fill = classes)) +
               scale_fill_manual(name = 'classes', values = twoClassColor) +
               ggtitle("Decision region") + theme(legend.text = element_text(size = 10)) +
               scale_colour_manual(name = 'classes', values = twoClassColor)) +
      scale_x_continuous(expand = c(0,0)) +
      scale_y_continuous(expand = c(0,0))
    pts <- (ggplot(data = twoClass, aes(x = PredictorA, y = PredictorB,
                                        color = classes)) +
              geom_contour(data = cbind(Grid, classes = pred), aes(z = as.numeric(classes)),
                           color = "red", breaks = c(1.5)) +
              geom_point(size = 4, alpha = .5) +
              ggtitle("Decision boundary") +
              theme(legend.text = element_text(size = 10)) +
              scale_colour_manual(name = 'classes', values = twoClassColor)) +
      scale_x_continuous(expand = c(0,0)) +
      scale_y_continuous(expand = c(0,0))
    grid.arrange(surf, pts, top = textGrob(title, gp = gpar(fontsize = 20)), ncol = 2)
  }
  library("caret")
  V <- 10         # number of cross-validation folds
  ## BUG FIX: was `T <- 4`, which shadowed the TRUE alias `T` for the rest of
  ## the session (any later `... = T` argument would silently receive 4);
  ## renamed to a descriptive variable
  n_repeats <- 4  # number of repeated-CV repeats
  TrControl <- trainControl(method = "repeatedcv",
                            number = V,
                            repeats = n_repeats)

  Seed <- 345
  ## Collect whole-data and per-resample error metrics for a fitted model
  ErrsCaret <- function(Model, Name) {
    Errs <- data.frame(t(postResample(predict(Model, newdata = twoClass), twoClass[["classes"]])),
                       Resample = "None", model = Name)
    rbind(Errs, data.frame(Model$resample, model = Name))
  }

  Errs <- data.frame()
  ## Train a caret model, plot its decision surface, and append its errors
  CaretLearnAndDisplay <- function(Errs, Name, Formula, Method, ...) {
    set.seed(Seed)
    Model <- train(as.formula(Formula), data = twoClass, method = Method, trControl = TrControl, ...)
    Pred <- predict(Model, newdata = Grid)
    PlotGrid(Pred, Name)
    Errs <- rbind(Errs, ErrsCaret(Model, Name))
  }
  Errs <- CaretLearnAndDisplay(Errs, "Neural Network", "classes ~ .", "mlp")
}
## TRIMS PROPERLY NOW, BUT NOT KEEPING THE CORRECT STUFF?
## LIKE NOT THROWING OUT VERY SMALL THINGS
## ALSO NOT RETAINING NAMES AFTER THE FIX WHICH IS ANNOYING
## HOWEVER 1,2,3 are in alphabetical order still so its easy to fix



# png("Phainopepla nitens assignment.png")
# par(mfrow=c(1,2))
# plot(bg, col="grey",colNA="darkgrey",main="assigned lepida")
# points(loc_good[loc_good$assigned=="lepida",2:3],col=as.factor(loc_good$subspecies),
#        pch=as.numeric(as.factor(loc_good$subspecies)))
# legend("top", legend=as.factor(unique(loc_good$subspecies)),
#        pch=unique(as.numeric(as.factor(loc_good$subspecies))),
#        bty="n",
#        col=as.factor(unique(loc_good$subspecies)))
# plot(bg, col="grey",colNA="darkgrey",main="assigned nitens")
# points(loc_good[loc_good$assigned=="nitens",2:3],col=as.factor(loc_good$subspecies),
#        pch=as.numeric(as.factor(loc_good$subspecies)))
# legend("top", legend=as.factor(unique(loc_good$subspecies)),
#        pch=unique(as.numeric(as.factor(loc_good$subspecies))),
#        bty="n",
#        col=as.factor(unique(loc_good$subspecies)))
# dev.off()

# raster::plot(bg, col="grey",colNA="darkgrey")
# points(loc_good[loc_good$subspecies=="nitens",2:3],col=as.factor(loc_good$assigned),
#        pch=as.numeric(as.factor(loc_good$subspecies)),main="nitens prior")
# raster::plot(bg, col="grey",colNA="darkgrey")
# points(loc_good[loc_good$subspecies=="lepida",2:3],col=as.factor(loc_good$assigned),
#        pch=as.numeric(as.factor(loc_good$subspecies)),main="lepida prior")

## step 2 -- remove any locations with no Env data

if (THIN_BEYOND == TRUE) {

  ## Drop localities that fall outside the environmental rasters, i.e. where
  ## the first Env layer extracts as NA at the locality's lon/lat (cols 2:3).
  ## Returns the filtered data frame and prints how many rows were removed.
  cleanByEnvironment = function(Env, loc){
    loc[, 2] = as.numeric(loc[, 2])
    loc[, 3] = as.numeric(loc[, 3])
    ## environmental values at each locality (one row per point)
    extr = raster::extract(Env, loc[, 2:3])
    ## removed dead `head(extr)` expression (value was discarded)
    loc_clean = loc[!is.na(extr[, 1]), ]
    print(paste("Removed", nrow(loc) - nrow(loc_clean), "rows with no Env data"))

    return(loc_clean)
  }

  loc_good_clean = cleanByEnvironment(Env=Env,loc=loc_good)

  # spThinBySubspecies = function(loc_good_clean,species,overwrite=T){
  #   ## step 3 -- subset data based on subspecies, then run spthin
  #   ## this only gives lat longs, results in a named list with subspp names as names
  #   loc_good_by_subspp = split(loc_good_clean[,2:3], loc_good_clean$assigned)
  #
  #   pathlist <- sapply(names(loc_good_by_subspp),function(x) NULL)
  #
  #   for(i in 1:length(names(loc_good_by_subspp))){
  #     sub = names(loc_good_by_subspp)[[i]]
  #     dir = paste(getwd(),"/Thinned/",sep="")
  #     if (!(file.exists(dir))){
  #       dir.create(file.path(dir))
  #       #setwd(file.path(subDir))
  #     }
  #     subDir = paste(getwd(),"/Thinned/",species," ",sub,sep="")
  #     if (!(file.exists(subDir))){
  #       dir.create(file.path(subDir))
  #       #setwd(file.path(subDir))
  #     }
  #
  #     loc_sub = loc_good_by_subspp[[i]]
  #     loc_sub$name = sub
  #
  #     print(sub)
  #
  #     ## select a single subspecies and thin it
  #
  #     if(overwrite==T){
  #       if ((file.exists(paste(subDir,"/thinned_data_thin1.csv",sep="")))){
  #         file.remove(paste(subDir,"/thinned_data_thin1.csv",sep=""))
  #         #setwd(file.path(subDir))
  #       }
  #     }
  #
  #     path = paste(subDir,"/thinned_data_thin1.csv",sep="")
  #     pathlist[[i]] = path
  #
  #     thin<-spThin::thin(loc.data = loc_sub,
  #                        lat.col = "latitude",
  #                        long.col = "longitude",
  #                        spec.col = "name",
  #                        thin.par = 10, ## km distance that records need to be separated by
  #                        reps = 10, ## number of times to repeat thinning process
  #                        locs.thinned.list.return = T,
  #                        write.files = T,
  #                        max.files = 1,
  #                        out.dir = subDir,
  #                        write.log.file = T)
  #   }
  #
  #   ## convert a list of the paths to the thinned stuff, named by subspecies
  #   newThin = data.frame()
  #   for(path in pathList){
  #     addThin = read.csv(path)
  #     newThin = rbind(newThin,addThin)
  #   }
  #   return(newThin)
  #
  # }

  ## Spatially thin localities separately for each assigned subspecies with
  ## spThin, then recombine into one data frame (thinned lon/lat + `assigned`).
  ## thin.par is the km separation required between retained records.
  spThinBySubspecies = function(loc_good_clean, thin.par = 10, reps = 1,
                                lat.col = "latitude", long.col = "longitude",
                                spec.col = "assigned"){

    locs_thinned = lapply(unique(loc_good_clean$assigned), FUN = function(subspp){
      print(subspp)
      loc_temp = loc_good_clean[loc_good_clean$assigned == subspp, ]

      ## note: thinning per subspecies is required — spThin keys on the
      ## species column and would otherwise only use the first subspecies.
      ## thin() returns a list of data frames (one per rep); keep the first.
      loc_thin = spThin::thin(loc.data = loc_temp,
                              lat.col = lat.col,
                              long.col = long.col,
                              spec.col = spec.col,
                              thin.par = thin.par, ## km distance that records need to be separated by
                              reps = reps, ## number of times to repeat thinning process
                              locs.thinned.list.return = TRUE,
                              write.files = FALSE,
                              max.files = 1,
                              write.log.file = FALSE)[[1]]
      loc_thin$assigned = subspp
      return(loc_thin)
    })

    ## explicit return (was a stray trailing assignment)
    return(do.call(rbind, locs_thinned))
  }


  ## step 3 -- subset data based on subspecies, then run spthin
  ## this only gives lat longs, results in a named list with subspp names as names
  # loc_good_by_subspp = split(loc_good_clean[,c("Longitude","Latitude")], loc_good_clean$assigned)
  #
  # pathlist <- sapply(names(loc_good_by_subspp),function(x) NULL)
  #
  # for(i in 1:length(names(loc_good_by_subspp))){
  #   sub = names(loc_good_by_subspp)[[i]]
  #   dir = paste(getwd(),"/Thinned/",sep="")
  #   if (!(file.exists(dir))){
  #     dir.create(file.path(dir))
  #     #setwd(file.path(subDir))
  #   }
  #   subDir = paste(getwd(),"/Thinned/",species," ",sub,sep="")
  #   if (!(file.exists(subDir))){
  #     dir.create(file.path(subDir))
  #     #setwd(file.path(subDir))
  #   }
  #
  #   loc_sub = loc_good_by_subspp[[i]]
  #   loc_sub$name = sub
  #
  #   print(sub)
  #
  #   ## select a single subspecies and thin it
  #
  #   if(overwrite==T){
  #     if ((file.exists(paste(subDir,"/thinned_data_thin1.csv",sep="")))){
  #       file.remove(paste(subDir,"/thinned_data_thin1.csv",sep=""))
  #       #setwd(file.path(subDir))
  #     }
  #   }
  #
  #   path = paste(subDir,"/thinned_data_thin1.csv",sep="")
  #   pathlist[[i]] = path
  #
  #   thin<-spThin::thin(loc.data = loc_sub,
  #                      lat.col = "latitude",
  #                      long.col = "longitude",
  #                      spec.col = "name",
  #                      thin.par = 10, ## km distance that records need to be separated by
  #                      reps = 10, ## number of times to repeat thinning process
  #                      locs.thinned.list.return = T,
  #                      write.files = T,
  #                      max.files = 1,
  #                      out.dir = subDir,
  #                      write.log.file = T)
  # }
  #
  # ## convert a list of the paths to the thinned stuff, named by subspecies
  # newThin = data.frame()
  # for(path in pathList){
  #   addThin = read.csv(path)
  #   newThin = rbind(newThin,addThin)
  # }
  # return(newThin)

}


## run the spThin-based thinning (defined above) on the cleaned localities;
## result has coordinates plus an "assigned" subspecies column
loc_thin = spThinBySubspecies(loc_good_clean = loc_good_clean)

## import the thinned data one at a time to run ENMeval on
## step 4 -- run ENMeval
##TODO: bg points, buffer or something
##TODO: run all subspp at same time?
##TODO: parallel it?

## buffer the points!


##TODO: get this running on a list rather than doing manually
## and do all the pairwise stuff
## Sample buffered background points around a set of localities and build the
## occurrence/background environmental tables used for the PCA.
##   localities: two-column (Longitude, Latitude) table of occurrences
##   r:          buffer radius (map units) around localities for background draws
##   num:        number of background points to sample
##   e:          environmental raster stack; values are extracted from it
## Returns a list: bgenv (occurrences + background stacked, with an `occ`
## 0/1 flag in the last column), bgpoints (raw background coordinates),
## bgext (occurrence rows only), bgextbg (background rows only).
backgroundForPCA = function(localities=loc_good[,c("Longitude","Latitude")],
                            r=200000,
                            num=(100*nrow(localities)),
                            e=Env){
  library(ENMTools)
  ## random background points drawn within a buffer around the occurrences
  bg_pts = ENMTools::background.points.buffer(localities, radius = r,
                                              n = num, mask = e[[1]])
  ## occurrences: coords + env values + presence flag (occ = 1)
  occ_tab = na.omit(cbind(localities,
                          extract(e, localities), rep(1, nrow(localities))))
  colnames(occ_tab)[ncol(occ_tab)] = 'occ'
  ## background: coords + env values + absence flag (occ = 0)
  bg_tab = na.omit(cbind(bg_pts, extract(e, bg_pts), rep(0, nrow(bg_pts))))
  colnames(bg_tab)[ncol(bg_tab)] = 'occ'
  combined = rbind(occ_tab, bg_tab)
  return(list(bgenv=combined,bgpoints=bg_pts,bgext=occ_tab,bgextbg=bg_tab))
}


## background + environment tables for ALL thinned points pooled together
## (columns 2:3 of loc_thin are taken as Longitude/Latitude -- TODO confirm)
loc_thin_bgstuff = backgroundForPCA(localities=loc_thin[,2:3])
bg_dat = loc_thin_bgstuff$bgenv    # coords + env values + occ flag
bg_bg = loc_thin_bgstuff$bgpoints  # raw background coordinates


## Build per-subspecies background/environment tables by splitting the thinned
## localities and calling backgroundForPCA() on each group.
##   localities: thinned occurrence table with coordinate columns and a
##               grouping column `name`
## Returns a list of four named lists (one entry per subspecies):
## bgenv, bgext, bgpoints, bgextbg -- see backgroundForPCA() for contents.
## FIX: the original split the global `loc_thin` and ignored the `localities`
## argument entirely; it now honors the parameter (default is unchanged).
## NOTE(review): splitting on `localities$name` -- the spThin output built
## above carries an `assigned` column; confirm `name` exists at this point.
backgroundPerSpecies = function(localities=loc_thin){
  loc_thin_by_subspecies = split(localities, localities$name)
  n_subspp = length(loc_thin_by_subspecies)

  ## preallocate result lists instead of growing them in the loop
  bgenv_by_subspecies = vector("list", n_subspp)
  bgext_by_subspecies = vector("list", n_subspp)
  bgpoints_by_subspecies = vector("list", n_subspp)
  bgextbg_by_subspecies = vector("list", n_subspp)

  for(i in seq_len(n_subspp)){
    singleSubspp = loc_thin_by_subspecies[[i]]
    single_bgstuff = backgroundForPCA(singleSubspp[,c("Longitude","Latitude")])
    bgenv_by_subspecies[[i]] = single_bgstuff$bgenv
    bgext_by_subspecies[[i]] = single_bgstuff$bgext
    bgpoints_by_subspecies[[i]] = single_bgstuff$bgpoints
    bgextbg_by_subspecies[[i]] = single_bgstuff$bgextbg
  }
  names(bgenv_by_subspecies) = names(loc_thin_by_subspecies)
  names(bgext_by_subspecies) = names(loc_thin_by_subspecies)
  names(bgpoints_by_subspecies) = names(loc_thin_by_subspecies)
  names(bgextbg_by_subspecies) = names(loc_thin_by_subspecies)

  return(list(bgenv_by_subspecies=bgenv_by_subspecies,
              bgext_by_subspecies=bgext_by_subspecies,
              bgpoints_by_subspecies=bgpoints_by_subspecies,
              bgextbg_by_subspecies=bgextbg_by_subspecies))
}

## per-subspecies background/env tables (same structure, split by subspecies)
perspecies_bgstuff = backgroundPerSpecies(localities=loc_thin)

## the base ecospat function didn't check for na.rm so I did so
## Custom copy of ecospat.grid.clim.dyn that grids occurrence densities along
## one axis (stats::density) or two axes (adehabitat kernelUD on a raster).
##   glob     = env/PCA scores of the whole background (all points)
##   glob1    = env/PCA scores of the species-specific background
##   sp       = env/PCA scores of the species occurrences
##   R        = grid resolution (bins per axis)
##   th.sp    = quantile threshold zeroing low occurrence densities
##   th.env   = quantile threshold zeroing low environment densities
##   geomask  = optional spatial mask applied in geographic space
##   removeNA = passed as na.rm to quantile() -- the reason this copy exists
## Returns a list: breaks (x[, y]), densities (z, Z), rescaled/occupancy-
## corrected densities (z.uncor, z.cor), the inputs, and the 0/1 mask w.
## FIX: the original only forwarded removeNA to one of the quantile() calls;
## it is now applied to all of them (1-D branch and the th.sp quantile too).
ecospat.grid.clim.dyn_custom <- function(glob, glob1, sp, R, th.sp = 0, th.env = 0,
                                         geomask = NULL,removeNA=T) {

  glob <- as.matrix(glob)
  glob1 <- as.matrix(glob1)
  sp <- as.matrix(sp)
  l <- list()

  if (ncol(glob) > 2)
    stop("cannot calculate overlap with more than two axes")

  if (ncol(glob) == 1) {
    # if scores in one dimension (e.g. LDA,SDM predictions,...)
    xmax <- max(glob[, 1])
    xmin <- min(glob[, 1])
    x <- seq(from = min(glob[, 1]), to = max(glob[, 1]), length.out = R)  # breaks on score gradient 1
    sp.dens <- density(sp[, 1], kernel = "gaussian", from = xmin, to = xmax,
                       n = R, cut = 0)  # density of occurrences in R pixels along the score gradient
    glob1.dens <- density(glob1[, 1], kernel = "gaussian", from = xmin,
                          to = xmax, n = R, cut = 0)  # density of environments in glob1
    z <- sp.dens$y * nrow(sp)/sum(sp.dens$y)  # rescale density to the number of occurrences in sp
    ## NOTE(review): rescales by nrow(glob), not nrow(glob1) -- kept as-is,
    ## but upstream ecospat uses the glob1 count here; confirm intended.
    Z <- glob1.dens$y * nrow(glob)/sum(glob1.dens$y)
    glob1r <- sapply(glob1, findInterval, glob1.dens$x)
    th.env <- quantile(glob1.dens$y[glob1r], th.env, na.rm = removeNA)
    glob1rm <- which(Z < th.env)
    spr <- sapply(sp, findInterval, sp.dens$x)
    th.sp <- quantile(sp.dens$y[spr], th.sp, na.rm = removeNA)
    sprm <- which(z < th.sp)
    z[sprm] <- 0  # remove infinitesimally small number generated by kernel density function
    Z[glob1rm] <- 0  # remove infinitesimally small number generated by kernel density function

    z.uncor <- z/max(z)  # rescale between [0:1] for comparison with other species
    z.cor <- z/Z  # correct for environment prevalence
    z.cor[is.na(z.cor)] <- 0  # remove n/0 situations
    z.cor[is.infinite(z.cor)] <- 0  # remove x/0 situations (was z.cor == "Inf")
    z.cor <- z.cor/max(z.cor)  # rescale between [0:1] for comparison with other species
    w <- z.uncor
    w[w > 0] <- 1
    l$x <- x
    l$z <- z
    l$z.uncor <- z.uncor
    l$z.cor <- z.cor
    l$Z <- Z
    l$glob <- glob
    l$glob1 <- glob1
    l$sp <- sp
    l$w <- w
  }

  if (ncol(glob) == 2) {
    # if scores in two dimensions (e.g. PCA)

    xmin <- min(glob[, 1])
    xmax <- max(glob[, 1])
    ymin <- min(glob[, 2])
    ymax <- max(glob[, 2])  # data preparation
    ## rescale both point sets to the unit square before kernel estimation
    glob1r <- data.frame(cbind((glob1[, 1] - xmin)/abs(xmax - xmin), (glob1[,
                                                                            2] - ymin)/abs(ymax - ymin)))  # data preparation
    spr <- data.frame(cbind((sp[, 1] - xmin)/abs(xmax - xmin), (sp[, 2] -
                                                                  ymin)/abs(ymax - ymin)))  # data preparation
    mask <- ascgen(SpatialPoints(cbind((0:(R))/R, (0:(R)/R))),
                   nrcol = R-2, count = FALSE) # data preparation
    sp.dens <- kernelUD(SpatialPoints(spr[, 1:2]), h = "href", grid = mask,
                        kern = "bivnorm")  # density of occurrences in an RxR grid along the score gradients
    sp.dens <- raster(xmn = xmin, xmx = xmax, ymn = ymin, ymx = ymax, matrix(sp.dens$ud,
                                                                             nrow = R))
    # using a gaussian kernel density function, with RxR bins.
    glob1.dens <- kernelUD(SpatialPoints(glob1r[, 1:2]), grid = mask, kern = "bivnorm")
    glob1.dens <- raster(xmn = xmin, xmx = xmax, ymn = ymin, ymx = ymax,
                         matrix(glob1.dens$ud, nrow = R))

    x <- seq(from = min(glob[, 1]), to = max(glob[, 1]), length.out = R)  # breaks on score gradient 1
    y <- seq(from = min(glob[, 2]), to = max(glob[, 2]), length.out = R)  # breaks on score gradient 2
    glob1r <- extract(glob1.dens, glob1)
    Z.th <- quantile(glob1r, th.env,na.rm=removeNA)
    glob1.dens[glob1.dens < Z.th] <- 0
    if (!is.null(geomask)) {
      proj4string(geomask) <- NA
      glob1.dens <- mask(glob1.dens, geomask, updatevalue = 0)  # geographical mask if analysis is in geographic space
    }
    Z <- glob1.dens * nrow(glob1)/cellStats(glob1.dens, "sum")

    spr <- extract(sp.dens, sp)
    z.th <- quantile(spr, th.sp, na.rm = removeNA)
    sp.dens[Z == 0] <- 0
    sp.dens[sp.dens < z.th] <- 0
    if (!is.null(geomask)) {
      sp.dens <- mask(sp.dens, geomask, updatevalue = 0)  # geographical mask if analysis is in geographic space
    }
    z <- sp.dens * nrow(sp)/cellStats(sp.dens, "sum")
    z.uncor <- z/cellStats(z, "max")
    w <- z.uncor  # remove infinitesimally small number generated by kernel density function
    w[w > 0] <- 1
    z.cor <- z/Z  # correct for environment prevalence
    z.cor[is.na(z.cor)] <- 0  # remove n/0 situations
    z.cor <- z.cor/cellStats(z.cor, "max")
    l$x <- x
    l$y <- y
    l$z <- z
    l$z.uncor <- z.uncor
    l$z.cor <- z.cor
    l$Z <- Z
    l$glob <- glob
    l$glob1 <- glob1
    l$sp <- sp
    l$w <- w

  }

  return(l)
}


## Calibrate a PCA on the pooled background environment, project each
## subspecies' occurrences and background into that space, and grid the
## occurrence densities for niche comparisons.
##   loc_thin_bgstuff:   output of backgroundForPCA() on all points pooled
##   perspecies_bgstuff: output of backgroundPerSpecies()
##   species:            species label (currently only used by the commented-
##                       out plotting devices)
## Returns a list: scores_globclim (PCA scores of the whole study area),
## scores / scores_clim (per-subspecies occurrence / background scores),
## grid_clim (per-subspecies density grids from ecospat.grid.clim.dyn_custom).
createPcaToCompare = function(loc_thin_bgstuff,perspecies_bgstuff,species) {
  bg_dat = loc_thin_bgstuff$bgenv

  bgext_by_subspecies = perspecies_bgstuff$bgext_by_subspecies
  bgenv_by_subspecies = perspecies_bgstuff$bgenv_by_subspecies

  ## PCA on the env columns only: first two columns are coordinates,
  ## the last is the occ flag
  pca.env <- dudi.pca(bg_dat[,3:(ncol(bg_dat)-1)],scannf=FALSE,nf=2)
  #png(paste("PCAcorrelationCircle_",species,".png",sep=""))
  ecospat.plot.contrib(contrib=pca.env$co, eigen=pca.env$eig)
  #dev.off()

  ## PCA scores for the whole study area (all points, all subspecies)
  scores_globclim<-pca.env$li

  ## per-subspecies scores and density grids (preallocated)
  n_subspp = length(bgext_by_subspecies)
  scores = vector("list", n_subspp)
  scores_clim = vector("list", n_subspp)
  grid_clim = vector("list", n_subspp)

  for(i in seq_len(n_subspp)){
    singleSubspp_bgext = bgext_by_subspecies[[i]]
    singleSubspp_bgenv = bgenv_by_subspecies[[i]]

    ## PCA scores for this subspecies' occurrences (rows with occ == 1)
    scores[[i]] = suprow(pca.env,
                         singleSubspp_bgext[which(
                           singleSubspp_bgext[,ncol(singleSubspp_bgext)]==1)
                           ,3:(ncol(singleSubspp_bgext)-1)])$li

    ## PCA scores for this subspecies' whole background area
    scores_clim[[i]] = suprow(pca.env,
                              singleSubspp_bgenv[,3:(ncol(singleSubspp_bgenv)-1)])$li

    ## dynamic occurrence-density grid along the two PCA gradients:
    ## glob = background env, glob1 = subspecies env, sp = occurrences,
    ## R = resolution, th.sp/th.env = thresholds to eliminate low densities
    grid_clim[[i]] <- ecospat.grid.clim.dyn_custom(glob = scores_globclim,
                                                   glob1 = scores_clim[[i]],
                                                   sp = scores[[i]],
                                                   R = 100,
                                                   th.sp = 0,
                                                   th.env = 0,
                                                   removeNA=TRUE)
  }

  names(scores) = names(bgext_by_subspecies)
  names(scores_clim) = names(bgext_by_subspecies)
  names(grid_clim) = names(bgext_by_subspecies)

  ## quick-look panel of each subspecies' occupancy mask
  #pdf(paste("NicheSpaceComparison_",species,".pdf",sep=""))
  par(mfrow=n2mfrow(length(grid_clim)),
      ask=FALSE)
  for(i in seq_along(grid_clim)){
    plot(grid_clim[[i]]$w,main=names(grid_clim)[[i]])
  }
  #dev.off()

  return(list(scores_globclim=scores_globclim,
              scores=scores,
              scores_clim=scores_clim,
              grid_clim=grid_clim))

}

## project everything into the shared PCA space and grid the densities
pcaOutput = createPcaToCompare(loc_thin_bgstuff=loc_thin_bgstuff,
                               perspecies_bgstuff=perspecies_bgstuff,
                               species=species)


## per-subspecies density grids -- the input to the overlap/equivalency tests
pca_grid_clim = pcaOutput$grid_clim

## PCA the background points
# pca.env <- dudi.pca(bg_dat[,3:(ncol(bg_dat)-1)],scannf=F,nf=2)
# png(paste("PCAcorrelationCircle_",species,".png",sep=""))
# ecospat.plot.contrib(contrib=pca.env$co, eigen=pca.env$eig)
# dev.off()
# scores.sp1 <- suprow(pca.env,
#                      bgext_lepida[which(bgext_lepida[,22]==1),3:21])$li # PCA scores for the species 1 distribution
# scores.sp2 <- suprow(pca.env,
#                      bgext_nitens[which(bgext_nitens[,22]==1),3:21])$li # PCA scores for the species 2 distribution
# scores.clim1 <- suprow(pca.env,bg_lepida[,3:21])$li # PCA scores for the whole native study area species 1 ## bgenv
# scores.clim2 <- suprow(pca.env,bg_nitens[,3:21])$li # PCA scores for the whole native study area species 2
#
# ## make a dynamic occurrence densities grid
# ## grid of occ densities along one or two environmental gradients
# ## glob = env variables in background
# ## glob1 = env variables for species
# ## sp = occurrences of species
# ## R = resolution
# ## th.sp = a threshold to eliminate low density values of species occurrences
# grid.clim1 <- ecospat.grid.clim.dyn(
#   glob = scores.globclim,
#   glob1 = scores.clim1,
#   sp = scores.sp1,
#   R = 100,
#   th.sp = 0
# )
# grid.clim2 <- ecospat.grid.clim.dyn(
#   glob = scores.globclim,
#   glob1 = scores.clim2,
#   sp = scores.sp2,
#   R = 100,
#   th.sp = 0
# )

## get overlaps and test for niche equivalence pairwise

## Compute Schoener's D and the modified Hellinger I for every unordered pair
## of subspecies density grids.
##   pca_grid_clim: named list of grids from ecospat.grid.clim.dyn_custom
## Returns a data.frame with columns spp1, spp2, SchoenersD, modifiedHellingersI.
## FIX: the original built rows via cbind() of characters and numerics, which
## coerced everything to character; rows are now proper data.frames so the
## two overlap columns stay numeric. Also avoids rbind-growth inside the loop.
pairwiseNicheOverlap = function(pca_grid_clim=pca_grid_clim){

  n = length(pca_grid_clim)
  rows = list()

  for(i in seq_len(n)){
    for(j in seq_len(n)){
      if(i<j){
        #print(paste(i,j))
        spp1_name = names(pca_grid_clim)[[i]]
        spp2_name = names(pca_grid_clim)[[j]]
        overlap <- ecospat.niche.overlap(pca_grid_clim[[i]],
                                         pca_grid_clim[[j]], cor=TRUE)
        rows[[length(rows) + 1]] = data.frame(
          spp1 = as.character(spp1_name),
          spp2 = as.character(spp2_name),
          SchoenersD = as.numeric(overlap$D),
          modifiedHellingersI = as.numeric(overlap$I),
          stringsAsFactors = FALSE)
      }
    }
  }

  ## handle the degenerate case of < 2 grids
  if(length(rows) == 0){
    return(data.frame(spp1=character(),
                      spp2=character(),
                      SchoenersD=numeric(),
                      modifiedHellingersI=numeric()))
  }
  return(do.call(rbind, rows))
}

## pairwise D and I overlap statistics for all subspecies pairs
overlap_df = pairwiseNicheOverlap(pca_grid_clim=pca_grid_clim)

# ## calculate overlap between the species niches
# ## Schoener's D
# D.overlap <- ecospat.niche.overlap (grid.clim1, grid.clim2, cor=T)$D
# D.overlap # 0.3298541 for Toxostoma cris vs leco
# ## modified Hellinger's I
# I.overlap = ecospat.niche.overlap (grid.clim1, grid.clim2, cor=T)$I
# I.overlap # 0.5620279 for Toxostoma cris vs leco
# ## this is looking for overlap in PCA-environmental space of the two species


## is this a projection of the niche?
# ## its the pca space occupied by the two species
# png(paste("NicheSpaceComparison_",species,".png",sep=""))
# par(mfrow=c(1,2))
# plot(grid.clim1$w,main="lepida",sub=paste("Sch. D =",round(D.overlap,digits=6)))
# plot(grid.clim2$w,main="nitens",sub=paste("Hel. I =",round(I.overlap,digits=6)))
# dev.off()

## test for niche equivalence pairwise
## first need to modify function again to remove NA
## Custom copy of ecospat.niche.equivalency.test that uses the NA-tolerant
## gridding helper (overlap.eq.gen_custom) for the permutations.
##   z1, z2      = gridded niches (output of ecospat.grid.clim.dyn_custom)
##   rep         = number of permutations
##   alternative = "higher" tests for conservatism/convergence,
##                 "lower" tests for divergence
##   ncores      = >1 runs the permutations on a parallel cluster
## Returns list(sim = permuted D/I values, obs = observed overlap, p.D, p.I).
## FIX: the original parallel branch called clusterEvalQ(cl) with no
## expression (a runtime error) and never exported the custom helpers the
## workers need; the cluster is now also guaranteed to shut down via on.exit.
ecospat.niche.equivalency.test_custom <- function(z1, z2, rep, alternative = "higher", ncores=1) {

  l <- list()

  obs.o <- ecospat.niche.overlap(z1, z2, cor = TRUE)  #observed niche overlap

  if (ncores == 1){
    sim.o <- as.data.frame(matrix(unlist(lapply(1:rep, overlap.eq.gen_custom, z1, z2)), byrow = TRUE,
                                  ncol = 2))  #simulate random overlap
  }else{
    cl <- makeCluster(ncores)  #open a cluster for parallelization
    on.exit(stopCluster(cl), add = TRUE)  #shut the cluster down even on error
    ## make the custom helpers and ecospat available on the workers
    clusterExport(cl, c("overlap.eq.gen_custom", "ecospat.grid.clim.dyn_custom"))
    invisible(clusterEvalQ(cl, library(ecospat)))
    sim.o <- as.data.frame(matrix(unlist(parLapply(cl, 1:rep, overlap.eq.gen_custom, z1, z2)), byrow = TRUE,
                                  ncol = 2))  #simulate random overlap
  }
  colnames(sim.o) <- c("D", "I")
  l$sim <- sim.o  # storage
  l$obs <- obs.o  # storage

  ## one-sided permutation p-values; the +1 counts the observed value itself
  if (alternative == "higher") {
    l$p.D <- (sum(sim.o$D >= obs.o$D) + 1)/(length(sim.o$D) + 1)  # greater -> niche conservatism/convergence
    l$p.I <- (sum(sim.o$I >= obs.o$I) + 1)/(length(sim.o$I) + 1)
  }
  if (alternative == "lower") {
    l$p.D <- (sum(sim.o$D <= obs.o$D) + 1)/(length(sim.o$D) + 1)  # lower -> niche divergence
    l$p.I <- (sum(sim.o$I <= obs.o$I) + 1)/(length(sim.o$I) + 1)
  }

  return(l)
}

##################################################################################################

#### internal functions from ENMtools that are needed for custom ones
## Internal permutation generator for the niche SIMILARITY test (copied from
## the ecospat/ENMTools internals so the custom tests in this file can use it).
## One replicate: randomly relocate the observed niche centroid(s) to a new
## position drawn from the available environment (weighted by prevalence),
## then return c(D, I) -- the overlap between the shifted niche(s).
##   repi      = replicate index supplied by (par)lapply; unused in the body
##   z1, z2    = gridded niches; 1-axis case when $y is NULL, 2-axis otherwise
##   rand.type = 1 shifts both niches, 2 shifts only z2
## NOTE(review): the default `rand.type = rand.type` is self-referential and
## errors if the caller omits the argument -- confirm callers always pass it.
overlap.sim.gen <- function(repi, z1, z2, rand.type = rand.type) {
  R1 <- length(z1$x)
  R2 <- length(z2$x)
  ## ---- one environmental axis: niches are vectors of length R ----
  if (is.null(z1$y) & is.null(z2$y)) {
    if (rand.type == 1) {
      # if rand.type = 1, both z1 and z2 are randomly shifted, if rand.type =2, only z2 is randomly
      # shifted
      center.z1 <- which(z1$z.uncor == 1)  # define the centroid of the observed niche
      Z1 <- z1$Z/max(z1$Z)
      rand.center.z1 <- sample(1:R1, size = 1, replace = FALSE, prob = Z1)  # randomly (weighted by environment prevalence) define the new centroid for the niche
      xshift.z1 <- rand.center.z1 - center.z1  # shift on x axis
      z1.sim <- z1
      z1.sim$z <- rep(0, R1)  # set intial densities to 0
      for (i in 1:length(z1$x)) {
        i.trans.z1 <- i + xshift.z1
        if (i.trans.z1 > R1 | i.trans.z1 < 0)
          (next)()  # densities falling out of the env space are not considered
        z1.sim$z[i.trans.z1] <- z1$z[i]  # shift of pixels
      }
      z1.sim$z <- (z1$Z != 0) * 1 * z1.sim$z  # remove densities out of existing environments
      z1.sim$z.cor <- (z1.sim$z/z1$Z)/max((z1.sim$z/z1$Z), na.rm = TRUE)  #transform densities into occupancies
      z1.sim$z.cor[which(is.na(z1.sim$z.cor))] <- 0
      z1.sim$z.uncor <- z1.sim$z/max(z1.sim$z, na.rm = TRUE)
      z1.sim$z.uncor[which(is.na(z1.sim$z.uncor))] <- 0
    }

    ## z2 is always shifted, regardless of rand.type
    center.z2 <- which(z2$z.uncor == 1)  # define the centroid of the observed niche
    Z2 <- z2$Z/max(z2$Z)
    rand.center.z2 <- sample(1:R2, size = 1, replace = FALSE, prob = Z2)  # randomly (weighted by environment prevalence) define the new centroid for the niche

    xshift.z2 <- rand.center.z2 - center.z2  # shift on x axis
    z2.sim <- z2
    z2.sim$z <- rep(0, R2)  # set intial densities to 0
    for (i in 1:length(z2$x)) {
      i.trans.z2 <- i + xshift.z2
      if (i.trans.z2 > R2 | i.trans.z2 < 0)
        (next)()  # densities falling out of the env space are not considered
      z2.sim$z[i.trans.z2] <- z2$z[i]  # shift of pixels
    }
    z2.sim$z <- (z2$Z != 0) * 1 * z2.sim$z  # remove densities out of existing environments
    z2.sim$z.cor <- (z2.sim$z/z2$Z)/max((z2.sim$z/z2$Z), na.rm = TRUE)  #transform densities into occupancies
    z2.sim$z.cor[which(is.na(z2.sim$z.cor))] <- 0
    z2.sim$z.uncor <- z2.sim$z/max(z2.sim$z, na.rm = TRUE)
    z2.sim$z.uncor[which(is.na(z2.sim$z.uncor))] <- 0
  }

  ## ---- two environmental axes: niches are RxR matrices ----
  if (!is.null(z2$y) & !is.null(z1$y)) {
    if (rand.type == 1) {
      # if rand.type = 1, both z1 and z2 are randomly shifted, if rand.type =2, only z2 is randomly
      # shifted
      centroid.z1 <- which(z1$z.uncor == 1, arr.ind = TRUE)[1, ]  # define the centroid of the observed niche
      Z1 <- z1$Z/max(z1$Z)
      rand.centroids.z1 <- which(Z1 > 0, arr.ind = TRUE)  # all pixels with existing environments in the study area
      weight.z1 <- Z1[Z1 > 0]
      rand.centroid.z1 <- rand.centroids.z1[sample(1:nrow(rand.centroids.z1), size = 1, replace = FALSE,
                                                   prob = weight.z1), ]  # randomly (weighted by environment prevalence) define the new centroid for the niche
      xshift.z1 <- rand.centroid.z1[1] - centroid.z1[1]  # shift on x axis
      yshift.z1 <- rand.centroid.z1[2] - centroid.z1[2]  # shift on y axis
      z1.sim <- z1
      z1.sim$z <- matrix(rep(0, R1 * R1), ncol = R1, nrow = R1)  # set intial densities to 0
      for (i in 1:R1) {
        for (j in 1:R1) {
          i.trans.z1 <- i + xshift.z1
          j.trans.z1 <- j + yshift.z1
          ## NOTE(review): bound check uses < 0, so an index of exactly 0 can
          ## reach the assignment; a zero subscript assigns nothing in R, so
          ## this appears harmless -- confirm against upstream ecospat
          if (i.trans.z1 > R1 | i.trans.z1 < 0)
            (next)()  # densities falling out of the env space are not considered
          if (j.trans.z1 > R1 | j.trans.z1 < 0)
            (next)()
          z1.sim$z[i.trans.z1, j.trans.z1] <- z1$z[i, j]  # shift of pixels
        }
      }
      z1.sim$z <- (z1$Z != 0) * 1 * z1.sim$z  # remove densities out of existing environments
      z1.sim$z.cor <- (z1.sim$z/z1$Z)/max((z1.sim$z/z1$Z), na.rm = TRUE)  #transform densities into occupancies
      z1.sim$z.cor[which(is.na(z1.sim$z.cor))] <- 0
      z1.sim$z.uncor <- z1.sim$z/max(z1.sim$z, na.rm = TRUE)
      z1.sim$z.uncor[which(is.na(z1.sim$z.uncor))] <- 0
    }
    ## z2 is always shifted, regardless of rand.type
    centroid.z2 <- which(z2$z.uncor == 1, arr.ind = TRUE)[1, ]  # define the centroid of the observed niche
    Z2 <- z2$Z/max(z2$Z)
    rand.centroids.z2 <- which(Z2 > 0, arr.ind = TRUE)  # all pixels with existing environments in the study area
    weight.z2 <- Z2[Z2 > 0]
    rand.centroid.z2 <- rand.centroids.z2[sample(1:nrow(rand.centroids.z2), size = 1, replace = FALSE,
                                                 prob = weight.z2), ]  # randomly (weighted by environment prevalence) define the new centroid for the niche
    xshift.z2 <- rand.centroid.z2[1] - centroid.z2[1]  # shift on x axis
    yshift.z2 <- rand.centroid.z2[2] - centroid.z2[2]  # shift on y axis
    z2.sim <- z2
    z2.sim$z <- matrix(rep(0, R2 * R2), ncol = R2, nrow = R2)  # set intial densities to 0
    for (i in 1:R2) {
      for (j in 1:R2) {
        i.trans.z2 <- i + xshift.z2
        j.trans.z2 <- j + yshift.z2
        if (i.trans.z2 > R2 | i.trans.z2 < 0)
          (next)()  # densities falling out of the env space are not considered
        if (j.trans.z2 > R2 | j.trans.z2 < 0)
          (next)()
        z2.sim$z[i.trans.z2, j.trans.z2] <- z2$z[i, j]  # shift of pixels
      }
    }
    z2.sim$z <- (z2$Z != 0) * 1 * z2.sim$z  # remove densities out of existing environments
    z2.sim$z.cor <- (z2.sim$z/z2$Z)/max((z2.sim$z/z2$Z), na.rm = TRUE)  #transform densities into occupancies
    z2.sim$z.cor[which(is.na(z2.sim$z.cor))] <- 0
    z2.sim$z.uncor <- z2.sim$z/max(z2.sim$z, na.rm = TRUE)
    z2.sim$z.uncor[which(is.na(z2.sim$z.uncor))] <- 0
  }

  ## overlap between the shifted (or shifted-vs-observed) niches
  if (rand.type == 1) {
    o.i <- ecospat.niche.overlap(z1.sim, z2.sim, cor = TRUE)
  }
  if (rand.type == 2)
  {
    o.i <- ecospat.niche.overlap(z1, z2.sim, cor = TRUE)
  }  # overlap between random and observed niches
  sim.o.D <- o.i$D  # storage of overlaps
  sim.o.I <- o.i$I
  return(c(sim.o.D, sim.o.I))
}
## One permutation draw for the niche EQUIVALENCY test: pool the occurrences
## of both species, randomly reassign them to two pseudo-species of the
## original sizes, regrid both on their original backgrounds, and return the
## overlap statistics c(D, I) of the randomized pair.
##   repi   = replicate index supplied by (par)lapply; unused in the body
##   z1, z2 = gridded niches; 1-axis case when z1$y is NULL, 2-axis otherwise
overlap.eq.gen_custom <- function(repi, z1, z2) {
  if (is.null(z1$y)) {
    ## single axis: occurrence scores are plain vectors
    pooled <- c(z1$sp, z2$sp)
    picked <- sample(1:length(pooled), length(z1$sp))
    sp1.sim <- pooled[picked]
    sp2.sim <- pooled[-picked]
  } else {
    ## two axes: occurrence scores are coordinate rows
    pooled <- rbind(z1$sp, z2$sp)
    row.names(pooled) <- c()  # drop row names so sampling indices are clean
    picked <- sample(1:nrow(pooled), nrow(z1$sp))
    sp1.sim <- pooled[picked, ]
    sp2.sim <- pooled[-picked, ]
  }

  ## regrid both randomized occurrence sets on their original backgrounds
  sim1 <- ecospat.grid.clim.dyn_custom(z1$glob, z1$glob1, data.frame(sp1.sim), R = length(z1$x))
  sim2 <- ecospat.grid.clim.dyn_custom(z2$glob, z2$glob1, data.frame(sp2.sim), R = length(z2$x))

  ## overlap between the two randomized niches
  stats <- ecospat.niche.overlap(sim1, sim2, cor = TRUE)
  return(c(stats$D, stats$I))
}



## Run niche equivalency and directional similarity tests for every unordered
## pair of subspecies grids, writing a six-panel PDF of diagnostics per pair.
##   pca_grid_clim = named list of grids from ecospat.grid.clim.dyn_custom
##   rep1 = permutations for the equivalency test (1000 recommended for real runs)
##   rep2 = permutations for the similarity tests
## "higher" alternative tests for niche conservatism in all three tests.
## NOTE(review): the PDF filename uses the global `species` -- confirm it is
## defined in the calling environment.
## FIX: the original ran the identical spp1 -> spp2 similarity test twice and
## labeled the second one as reversed; sim.test2 now actually swaps z1 and z2.
pairwiseNicheEquivalence = function(pca_grid_clim=pca_grid_clim,rep1=10,rep2=1000){
  for(i in seq_along(pca_grid_clim)){
    for(j in seq_along(pca_grid_clim)){
      if(i<j){
        print(paste(i,j))
        spp1_name = names(pca_grid_clim)[[i]]
        spp1 = pca_grid_clim[[i]]
        spp2_name = names(pca_grid_clim)[[j]]
        spp2 = pca_grid_clim[[j]]

        ## equivalency: pooled occurrences randomly reassigned to the pair
        eq.test <- ecospat.niche.equivalency.test_custom(z1=spp1, z2=spp2,
                                                         rep=rep1, alternative = "higher")
        ## similarity spp1 -> spp2 (rand.type=2: only z2 is shifted)
        sim.test <- ecospat.niche.similarity.test(z1=spp1, z2=spp2,
                                                  rep=rep2, alternative = "higher",
                                                  rand.type=2)
        ## similarity spp2 -> spp1 (arguments swapped)
        sim.test2 <- ecospat.niche.similarity.test(z1=spp2, z2=spp1,
                                                   rep=rep2, alternative = "higher",
                                                   rand.type=2)

        pdf(paste("EquivalencyOverlapTests_",species,"_",spp1_name,"_",spp2_name,".pdf",sep=""))
        par(mfrow=c(2,3))
        ecospat.plot.overlap.test(eq.test, "D", "Equivalency")
        ecospat.plot.overlap.test(sim.test, "D", paste("Similarity ",spp1_name,"->",spp2_name,sep=""))
        ecospat.plot.overlap.test(sim.test2, "D", paste("Similarity ",spp2_name,"->",spp1_name,sep=""))
        ecospat.plot.overlap.test(eq.test, "I", "Equivalency")
        ecospat.plot.overlap.test(sim.test, "I", paste("Similarity ",spp1_name,"->",spp2_name,sep=""))
        ecospat.plot.overlap.test(sim.test2, "I", paste("Similarity ",spp2_name,"->",spp1_name,sep=""))
        dev.off()

      }
    }
  }
}

## run the pairwise tests (writes one PDF per subspecies pair)
pairwiseNicheEquivalence(pca_grid_clim=pca_grid_clim,rep1=10,rep2=1000)

#
# ## test for niche equivalence
# ## this is the observed niche overlap when you randomize species id
# ## greater means you test for niche conservatism
# ## lower means you test for niche divergence
# eq.test <- ecospat.niche.equivalency.test(grid.clim1, grid.clim2,
#                                           rep=10, alternative = "higher") ##rep = 1000 recommended for operational runs
# ## for 10 takes 1 minute
#
# ## then test for niche similarity --
# ## overlap between spp 1 and overlaps between random spp 2 bg niches
# ## rand.type 2 means only z2 is randomly shifted
# sim.test <- ecospat.niche.similarity.test(grid.clim1, grid.clim2,
#                                           rep=1000, alternative = "higher",
#                                           rand.type=2)
# sim.test2 <- ecospat.niche.similarity.test(grid.clim2,grid.clim1,
#                                            rep=1000, alternative = "higher",
#                                            rand.type=2)
# png(paste("EquivalencyOverlapTests_",species,"_nitens vs lepida",".png",sep=""))
# par(mfrow=c(2,3))
# ecospat.plot.overlap.test(eq.test, "D", "Equivalency")
# ecospat.plot.overlap.test(sim.test, "D", "Similarity 1->2")
# ecospat.plot.overlap.test(sim.test2, "D", "Similarity 2->1")
# ecospat.plot.overlap.test(eq.test, "I", "Equivalency")
# ecospat.plot.overlap.test(sim.test, "I", "Similarity 1->2")
# ecospat.plot.overlap.test(sim.test2, "I", "Similarity 2->1")
# dev.off()


## generate the actual models

library(ENMeval)

## run ENMeval once per subspecies
## NOTE(review): `nitens_by_subspp` is not created anywhere in this script --
## presumably a per-subspecies list of localities from an earlier session;
## confirm it exists before rerunning (the pipeline above builds `loc_thin`)
listENMresults = lapply(1:length(nitens_by_subspp),function(i){
  ##TODO: add in bg points from above
  subspp = names(nitens_by_subspp)[[i]]
  print(paste("Running",subspp))
  ## block cross-validation; limited feature classes and reg. multipliers
  res = ENMevaluate(occ=nitens_by_subspp[[i]], env = Env, method='block',
                    parallel=T, numCores=4, fc=c("L", "LQ", "H"), #c("L", "LQ", "H", "LQH", "LQHP", "LQHPT")
                    RMvalues=seq(0.5,4,0.5), rasterPreds=F)
  #names(res) = names(nitens_by_subspp)[[i]]
  return(res)
})
names(listENMresults) = names(nitens_by_subspp)

# Mean.AUC is the one to use,
## full.AUC is the AUC of the model used for AICc comparisons
## using all the points and no withheld data
## for omission rate, if small sample size and high confidence
## do mean.ORmin. if don't know, do mean.OR10

## now find the best models
## minimize omission rate and optimize AUC values
## remember to change ORmin to OR10 depending on sample size

## Pick the best candidate model from an ENMeval results table:
## sort ascending by omission rate, break ties by descending AUC, and return
## the row index of the winner within res@results.
##   res = ENMevaluation object (its @results slot is used)
##   or  = omission-rate column (use "Mean.ORmin" for small, high-confidence
##         samples; "Mean.OR10" otherwise)
##   auc = AUC column to maximize
## FIX: removed the unused local `top` from the original.
minORmaxAUCmodel = function(res,or="Mean.OR10",auc="Mean.AUC"){
  setsort = res@results[order(res@results[,or]),]
  setsort2 = setsort[order(setsort[,auc], decreasing=TRUE),]
  ## locate the winning settings row (column 1) in the original table
  best = which(as.character(res@results[,1]) == as.character(setsort2[1,1]))
  return(best)
}

## pick the best model index for each subspecies' ENMeval results
bestModels = lapply(1:length(listENMresults),function(i){
  res = listENMresults[[i]]  # note: unused below; best is taken from the list directly
  name = names(listENMresults)[[i]]
  print(paste("Choosing best model for",name))
  best = minORmaxAUCmodel(res=listENMresults[[i]])
  return(best)
})
names(bestModels) = names(listENMresults)


# getBestRaster = function(best,res,wdToPrint=paste(getwd(),"/",sep=""),prefix,suffix,species,Env){
#   setwd(wdToPrint)
#   pred.raw = predict(Env, res@models[[best]]) ## need to run a new model in maxent with those settings
#   ## need to extract the features and the reg multiplier for the model and put in the arguments into the maxent call
#   writeRaster(pred.raw,filename=paste(wdToPrint,prefix,"_BestModel_",species," ",suffix,".asc",sep=""),
#               format="ascii",overwrite=T)
#   return(pred.raw)
# }

#####

## Refit the AICc-optimal model from an ENMeval run in java Maxent and write
## its raw prediction raster to disk.
##   best      = index of the chosen model (kept for interface compatibility;
##               the optimum is re-derived from delta.AICc below)
##   res       = ENMevaluation object
##   wdToPrint = output directory (NOTE(review): setwd() side effect kept for
##               backward compatibility -- callers rely on the cwd change)
##   prefix/suffix/species = pieces of the output filename
##   Env       = environmental raster stack; locs = occurrence localities
## Returns the prediction raster so callers can plot/threshold it.
## FIX: the feature-class translation is now a letter lookup, so any
## combination (e.g. "LQP", which the original if/else chain silently
## dropped, leaving `feats` undefined) is handled; also returns pred.raw
## explicitly, matching the commented-out reference implementation.
getBestRaster = function(best,res,wdToPrint=paste(getwd(),"/",sep=""),prefix,suffix,species,Env,locs){
  setwd(wdToPrint)
  ## drop degenerate zero-parameter models, then take the lowest delta.AICc
  mod.table<-res@results
  no.zero.param <- mod.table[mod.table$nparam != 0,]
  ordered<-no.zero.param[with(no.zero.param, order(delta.AICc)), ]
  opt.mod<-ordered[1,]

  ## regularization (beta) multiplier of the optimal model
  b.m<-opt.mod$rm
  beta.mulr<- paste('betamultiplier=',b.m,sep='')
  ## java maxent needs everything to be set false,
  ## then anything you want is flipped to true below
  false.args<-c('noautofeature','noproduct','nothreshold','noquadratic','nohinge','nolinear')
  ## translate the feature-class code (e.g. "LQH") letter by letter
  feat.lookup <- c(L = 'linear', Q = 'quadratic', H = 'hinge',
                   P = 'product', T = 'threshold')
  feat.code <- gsub(',', '', as.vector(opt.mod[,2]))
  feats <- unname(feat.lookup[strsplit(feat.code, '')[[1]]])
  for (j in 1:length(feats)){false.args[which(sub('no','',false.args)==feats[j])] = feats[j]}
  m <-maxent(Env, locs, args=c(false.args, beta.mulr, 'noremoveduplicates', 'noaddsamplestobackground'))
  pred.raw  <- predict(object= m, x=Env, na.rm=TRUE, format='GTiff',overwrite=TRUE, progress='text',args='logistic')
  writeRaster(pred.raw,filename=paste(wdToPrint,prefix,"_BestModel_",species," ",suffix,".asc",sep=""),
              format="ascii",overwrite=T)
  ## return the prediction so downstream plotting/thresholding can use it
  return(pred.raw)
}



## write out + collect the best prediction raster per subspecies
## NOTE(review): confirm getBestRaster() returns the prediction raster --
## `test` is plotted and returned below
bestRasters = lapply(1:length(bestModels),function(i){
  locs = nitens_by_subspp[[i]]
  name = names(listENMresults)[[i]]
  print(paste("Making rasters for",name))
  test = getBestRaster(best=bestModels[[i]],res=listENMresults[[i]],prefix="PredRaw",suffix=name,species=species,Env=Env,locs=locs)
  plot(test, col=viridis::viridis(99))
  return(test)
})
names(bestRasters) = names(listENMresults)

## side-by-side comparison of the first two subspecies models
png(paste("BestRastersTest_",species," ","nitens_vs_lepida.png",sep=""))
par(mfrow=c(1,2))
plot(bestRasters[[1]],col=viridis::viridis(99),main=names(bestRasters)[[1]])
plot(bestRasters[[2]],col=viridis::viridis(99),main=names(bestRasters)[[2]])
dev.off()

## now do thresholding
## get the enmeval output

## Threshold each best-model raster into binary presence/absence maps using
## the standard dismo::threshold() criteria, plot all six in one PNG, and
## write each binary raster to disk as an ASCII grid.
## NOTE(review): "no_omisison" is a long-standing typo in the plot title and
## output file names; left byte-identical so existing downstream file paths
## keep working -- fix in a coordinated rename if desired.
threshRasters = lapply(seq_along(bestRasters), function(i){
  pred = bestRasters[[i]]
  name = names(bestRasters)[[i]]
  best = bestModels[[i]]

  ## Re-evaluate the selected model against its localities and background
  ## points to obtain the candidate presence thresholds.
  ev.set <- evaluate(nitens_by_subspp[[i]], listENMresults[[i]]@bg.pts, listENMresults[[i]]@models[[best]], Env)
  th1 = threshold(ev.set) ## candidate thresholds; see ?dismo::threshold

  ## Binary presence/absence maps, one per threshold criterion:
  p1.kappa = pred >= th1$kappa           ## maximizes Cohen's kappa
  p1.spsen = pred >= th1$spec_sens       ## maximizes sensitivity + specificity
  p1.nomit = pred >= th1$no_omission     ## highest threshold with no omitted presences
  p1.preva = pred >= th1$prevalence      ## predicted prevalence closest to observed
  p1.equal = pred >= th1$equal_sens_spec ## sensitivity equals specificity
  p1.sns09 = pred >= th1$sensitivity     ## fixed sensitivity (dismo default 0.9)

  ## One 2x3 panel PNG showing all six binary maps for this taxon.
  png(paste0("Thresholds_", species, " ", name, ".png"))
  par(mfrow=c(2,3))
  plot(p1.kappa, col=viridis::viridis(99),main=paste(name,"kappa"))#; points(nitens_by_subspp[[i]],col=rgb(0,0,0,0.01))
  plot(p1.spsen, col=viridis::viridis(99),main=paste(name,"spec_sens"))#; points(nitens_by_subspp[[i]],col=rgb(0,0,0,0.01))
  plot(p1.nomit, col=viridis::viridis(99),main=paste(name,"no_omisison"))#; points(nitens_by_subspp[[i]],col=rgb(0,0,0,0.01))
  plot(p1.preva, col=viridis::viridis(99),main=paste(name,"prevalence"))#; points(nitens_by_subspp[[i]],col=rgb(0,0,0,0.01))
  plot(p1.equal, col=viridis::viridis(99),main=paste(name,"equal_sens_spec"))#; points(nitens_by_subspp[[i]],col=rgb(0,0,0,0.01))
  plot(p1.sns09, col=viridis::viridis(99),main=paste(name,"sensitivity0.9"))#; points(nitens_by_subspp[[i]],col=rgb(0,0,0,0.01))
  dev.off()

  ## Persist each binary map as an ESRI ASCII grid (file names unchanged).
  writeRaster(p1.kappa,filename=paste0("Threshold_",species," ",name,"_kappa.asc"),format="ascii",overwrite=TRUE)
  writeRaster(p1.spsen,filename=paste0("Threshold_",species," ",name,"_spec_sens.asc"),format="ascii",overwrite=TRUE)
  writeRaster(p1.nomit,filename=paste0("Threshold_",species," ",name,"_no_omisison.asc"),format="ascii",overwrite=TRUE)
  writeRaster(p1.preva,filename=paste0("Threshold_",species," ",name,"_prevalence.asc"),format="ascii",overwrite=TRUE)
  writeRaster(p1.equal,filename=paste0("Threshold_",species," ",name,"_equal_sens_spec.asc"),format="ascii",overwrite=TRUE)
  writeRaster(p1.sns09,filename=paste0("Threshold_",species," ",name,"_sensitivity0.9.asc"),format="ascii",overwrite=TRUE)

  ## Return the six binary rasters as a named list for this taxon.
  th1ras = list(kappa=p1.kappa,
                specSens = p1.spsen,
                noOmit = p1.nomit,
                prevalence = p1.preva,
                equalSensSpec = p1.equal,
                sensitivity_0.9 = p1.sns09)
  return(th1ras)

})
names(threshRasters) = names(bestRasters)

print("Are these really subspecies, folks?")




### plotting stuff
## Sanity-check maps: plot the background raster in grey and overlay the
## "good" localities colored by their assigned subspecies.
## NOTE(review): loc_good / loc_suspect are not created in this part of the
## script -- confirm they exist before these lines run.
plot(bg, col="grey",colNA="darkgrey")
points(loc_good$longitude,loc_good$latitude,col=as.factor(loc_good$assigned))

## Same map for the localities flagged as suspect, drawn with "x" symbols.
plot(bg, col="grey",colNA="darkgrey")
points(loc_suspect$longitude,loc_suspect$latitude,col=as.factor(loc_suspect$assigned),
       pch="x")

## NOTE(review): this closing brace pairs with an opening brace earlier in
## the file (outside this excerpt) -- verify the file still parses.
}

## Scratch block holding TODO notes; the braces let it be collapsed/run as a
## unit in an editor, but it executes nothing.
{
  ##TODO: consider lumping?
  ## for now testing with phainopeplaNitens
  ##TODO: change all the lapply to function if possible
  ##TODO: update plotting here (and in other file) so that everything plots iteratively, esp if pairwise
  ##TODO: add in species name not just subspp name

}
## Reinstall the subsppLabelR package from GitHub and reload it.
## The detach() is now guarded: the original unconditional call errors and
## stops the script whenever the package is not currently attached.
## NOTE(review): install_github() runs on every execution of this script;
## consider gating it behind a flag.
if ("package:subsppLabelR" %in% search()) {
  detach("package:subsppLabelR", unload=TRUE)
}
devtools::install_github('kaiyaprovost/subsppLabelR')
library(subsppLabelR,verbose=TRUE)
## need to add a check in here -- remove unknown points with exact same lat/long as a labeled point

## SETUP
{
  ## NOTE(review): setwd() in a script is fragile and most paths below are
  ## absolute anyway; consider a single configurable base-path variable.
  setwd("~/Documents/Classes/Finished Classes/Spatial Bioinformatics/project/")

  ## Toggle for the optional extra thinning step at the bottom of the script.
  THIN_BEYOND = FALSE

  ## also split these into subspecies assignments with $assigned
  ## this is a list, can access with $x or [[i]]

  ## if you want you can check through the points that suck to add in more localities
  ## but for now not doing that

  ## now we have groups that are assigned to single areas and don't mismatch
  ## let's split into two different groupings and make some niche models!
  ## then test overlap

  # step 1 -- import environmental variables
  ##TODO: add in other data than Worldclim
  ##TODO: water layer, how often water there is at that spot? 30m layer!!!
  ## Stack all WorldClim .bil layers, then crop to the study extent.
  Env = raster::stack(list.files(
    path='/Users/kprovost/Documents/Classes/Finished Classes/Spatial Bioinformatics/spatial_bioinformatics-master/ENM/wc2-5/',
    pattern="\\.bil$",
    full.names=TRUE))
  ext = raster::extent(c(-125,-60,10,50)) ## make sure this will play nice with your points
  Env = raster::crop(Env, ext)
  bg = Env[[1]] ## just for plotting

  ## get locs
  #species = "Phainopepla nitens"
  #subspecies = c("nitens","lepida")

  #species = "Cardinalis sinuatus"
  #subspecies = c("sinuatus","fulvescens","peninsulae")
  #species = "Cardinalis cardinalis"
  #subspecies = c("affinis","canicaudus","cardinalis","carneus",
  #                "clintoni","coccineus","flammiger","floridanus",
  #                "igneus","littoralis","magnirostris","mariae",
  #                "phillipsi","saturatus","seftoni","sinaloensis",
  #                "superbus","townsendi","yucatanicus")

  #subspecies_igne = c("affinis","clintoni","igneus","seftoni","sinaloensis","superbus","townsendi")
  #subspecies_card = c("canicaudus","cardinalis","floridanus","magnirostris")
  #subspecies_cocc = c("coccineus","flammiger","littoralis","phillipsi","yucatanicus")
  #subspecies_rest = c("carneus","mariae","saturatus")
  #test = c("clintoni","affinis")

  ## there is a bug -- if one subspp range is entirely subsumed within another polygon,
  ## will delete that subspecies. no bueno

  #alllocs = "/Users/kprovost/Documents/Classes/Finished Classes/Spatial Bioinformatics/project/big_sinuatus_testrun_NOTWORKING/AllSubspp_Cardinalis sinuatus_sinuatus fulvescens peninsulae_clean.txt"
  #labeledLoc = read.csv(alllocs,sep="\t")
  #locs = labeledLoc

  ## Reload the package; the detach() is guarded so this block also works
  ## when the package is not yet attached.
  if ("package:subsppLabelR" %in% search()) {
    detach("package:subsppLabelR", unload=TRUE)
  }
  library(subsppLabelR,verbose=TRUE)

  ## Do not pause between plots.
  par(ask=FALSE)

}

## GENERATE FUNCTIONS
## ALL OF THESE FUNCTIONS LAYER UNKNOWN POINTS ON LAST



## Read the table of southwestern subspecies to process: tab-separated, no
## header row, one subspecies per line with columns genus / species / subspecies.
list_of_taxa = "/Users/kprovost/Documents/Dissertation/CHAPTER1_REVIEW/southwestern_subspecies.txt"
taxa = read.csv(list_of_taxa, sep="\t", header=FALSE)
colnames(taxa) = c("GEN","SPP","SUB")

## One row per unique genus + species combination.
unique_species = unique(taxa[,1:2])

#for (rownum in 1:1) {
## Process every species in the taxon table: query occurrence databases,
## assign points to subspecies, and write the outputs per species.
## NOTE(review): the loop starts at 2, presumably because row 1 was already
## processed in an earlier run -- confirm this is intentional. Also beware
## that 2:nrow(unique_species) counts DOWN if the table has fewer than 2 rows.
for (rownum in 2:nrow(unique_species)) {
  ## Subset the taxon table to the current genus + species.
  temp = taxa[taxa$GEN==unique_species$GEN[rownum],]
  temp = temp[temp$SPP==unique_species$SPP[rownum],]
  ## Drop unused factor levels from the subspecies column.
  ## NOTE(review): droplevels.factor is an S3 method called directly; this
  ## only behaves as intended if temp$SUB is already a factor -- confirm.
  subspp = droplevels.factor(unlist(temp$SUB))
  ## Binomial name, e.g. "Cardinalis sinuatus".
  spp = paste((unlist(temp[1,1:2])),sep=" ",collapse=" ")
  print(spp)
  print(subspp)

  ## Query occurrence databases and assign each locality to a subspecies.
  ## This hits external services (gbif/bison/ecoengine/vertnet) and can be
  ## slow; outputs are written under a per-species directory.
  processed = databaseToAssignedSubspecies(spp=spp,
                                           subsppList=subspp,
                                           pointLimit=1000,dbToQuery=c("gbif","bison","ecoengine","vertnet"), ## removed everythign besides gbif and vertnet
                                           quantile=0.95,xmin=-125,xmax=-60,ymin=10,ymax=50,plotIt=T,bgLayer=bg,
                                           outputDir=paste("/Users/kprovost/Documents/Classes/Finished Classes/Spatial Bioinformatics/project/",spp,sep=""),
                                           epsilon=1e-6)

  ## Write plots/tables for this species into the same per-species directory.
  outputProcessedSpecies(processedSpecies=processed,
                         outputDir=paste("/Users/kprovost/Documents/Classes/Finished Classes/Spatial Bioinformatics/project/",spp,sep=""),
                         species=spp,
                         subspecies=subspp,
                         bg=bg)

}


## Single-species run (alternative to the loop above), reading localities
## from a pre-downloaded data file instead of only querying databases.
## NOTE(review): `species`, `subspecies`, and `alllocs` are defined only in
## commented-out code earlier in this script, so this call fails unless they
## are assigned interactively first -- confirm before running.
processedSpecies = databaseToAssignedSubspecies(spp=species,
                                                subsppList=subspecies,
                                                pointLimit=10,dbToQuery=c("gbif","bison","inat","ebird","ecoengine","vertnet"),
                                                quantile=0.95,xmin=-125,xmax=-60,ymin=10,ymax=50,plotIt=T,bgLayer=bg,
                                                outputDir="~/Documents/Classes/Finished Classes/Spatial Bioinformatics/project/",
                                                datafile=alllocs,
                                                epsilon=1e-6)

## Write plots/tables for the processed single-species run.
outputProcessedSpecies(processedSpecies=processedSpecies,outputDir="~/Documents/Classes/Finished Classes/Spatial Bioinformatics/project/",
                       species=species,subspecies=subspecies,bg=bg)

## Optional extra thinning of the cleaned localities (off by default via the
## THIN_BEYOND flag set in the SETUP block).
if (THIN_BEYOND == TRUE) {



  ## Drop localities that fall on NA cells of the environmental rasters.
  loc_good_clean = cleanByEnvironment(Env=Env,loc=loc_good)


  ## NOTE(review): `locs_thinned` is never created in this script, so this
  ## rbind fails whenever THIN_BEYOND is TRUE -- confirm where it comes from.
  loc_thin = do.call(rbind,locs_thinned)
}

## NOTE(review): this closing brace pairs with an opening brace earlier in
## the file (outside this excerpt) -- verify the file still parses.
}
## (scraped rdrr.io page footer -- commented out so the file parses as R)
# kaiyaprovost/subsppLabelR documentation built on March 17, 2024, 5:09 p.m.