#' @title BuildRF Model with locations of mountain lion kill sites (summer and winter models)
#' @description Build RF Model with locations of mountain lion kill sites
#' @param pred.data path of data to be used to build RF model
#' @param ncpu number of cpus to parallelize on
#' @param withold percent of data (decimal format) to withold for training model (default 0.25)
#' @param method method used for partitioning and training model ("bootstrap" or "repeatedcv")
#' @param raspath path to raster stack
#' @param studypath path to study area polygon (where you want to define availability)
#' @param pathout path to where predicted RF map should be written
#' @return Returns a list object with RFData necessary to predict probably of use in RF models (elk, coyotes, and mountain lions)
#' @keywords elk, coyote, mountain lion, random forest, extract, raster, sample
#' @export
BuildPredRiskModel<-function(pred.data, ncpu, withold,method, combine.data){
library(raster)
library(rgdal)
library(randomForest)
library(doParallel)
library(pROC)
pr<-read.csv(pred.data,stringsAsFactors = F) ### Extracted data for Used and aVailable points
pr$RasterStack_NLCD_11_30<-as.factor(pr$RasterStack_NLCD_11_30)
pr$NewKill<-ifelse(pr$Kill == 1 & pr$Season == "Winter", 2, ifelse(pr$Kill == 1 & pr$Season == "Summer", 3, pr$Kill))
pr<-pr[complete.cases(pr),]
SummerAll<-pr[pr$Season == "Summer",]
WinterAll<-pr[pr$Season == "Winter",]
pred.names<-(names(pr[,c(9:15, 17:23)]))
#### Summer and Winter Models Separately ####
sum<-TediousEcology::boot.fun(data = SummerAll, prop = 0.8, n.boot = 500, samplesize = c(50, 10), mtry = 4, cutoff = 0.75, pred.names = pred.names, cat.column = "Kill")
sum<-sum[[1]]
sumagg<-aggregate(sum$KillPred, by = list(sum$ID, sum$Kill), FUN = mean, na.rm =TRUE)
sumagg$SD<-aggregate(sum$KillPred, by = list(sum$ID, sum$Kill), FUN = sd, na.rm = TRUE)[,3]
sumagg$SE<-aggregate(sum$KillPred, by = list(sum$ID, sum$Kill), FUN = function(x) sd(x)/sqrt(length(x)))[,3]
names(sumagg)<-c('ID', 'Kill','KillPrediction', 'SD', 'SE')
library(easyGgplot2)
library(ggpubr)
print(sumplot<-ggplot2.violinplot(data = sumagg, xName = 'Kill', yName = 'KillPrediction',addDot = TRUE, dotSize = 0.5, dotPosition = "center"))
print(sumplot<-sumplot + theme(panel.grid.major = element_line(linetype = "blank"),
panel.grid.minor = element_line(linetype = "blank"),
panel.background = element_rect(fill = NA)) +labs(title = "Summer"))
### Winter ###
win<-TediousEcology::boot.fun(data = WinterAll, prop = 0.8, n.boot = 500, mtry = 4, samplesize = c(50,10), cutoff = 0.75, pred.names = pred.names, cat.column = "Kill")
win<-win[[1]]
winagg<-aggregate(win$KillPred, by = list(win$ID, win$Kill), FUN = mean, na.rm =TRUE)
winagg$SD<-aggregate(win$KillPred, by = list(win$ID, win$Kill), FUN = sd, na.rm = TRUE)[,3]
winagg$SE<-aggregate(win$KillPred, by = list(win$ID, win$Kill), FUN = function(x) sd(x)/sqrt(length(x)))[,3]
names(winagg)<-c('ID', 'Kill','KillPrediction', 'SD', 'SE')
print(winplot<-ggplot2.violinplot(data = winagg, xName = 'Kill', yName = 'KillPrediction',addDot = TRUE, dotSize = 0.5, dotPosition = "center"))
print(winplot<-winplot + theme(panel.grid.major = element_line(linetype = "blank"),
panel.grid.minor = element_line(linetype = "blank"),
panel.background = element_rect(fill = NA)) +labs(title = "Winter"))
print(season.plot<-ggarrange(sumplot, winplot))
##### Combine Winter and Summer ######
dev.off()
all.season<-TediousEcology::boot.fun(data = pr, prop = 0.8, n.boot = 500, mtry = 4, samplesize = c(50,10), cutoff = 0.75, pred.names = pred.names, cat.column = "Kill")
all.season<-all.season[[1]]
all.agg<-aggregate(all.season$KillPred, by = list(all.season$ID, all.season$Kill, all.season$Season), FUN = mean, na.rm =TRUE)
all.agg$SD<-aggregate(all.season$KillPred, by = list(all.season$ID, all.season$Kill, all.season$Season), FUN = sd, na.rm = TRUE)[,3]
all.agg$SE<-aggregate(all.season$KillPred, by = list(all.season$ID, all.season$Kill, all.season$Season), FUN = function(x) sd(x)/sqrt(length(x)))[,3]
names(all.agg)<-c('ID', 'Kill','Season', 'KillPrediction', 'SD', 'SE')
print(vplot<-ggplot2.violinplot(data = all.agg, xName = 'Kill', yName = 'KillPrediction',addDot = TRUE, dotSize = 0.5, dotPosition = "center",
groupName = 'Season', groupColors = c('firebrick3', 'dodgerblue4')))
print(vplot<-vplot + theme(panel.grid.major = element_line(linetype = "blank"),
panel.grid.minor = element_line(linetype = "blank"),
panel.background = element_rect(fill = NA)) +labs(title = "Distribution of Kill Site Predictions"))
####### Test for differences in Winter and Summer ###########
dev.off()
new.dat<-pr[pr$NewKill >0, ]
winsum<-TediousEcology::boot.fun(data = new.dat, prop = 0.8, n.boot = 500, samplesize = c(1,1), mtry = 4, cutoff = 0.5, pred.names = pred.names, cat.column = "NewKill")
winsum<-winsum[[1]]
winsumagg<-aggregate(winsum$KillPred, by = list(winsum$ID, winsum$NewKill), FUN = mean, na.rm =TRUE)
winsumagg$SD<-aggregate(winsum$KillPred, by = list(winsum$ID, winsum$NewKill), FUN = sd, na.rm = TRUE)[,3]
winsumagg$SE<-aggregate(winsum$KillPred, by = list(winsum$ID, winsum$NewKill), FUN = function(x) sd(x)/sqrt(length(x)))[,3]
names(winsumagg)<-c('ID', 'Kill','KillPrediction', 'SD', 'SE')
print(winsumplot<-ggplot2.violinplot(data = winsumagg, xName = 'Kill', yName = 'KillPrediction',addDot = TRUE, dotSize = 0.5, dotPosition = "center"))
print(winsumplot<-winsumplot + theme(panel.grid.major = element_line(linetype = "blank"),
panel.grid.minor = element_line(linetype = "blank"),
panel.background = element_rect(fill = NA)) +labs(title = "Distribution of Winter and Summer Kill Site"))
######### Investigate how model can be improved with combined winter and summer data #############
dev.off()
all.season<-TediousEcology::boot.fun(data = pr, prop = 0.8, n.boot = 500, mtry = 4, samplesize = c(50,10), cutoff = 0.75, pred.names = pred.names, cat.column = "Kill")
all.season<-all.season[[1]]
all.agg<-aggregate(all.season$KillPred, by = list(all.season$ID, all.season$Kill), FUN = mean, na.rm =TRUE)
all.agg$SD<-aggregate(all.season$KillPred, by = list(all.season$ID, all.season$Kill), FUN = sd, na.rm = TRUE)[,3]
all.agg$SE<-aggregate(all.season$KillPred, by = list(all.season$ID, all.season$Kill), FUN = function(x) sd(x)/sqrt(length(x)))[,3]
names(all.agg)<-c('ID', 'Kill','KillPrediction', 'SD', 'SE')
print(vplot<-ggplot2.violinplot(data = all.agg, xName = 'Kill', yName = 'KillPrediction',addDot = TRUE, dotSize = 1, dotPosition = "jitter"))
print(vplot<-vplot + theme(panel.grid.major = element_line(linetype = "blank"),
panel.grid.minor = element_line(linetype = "blank"),
panel.background = element_rect(fill = NA)) +labs(title = "Distribution of Kill Site Predictions"))
####### Pull out data where the predictions are "good" ######
whole.agg<-all.agg
whole.agg$Keep<-ifelse(whole.agg$Kill == 1 & whole.agg$KillPrediction < 0.2 | whole.agg$Kill == 0 & whole.agg$KillPrediction > 0.8, 0, 1)
keep<-whole.agg[whole.agg$Keep ==1,]
nokeep<-whole.agg[whole.agg$Keep == 0,]
unikeep<-paste(keep$ID, keep$Kill, sep = "_")
uninokeep<-paste(nokeep$ID, nokeep$Kill, sep = "_")
pr$NewID<-paste(pr$ID, pr$Kill, sep = "_")
newdata<-pr[pr$NewID %in% unikeep,]
otherdata<-pr[pr$NewID %in% uninokeep,]
all.season.2<-TediousEcology::boot.fun(data = newdata, prop = 0.8, n.boot = 500, mtry = 4, samplesize = c(50,10), cutoff = 0.75, pred.names = pred.names, cat.column = "Kill")
all.season.2<-all.season.2[[1]]
all.agg<-aggregate(all.season.2$KillPred, by = list(all.season.2$ID, all.season.2$Kill), FUN = mean, na.rm =TRUE)
all.agg$SD<-aggregate(all.season.2$KillPred, by = list(all.season.2$ID, all.season.2$Kill), FUN = sd, na.rm = TRUE)[,3]
all.agg$SE<-aggregate(all.season.2$KillPred, by = list(all.season.2$ID, all.season.2$Kill), FUN = function(x) sd(x)/sqrt(length(x)))[,3]
names(all.agg)<-c('ID', 'Kill','KillPrediction', 'SD', 'SE')
print(vplot<-ggplot2.violinplot(data = all.agg, xName = 'Kill', yName = 'KillPrediction',addDot = TRUE, dotSize = 1, dotPosition = "jitter"))
print(vplot<-vplot + theme(panel.grid.major = element_line(linetype = "blank"),
panel.grid.minor = element_line(linetype = "blank"),
panel.background = element_rect(fill = NA)) +labs(title = "Distribution of Kill Site Predictions"))
##### It's clear that winter and summer data can't be differentiated, so we can combine ######
######################## Run a Model with just geomorphic variables ##############################
geo.pred.names<-(names(pr[,c(9,15,19,21:23)]))
geo.model<-TediousEcology::boot.fun(data = newdata, prop = 0.8, n.boot = 500, mtry = 4, samplesize = c(50,10), cutoff = 0.75, pred.names = geo.pred.names, cat.column = "Kill")
geo.model<-geo.model[[1]]
geo.agg<-aggregate(geo.model$KillPred, by = list(geo.model$ID, geo.model$Kill), FUN = mean, na.rm =TRUE)
geo.agg$SD<-aggregate(geo.model$KillPred, by = list(geo.model$ID, geo.model$Kill), FUN = sd, na.rm = TRUE)[,3]
geo.agg$SE<-aggregate(geo.model$KillPred, by = list(geo.model$ID, geo.model$Kill), FUN = function(x) sd(x)/sqrt(length(x)))[,3]
names(geo.agg)<-c('ID', 'Kill','KillPrediction', 'SD', 'SE')
print(vplot<-ggplot2.violinplot(data = geo.agg, xName = 'Kill', yName = 'KillPrediction',addDot = TRUE, dotSize = 1, dotPosition = "jitter"))
print(vplot<-vplot + theme(panel.grid.major = element_line(linetype = "blank"),
panel.grid.minor = element_line(linetype = "blank"),
panel.background = element_rect(fill = NA)) +labs(title = "Distribution of Kill Site Predictions"))
##### Well, this doesn't make accuracy any better, seems as though removing outliers, and keep all variables is the best way forward ####
predrisk<-randomForest(newdata[, pred.names], as.factor(newdata[,'Kill']), sampsize = c(317,56), mtry = 4, cutoff = c(0.75, 1-0.75))
files<-unzip(raspath, files = NULL)
files<-grep(".img$", files, value = TRUE)
r<-list()
for(i in 1:length(files)){
tempras<-raster(files[i])
r[[i]]<-assign(names(tempras), tempras)
stack<-lapply(r, stack)
rasstack<-stack(stack)
}
predmap<-raster::predict(rasstack, predrisk, progress= "text", type ="prob", index = 2)
writeRaster(predmap, 'C:/Users/khuggler/Box Sync/DEER/GradStudentWork/Huggler/Chapter1/Analyses/Data/PredationRiskLayer/PredRiskSummer.img', format = "HFA",
overwrite = TRUE)
varImpPlot(predrisk)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.