#'@title Single GDM Cross-Validation Test, Internal Function
#'
#'@description Undertake a cross-validation assessment of a GDM, using a single
#'training and testing dataset.
#'
#' @usage gdm.single.crossvalidation(spTable_train, spTable_test, geo=FALSE,
#' splines=NULL, knots=NULL)
#'
#'@param spTable_train (dataframe) A dataframe holding the GDM input table for
#'model fitting.
#'
#'@param spTable_test (dataframe) A dataframe holding the GDM input table for
#'model testing, having identical column names to 'spTable_train' but using
#'different site-pairs.
#'
#'@param geo (boolean) Whether geographic distance should be used as a
#'predictor in model fitting (default = FALSE).
#'
#'@param splines (vector) An optional vector of the number of I-spline basis
#'functions to be used for each predictor in fitting the model.
#'
#'@param knots (vector) An optional vector of knots in units of the predictor
#'variables to be used in the fitting process.
#'
#'@return List, providing cross-validation statistics. These are metrics that
#'describe how well the model fitted using the site-pair training table
#'predicts the dissimilarities in the site-pair testing table. Metrics
#'provided include:
#' 'Deviance.Explained' (the deviance explained for the training data);
#' 'Test.Deviance.Explained' (the deviance explained for the test data);
#' 'Mean.Error' (the mean of the prediction errors for the test data);
#' 'Mean.Absolute.Error' (the mean of the absolute prediction errors for the test data);
#' 'Root.Mean.Square.Error' (the root mean square of the prediction errors for the test data);
#' 'Obs.Pred.Correlation' (Pearson's correlation coefficient between observed and predicted dissimilarities);
#' 'Equalised.RMSE' (the root mean square error averaged across bands of observed dissimilarity (0.05 dissimilarity units wide));
#' 'Error.by.Observed.Value' (the root mean square error and the number of observations within each band of observed dissimilarity (0.05 dissimilarity units wide)).
#'
#' @keywords gdm internal
#'
#'@importFrom stats complete.cases
#'@importFrom stats cor
#'@importFrom stats na.omit
#'@importFrom stats predict
#'@importFrom stats sd
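#'
#' @examples
#' \dontrun{
#' # Illustrative sketch only: this is an internal function, normally reached
#' # via the package's cross-validation wrapper rather than called directly.
#' # 'sitePairTab' is assumed to be an existing GDM site-pair table (e.g.
#' # built with formatsitepair()); its rows are split at random into training
#' # and testing halves purely for demonstration.
#' train.rows <- sample(nrow(sitePairTab), floor(0.5 * nrow(sitePairTab)))
#' cv.stats <- gdm.single.crossvalidation(spTable_train = sitePairTab[train.rows, ],
#'                                        spTable_test = sitePairTab[-train.rows, ],
#'                                        geo = TRUE)
#' cv.stats$Test.Deviance.Explained
#' cv.stats$Error.by.Observed.Value
#' }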
#'
gdm.single.crossvalidation <- function(spTable_train,
                                       spTable_test,
                                       geo=FALSE,
                                       splines=NULL,
                                       knots=NULL)
{
  # omit rows with NAs, just in case
  GDM_Table_Training <- spTable_train[complete.cases(spTable_train),]
  GDM_Table_Testing <- spTable_test[complete.cases(spTable_test),]
  # suppress the warnings GDM often throws up
  oldw <- getOption("warn")
  options(warn = -1)
  # Fit the gdm on the training set of sites
  train.mod <- gdm(GDM_Table_Training,
                   geo=geo,
                   splines=splines,
                   knots=knots)
  # now predict the dissimilarity for the test sites (pairs)
  pred.test <- predict(train.mod, GDM_Table_Testing)
  # and fit a gdm to the test data to get the null deviance
  # (columns 1:6 of the site-pair table hold the observed dissimilarity, the
  # weights and the s1/s2 coordinates, so this fits a geographic-distance-only
  # model to the test pairs)
  test.mod <- gdm(GDM_Table_Testing[,c(1:6)],
                  geo=TRUE)
  # reset the warnings
  options(warn = oldw)
  if(!is.null(test.mod)){
    # Calculate deviance explained for the test data
    test.data.gdm.deviance <- calculate.gdm.deviance(predDiss=pred.test,
                                                     obsDiss=GDM_Table_Testing$distance)
    test.D2 <- ((test.mod$nulldeviance - test.data.gdm.deviance) / test.mod$nulldeviance)*100
    # if the deviance explained is negative, set it to zero
    if(test.D2 < 0){
      test.D2 <- 0
    }
  }else{
    test.D2 <- NA
  }
  # calculate an array of test statistics using observed & predicted
  # dissimilarities in the testing data
  # Error
  error <- GDM_Table_Testing[,1] - pred.test
  # Mean Error
  ME <- mean(error)
  # Mean Absolute Error
  MAE <- mean(abs(error))
  # Root Mean Square Error
  RMSE <- sqrt(mean(error^2))
  # correlation, observed vs predicted
  ObsPred.R <- cor(pred.test, GDM_Table_Testing[,1], method = "pearson")
  # break the prediction error up over the range of observed values, using
  # bands of observed dissimilarity 0.05 units wide
  obs.breaks <- seq(from=0, to=1, by=0.05)
  obs.value.err <- data.frame("obs.dissim"=seq(from=0.025, to=0.975, by=0.05),
                              "obs.count"=rep(0, times=20),
                              "pred.RMSE"=rep(0, times=20))
  obs.err <- cbind(GDM_Table_Testing[,1], error)
  for(i.cat in 1:nrow(obs.value.err))
  {
    # find the observed values within the specified range of dissimilarities
    # (the first band is closed on the left so observed values of exactly 0 are included)
    if(i.cat == 1){
      cat.obs.err <- obs.err[(obs.err[,1]>=obs.breaks[i.cat] & obs.err[,1]<=obs.breaks[(i.cat+1)]),2]
    }else{
      cat.obs.err <- obs.err[(obs.err[,1]>obs.breaks[i.cat] & obs.err[,1]<=obs.breaks[(i.cat+1)]),2]
    }
    # check if there are any observations in this category
    if(length(cat.obs.err)>0) {
      obs.value.err[i.cat,2] <- length(cat.obs.err)
      obs.value.err[i.cat,3] <- sqrt(mean(cat.obs.err^2))
    }else{
      obs.value.err[i.cat,3] <- NA
    }
  } #end for i.cat
  # summarise the prediction error across dissimilarity bands
  equ.RMSE <- mean(obs.value.err[,3], na.rm=TRUE)
  # write the outputs of the function
  list(Deviance.Explained = train.mod$explained,
       Test.Deviance.Explained = test.D2,
       Mean.Error = ME,
       Mean.Absolute.Error = MAE,
       Root.Mean.Square.Error = RMSE,
       Obs.Pred.Correlation = ObsPred.R,
       Equalised.RMSE = equ.RMSE,
       Error.by.Observed.Value = obs.value.err)
} # end gdm.single.crossvalidation function
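
# The commented sketch below (not executed) indicates how the statistics
# returned by a single call might be aggregated over repeated random
# train/test splits, as a cross-validation wrapper would typically do.
# 'sitePairTab' and the 80/20 split are illustrative assumptions only.
#
#   cv.reps <- lapply(1:10, function(i){
#     train.rows <- sample(nrow(sitePairTab), floor(0.8 * nrow(sitePairTab)))
#     gdm.single.crossvalidation(sitePairTab[train.rows, ],
#                                sitePairTab[-train.rows, ],
#                                geo = TRUE)
#   })
#   mean(sapply(cv.reps, function(x) x$Test.Deviance.Explained))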