#' gbm.utils: roc, calibration & gbm.predict.grids functions bundle
#'
#' Note: Man page named after 1st function in script i.e. roc but script
#' contains 3. Should be no need for users to interact with these directly
#'
#' roc: adapted from Ferrier, Pearce and Watson's code, by J.Elith, see: Hanley,
#' J.A. & McNeil, B.J. (1982) The meaning and use of the area under a Receiver
#' Operating Characteristic (ROC) curve. Radiology, 143, 29-36. Also Pearce, J.
#' & Ferrier, S. (2000) Evaluating the predictive performance of habitat models
#' developed using logistic regression. Ecological Modelling, 133, 225-245. This
#' is the non-parametric calculation for area under the ROC curve, using the
#' fact that a MannWhitney U statistic is closely related to the area. In dismo,
#' this is used in the gbm routines, but not elsewhere (see evaluate).
#'
#' calibration: j elith/j leathwick 17th March 2005. Calculates calibration
#' statistics for either binomial or count data but the family argument must be
#' specified for the latter a conditional test for the latter will catch most
#' failures to specify the family
#'
#' gbm.predict.grids: J.Elith / J.Leathwick, March 07. To make predictions to
#' sites or grids. If to sites, the predictions are written to the R workspace.
#' If to grid, the grids are written to a nominated directory and optionally
#' also plotted in R. New data (new.dat) must be a data frame with column names
#' identical to names for all variables in the model used for prediction.
#' pred.vec is a vector of -9999's, the length of the scanned full grid (i.e.
#' without nodata values excluded).filepath must specify the whole path as a
#' character vector,but without the final file name - eg "c:/gbm/"
#'
#' @param obsdat Observed data.
#' @param preddat Predicted data.
#'
#' @return roc & calibration stats internally within gbm runs e.g. in gbm.auto.
#' @importFrom gbm predict.gbm
#' @importFrom grDevices topo.colors
#' @importFrom graphics image
#' @importFrom stats binomial glm pchisq poisson
#' @importFrom utils write.table
#' @author Simon Dedman, \email{simondedman@@gmail.com}
#' @author Jane Elith
#' @author John Leathwick
#' @examples None
roc <- function(obsdat, preddat) {
# code adapted from Ferrier, Pearce and Watson's code, by J.Elith
# see: Hanley, J.A. & McNeil, B.J. (1982) The meaning and use of the area
# under a Receiver Operating Characteristic (ROC) curve. Radiology, 143, 29-36
#
# Pearce, J. & Ferrier, S. (2000) Evaluating the predictive performance of habitat models developed using logistic regression.
# Ecological Modelling, 133, 225-245.
# this is the non-parametric calculation for area under the ROC curve, using the fact that a MannWhitney U statistic is closely related to
# the area. In dismo, this is used in the gbm routines, but not elsewhere (see evaluate).
if (length(obsdat) != length(preddat))
stop("obs and preds must be equal lengths")
n.x <- length(obsdat[obsdat == 0])
n.y <- length(obsdat[obsdat == 1])
xy <- c(preddat[obsdat == 0], preddat[obsdat == 1])
rnk <- rank(xy)
wilc <- ((n.x * n.y) + ((n.x * (n.x + 1))/2) - sum(rnk[1:n.x]))/(n.x * n.y)
return(round(wilc, 4))
}
#' gbm.utils: calibration
#'
#' calibration: j elith/j leathwick 17th March 2005. Calculates calibration
#' statistics for either binomial or count data but the family argument must be
#' specified for the latter a conditional test for the latter will catch most
#' failures to specify the family
#'
#' @param obs Observed data.
#' @param preds Predicted data.
#' @param family Statistical distribution family.
#'
#' @return roc & calibration stats internally within gbm runs e.g. in gbm.auto.
#' @author Simon Dedman, \email{simondedman@@gmail.com}
#' @author Jane Elith
#' @author John Leathwick
#' @examples None
calibration <- function(obs, preds, family = "binomial") {
# j elith/j leathwick 17th March 2005
# calculates calibration statistics for either binomial or count data but the family argument must be specified for the latter
# a conditional test for the latter will catch most failures to specify the family
if (family == "bernoulli") family <- "binomial"
pred.range <- max(preds) - min(preds)
if (pred.range > 1.2 & family == "binomial") {
print(paste0("range of response variable is ", round(pred.range, 2)), quote = F)
print("check family specification", quote = F)
return()
}
if (family == "binomial") {
pred <- preds + 1e-005
pred[pred >= 1] <- 0.99999
mod <- glm(obs ~ log((pred)/(1 - (pred))), family = binomial)
lp <- log((pred)/(1 - (pred)))
a0b1 <- glm(obs ~ offset(lp) - 1, family = binomial)
miller1 <- 1 - pchisq(a0b1$deviance - mod$deviance, 2)
ab1 <- glm(obs ~ offset(lp), family = binomial)
miller2 <- 1 - pchisq(a0b1$deviance - ab1$deviance, 1)
miller3 <- 1 - pchisq(ab1$deviance - mod$deviance, 1)
}
if (family == "poisson") {
mod <- glm(obs ~ log(preds), family = poisson)
lp <- log(preds)
a0b1 <- glm(obs ~ offset(lp) - 1, family = poisson)
miller1 <- 1 - pchisq(a0b1$deviance - mod$deviance, 2)
ab1 <- glm(obs ~ offset(lp), family = poisson)
miller2 <- 1 - pchisq(a0b1$deviance - ab1$deviance, 1)
miller3 <- 1 - pchisq(ab1$deviance - mod$deviance, 1)
}
calibration.result <- c(mod$coef, miller1, miller2, miller3)
names(calibration.result) <- c("intercept", "slope", "testa0b1", "testa0|b1", "testb1|a")
return(calibration.result)
}
#' gbm.utils: gbm.predict.grids
#'
#' gbm.predict.grids: J.Elith / J.Leathwick, March 07. To make predictions to
#' sites or grids. If to sites, the predictions are written to the R workspace.
#' If to grid, the grids are written to a nominated directory and optionally
#' also plotted in R. New data (new.dat) must be a data frame with column names
#' identical to names for all variables in the model used for prediction.
#' pred.vec is a vector of -9999's, the length of the scanned full grid (i.e.
#' without nodata values excluded).filepath must specify the whole path as a
#' character vector,but without the final file name - eg "c:/gbm/"
#' @param model Gbm model object.
#' @param new.dat New data to predict to. Must be a data frame with column names
#' identical to names for all variables in the model used for prediction.
#' @param want.grids If used, grids are written to a nominated directory and
#' optionally also plotted in R.
#' @param preds2R Write predictions to R directory.
#' @param sp.name Species prediction name
#' @param pred.vec A vector of -9999's, the length of the scanned full grid
#' (i.e. without nodata values excluded).
#' @param filepath Must specify the whole path as a character vector,but without
#' the final file name - eg "c:/gbm/".
#' @param num.col Number of columns for grids.
#' @param num.row Number of rows for grids.
#' @param xll X latitude longitude corner.
#' @param yll Y latitude longitude corner.
#' @param cell.size Cell size.
#' @param no.data No data value.
#' @param plot Plot output.
#' @param full.grid Predict to full grid else to part grid.
#' @param part.number Number of grid subset part.
#' @param part.row Number of rows of grid subset part.
#' @param header Want.grids header included.
#'
#' @return Powers the predictive mapping element of gbm.map.
#' @author Simon Dedman, \email{simondedman@@gmail.com}
#' @author Jane Elith
#' @author John Leathwick
#' @examples None
gbm.predict.grids <- function(model, new.dat, want.grids = F, preds2R = T, sp.name = "preds", pred.vec = NULL, filepath = NULL,
num.col = NULL, num.row = NULL, xll = NULL, yll = NULL, cell.size = NULL, no.data = NULL, plot = F,
full.grid = T, part.number = NULL, part.row = NULL, header = T)
{
# J.Elith / J.Leathwick, March 07
# to make predictions to sites or grids. If to sites, the predictions are written to the R workspace. If to grid,
# the grids are written to a nominated directory and optionally also plotted in R
#
# new data (new.dat) must be a data frame with column names identical to names for all variables in the model used for prediction.
# pred.vec is a vector of -9999's, the length of the scanned full grid (i.e. without nodata values excluded).
# filepath must specify the whole path as a character vector,but without the final file name - eg "c:/gbm/"
temp <- predict.gbm(model, new.dat, n.trees = model$gbm.call$best.trees, type = "response")
if (want.grids) {
newname <- paste0(filepath, sp.name,".asc")
full.pred <- pred.vec
full.pred[as.numeric(row.names(new.dat))] <- temp
if (header) {
write(paste0("ncols ",num.col),newname)
write(paste0("nrows ",num.row),newname,append = T)
write(paste0("xllcorner ",xll),newname,append = T)
write(paste0("yllcorner ",yll),newname,append = T)
write(paste0("cellsize ",cell.size),newname,append = T)
write(paste0("NODATA_value ",no.data),newname,append = T)
}
if (full.grid) {
full.pred.mat <- matrix(full.pred, nrow = num.row, ncol = num.col, byrow = T)
if (plot) {image(z = t(full.pred.mat)[, nrow(full.pred.mat):1], zlim = c(0,1), col = rev(topo.colors(12)))}
write.table(full.pred.mat, newname, sep = " ", append = T, row.names = F, col.names = F)
#also write to R directory, if required:
if (preds2R) {assign(sp.name, temp, pos = 1)}
} else {
full.pred.mat <- matrix(full.pred, nrow = part.row, ncol = num.col, byrow = T)
write.table(full.pred.mat, newname, sep = " ", append = T, row.names = F, col.names = F)
if (preds2R) {assign(paste0(sp.name, part.number), temp, pos = 1)}
}
} else {
assign(sp.name, temp, pos = 1)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.