Nothing
#' @title function for the best kappa parameter
#' @description
#' Computationally optimized function for determining the best kappa parameter for the optimal similarity
#' @references
#' Song, Y. (2022). Geographically Optimal Similarity. Mathematical Geosciences. doi: 10.1007/s11004-022-10036-8.
#'
#' @param formula A formula of GOS model.
#' @param data A `data.frame` or `tibble` of observation data.
#' @param kappa (optional) A numeric value of the percentage of observation locations
#' with high similarity to a prediction location. \eqn{kappa = 1 - tau}, where `tau` is
#' the probability parameter in quantile operator. kappa is 0.25 means that 25% of
#' observations with high similarity to a prediction location are used for modelling.
#' @param nrepeat (optional) A numeric value of the number of cross-validation training times.
#' The default value is `10`.
#' @param nsplit (optional) The sample training set segmentation ratio,which in `(0,1)`.
#' Default is `0.5`.
#' @param cores (optional) Positive integer. If cores > 1, a `parallel` package
#' cluster with that many cores is created and used. You can also supply a cluster
#' object. Default is `1`.
#'
#' @return A list.
#' \describe{
#' \item{\code{bestkappa}}{the result of best kappa}
#' \item{\code{cvrmse}}{all RMSE calculations during cross-validation}
#' \item{\code{cvmean}}{the average RMSE corresponding to different kappa in the cross-validation process}
#' \item{\code{plot}}{the plot of rmse changes corresponding to different kappa}
#' }
#' @export
#'
#' @examples
#' data("zn")
#' # log-transformation
#' hist(zn$Zn)
#' zn$Zn <- log(zn$Zn)
#' hist(zn$Zn)
#' # remove outliers
#' k <- removeoutlier(zn$Zn, coef = 2.5)
#' dt <- zn[-k,]
#' # determine the best kappa
#' system.time({
#' b1 <- gos_bestkappa(Zn ~ Slope + Water + NDVI + SOC + pH + Road + Mine,
#' data = dt,
#' kappa = c(0.01, 0.1, 1),
#' nrepeat = 1,
#' cores = 1)
#' })
#' b1$bestkappa
#' b1$plot
#'
gos_bestkappa = \(formula, data = NULL, kappa = seq(0.05,1,0.05),
nrepeat = 10, nsplit = 0.5, cores = 1){
doclust = FALSE
if (inherits(cores, "cluster")) {
doclust = TRUE
} else if (cores > 1) {
doclust = TRUE
cores = parallel::makeCluster(cores)
on.exit(parallel::stopCluster(cores), add=TRUE)
}
no = nrow(data)
formula = stats::as.formula(formula)
namey = all.vars(formula)[1]
CalRMSE = \(yobse,ypred){
return(sqrt(mean((yobse-ypred)^2)))
}
calcvrmse = \(paramp){# The function is wrapped this way to use `parallel::parLapply`.
i = paramp[[1]]
seed = paramp[[2]]
kappa = paramp[[3]]
set.seed(seed)
trainindex = sample.int(n = no,
size = floor(nsplit * no),
replace = F)
cvtrain = data[trainindex, ]
cvtest = data[-trainindex, ]
g = gos(formula, data = cvtrain, newdata = cvtest,
kappa = kappa, cores = 1)
pred = g$pred
cvrmse = c(kappa,CalRMSE(cvtest[[namey]], pred))
names(cvrmse) = c('kappa','rmse')
return(cvrmse)
}
paradf = data.frame("i" = seq_along(rep(kappa, times = nrepeat)),
"seed" = rep(c(1:nrepeat), each = length(kappa)),
"kappa" = rep(kappa, times = nrepeat))
parak = split(paradf, seq_len(nrow(paradf)))
if (doclust) {
parallel::clusterExport(cores,c('gos'))
out_rmse = parallel::parLapply(cores,parak,calcvrmse)
out_rmse = tibble::as_tibble(do.call(rbind, out_rmse))
} else {
out_rmse = purrr::map_dfr(parak,calcvrmse)
}
cv.out = out_rmse %>%
dplyr::summarise(rmse = mean(rmse,na.rm = T),
.by = kappa)
k = which(cv.out$rmse == min(cv.out$rmse))[1]
best_kappa = cv.out$kappa[k]
l1 = (max(cv.out$rmse)-min(cv.out$rmse))*0.1
best_x = cv.out$kappa[k]
best_y = cv.out$rmse[k]
p1 = ggplot2::ggplot(cv.out,
ggplot2::aes(x = kappa, y = rmse))+
ggplot2::geom_point()+
ggplot2::geom_line() +
ggrepel::geom_label_repel(data = data.frame(kappa = best_x,
rmse = best_y),
label=as.character(best_kappa)) +
ggplot2::scale_x_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
ggplot2::scale_y_continuous(limits = c(min(cv.out$rmse) - l1,
max(cv.out$rmse))) +
ggplot2::theme_bw()
out = list("bestkappa" = best_kappa,
"cvrmse" = out_rmse,
"cvmean" = cv.out,
"plot" = p1)
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.