#' @title Wrapper for RF Runs
#' @description
#'
#' Convenience wrapper for batch RF runs: prepares the input data, optionally
#' subsets it to one or more Cleland provinces, fits a random forest and
#' reports how well the out-of-bag predictions track the observed response.
#'
#' @param df Input dataframe
#' @param y Name of response variable (a single character string). Defaults
#'   to the name of the first column of `df`.
#' @param incl Variables to include
#' @param prov Cleland province to include
#' @param ntree Number of trees to grow.
#' @param mtry Number of variables tried at each split. If `NULL`, it is set
#'   to `max(floor(p / 3), 1)` for a non-factor response and to
#'   `floor(sqrt(p))` for a factor response, where `p` is the number of
#'   predictors. Values larger than `p` are reduced to `p` with a warning.
#' @param ret What to return: `'rf'` for the fitted forest or `'r2'` for the
#'   predicted-vs-observed r-squared.
#' @param plot If `TRUE`, draw a variable-importance plot and a plot of
#'   predicted against observed values.
#'
#' @details
#'
#' If `prov` is not NULL, then `Cleland_province` must be present as a column
#' in the input dataframe.
#'
#' The r-squared is taken from a linear regression of the out-of-bag
#' predictions on the observed response and rounded to two decimals. It is
#' only defined for a non-factor response; for a factor response it is `NA`.
#'
#' @return
#'
#' For `ret = 'rf'`, the fitted `randomForest` object (invisibly), after the
#' r-squared has been printed. For `ret = 'r2'`, the r-squared. For any other
#' value of `ret`, `NULL` (invisibly), with a warning.
#'
#' @examples
#' \dontrun{
#' RandomForestWrapper(df = mydf)
#' }
#' @export
RandomForestWrapper <- function(
  df, y = colnames(df)[1], incl = colnames(df), prov = NULL,
  ntree = 500, mtry = NULL, ret = 'rf', plot = FALSE
) {
  # Validate cheap things first so a bad call fails before any model is grown.
  if (!is.character(y) || length(y) != 1L || is.na(y)) {
    stop('`y` must be a single column name (character string)', call. = FALSE)
  }
  ret_known <- length(ret) == 1L && ret %in% c('rf', 'r2')
  if (!ret_known) {
    warning("`ret` should be 'rf' or 'r2'; nothing will be returned",
            call. = FALSE)
  }

  # Prepare input df:
  # NOTE(review): `cc`, `char_to_fac` and `max_lvl` presumably request
  # complete cases, character-to-factor conversion and a cap on factor levels
  # (randomForest rejects predictors with more than 53 categories) -- confirm
  # against RSFIA::PrepDataForModels. unique() keeps the response from being
  # requested twice when `incl` already lists it (the default does).
  in_df <- RSFIA::PrepDataForModels(df, cc = TRUE, char_to_fac = TRUE,
                                    max_lvl = 53, incl = unique(c(incl, y)))
  if (length(prov) > 0) {
    if (!'Cleland_province' %in% colnames(in_df)) {
      stop('`prov` was supplied but `Cleland_province` is not a column ',
           'of the prepared data', call. = FALSE)
    }
    in_df <- in_df[in_df$Cleland_province %in% prov, , drop = FALSE]
  }
  if (!y %in% colnames(in_df)) {
    stop('response `', y, '` not found in the prepared data', call. = FALSE)
  }

  # Split response from predictors. drop = FALSE keeps a data frame even when
  # a single predictor remains.
  y_in <- in_df[[y]]
  if (is.factor(y_in)) {
    # Subsetting by province can leave empty classes, which randomForest
    # refuses to fit.
    y_in <- droplevels(y_in)
  }
  in_df <- in_df[, setdiff(colnames(in_df), y), drop = FALSE]
  if (ncol(in_df) < 1) {
    stop('no predictor columns left after removing the response',
         call. = FALSE)
  }
  is_regression <- !is.null(y_in) && !is.factor(y_in)

  # Set RF parameters:
  if (length(mtry) < 1) {
    if (is_regression) {
      mtry <- max(floor(ncol(in_df) / 3), 1)
    } else {
      mtry <- floor(sqrt(ncol(in_df)))
    }
  } else if (mtry > ncol(in_df)) {
    mtry <- ncol(in_df)
    warning('mtry input too large, set to bagging')
  }

  # Run RF:
  out_rf <- randomForest::randomForest(
    x = in_df, y = y_in, do.trace = TRUE, mtry = mtry, ntree = ntree
  )

  # Return plots and rf:
  # Without new data, predict() on a randomForest yields out-of-bag
  # predictions.
  out_pred <- stats::predict(out_rf)
  if (isTRUE(as.logical(plot))) {
    randomForest::varImpPlot(out_rf)
    graphics::plot(out_rf$y, out_pred,
                   xlab = 'Observed Y', ylab = 'Predicted Y')
  }

  # r-squared only makes sense for a numeric response; lm() cannot regress a
  # factor of predicted classes.
  if (is_regression) {
    r2 <- round(summary(stats::lm(out_pred ~ y_in))$r.squared, 2)
  } else {
    r2 <- NA_real_
  }

  if (!ret_known) {
    return(invisible(NULL))
  }
  if (ret == 'rf') {
    if (is_regression) {
      message('Predicted vs observed r-squared:')
      print(r2)
    }
    return(invisible(out_rf))
  }
  r2
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.