inst/doc/spatialsample.R

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 8,
  fig.height = 5.75,
  eval = requireNamespace("gifski", quietly = TRUE) && requireNamespace("modeldata", quietly = TRUE) && sf::sf_use_s2()
)

## ----include = FALSE----------------------------------------------------------
library(ggplot2)
theme_set(theme_minimal())

## -----------------------------------------------------------------------------
data("ames", package = "modeldata")

## -----------------------------------------------------------------------------
ames_sf <- sf::st_as_sf(
  ames,
  # "coords" is in x/y order -- so longitude goes first!
  coords = c("Longitude", "Latitude"),
  # Set our coordinate reference system to EPSG:4326,
  # the standard WGS84 geodetic coordinate reference system
  crs = 4326
)

## ----eval = FALSE-------------------------------------------------------------
#  log10(Sale_Price) ~ Year_Built + Gr_Liv_Area + Bldg_Type

## -----------------------------------------------------------------------------
library(spatialsample)

set.seed(123)
cluster_folds <- spatial_clustering_cv(ames_sf, v = 15)

autoplot(cluster_folds)

## -----------------------------------------------------------------------------
cluster_folds

## -----------------------------------------------------------------------------
set.seed(123)
block_folds <- spatial_block_cv(ames_sf, v = 15)

autoplot(block_folds)

## -----------------------------------------------------------------------------
set.seed(123)
location_folds <-
  spatial_leave_location_out_cv(
    ames_sf,
    group = Neighborhood,
    v = 15
  )

autoplot(location_folds)

## -----------------------------------------------------------------------------
cluster_folds$type <- "cluster"
block_folds$type <- "block"
location_folds$type <- "location"

resamples <-
  dplyr::bind_rows(
    cluster_folds,
    block_folds,
    location_folds
  )

## -----------------------------------------------------------------------------
# `splits` will be the `rsplit` object
compute_preds <- function(splits) {
  # fit the model to the analysis set
  mod <- lm(log10(Sale_Price) ~ Year_Built + Bldg_Type * log10(Gr_Liv_Area),
    data = analysis(splits)
  )
  # identify the assessment set
  holdout <- assessment(splits)
  # return the assessment set, with true and predicted price
  tibble::tibble(
    geometry = holdout$geometry,
    Sale_Price = log10(holdout$Sale_Price),
    .pred = predict(mod, holdout)
  )
}

## -----------------------------------------------------------------------------
compute_preds(cluster_folds$splits[[7]])

## -----------------------------------------------------------------------------
library(purrr)
library(dplyr)

cv_res <- resamples %>%
  mutate(.preds = map(splits, compute_preds))

## -----------------------------------------------------------------------------
library(tidyr)
library(yardstick)

cv_rmse <- cv_res %>%
  unnest(.preds) %>%
  group_by(id, type) %>%
  rmse(Sale_Price, .pred)

cv_rmse

## ----fig.height=12------------------------------------------------------------
library(ggplot2)

cv_res %>%
  unnest(.preds) %>%
  left_join(cv_rmse, by = c("id", "type")) %>%
  ggplot(aes(color = .estimate)) +
  geom_sf(aes(geometry = geometry), alpha = 0.5) +
  labs(color = "RMSE") +
  scale_color_viridis_c() +
  facet_wrap(vars(type), ncol = 1)

Try the spatialsample package in your browser

Any scripts or data that you put into this service are public.

spatialsample documentation built on Nov. 8, 2023, 1:08 a.m.