tests/testthat/helper.R

# Create 6x6 point grid with 1m distance between points
test_make_sp = function() {
  coordinates = expand.grid(315172:315177, 5690670:5690675)
  names(coordinates) = c("x", "y")
  coordinates
}

# regr tasks -------------------------------------------------------------------

# Create regression task
test_make_regr_task = function(coords_as_features = FALSE) {
  data = test_make_sp()
  data$p_1 = c(rep("A", 18), rep("B", 18))
  data$response = rnorm(36)

  TaskRegrST$new(
    id = "sp_regression",
    backend = data,
    target = "response",
    coordinate_names = c("x", "y"),
    crs = "+proj=utm +zone=33 +datum=WGS84 +units=m +no_defs",
    coords_as_features = coords_as_features
  )
}

# Create regression task
# similar to test_make_regr_task(), just to check if 'sf' objects work
# as intended
test_make_sf_regr_task = function(coords_as_features = FALSE) {
  data = test_make_sp()
  data$p_1 = c(rep("A", 18), rep("B", 18))
  data$response = rnorm(36)

  data_sf = sf::st_as_sf(data, coords = c("x", "y"), crs = "epsg:4326")

  as_task_regr_st(
    data_sf,
    id = "sf_regr",
    target = "response",
    coordinate_names = c("x", "y"),
    crs = "+proj=utm +zone=33 +datum=WGS84 +units=m +no_defs",
    coords_as_features = FALSE
  )
}

# classif tasks ----------------------------------------------------------------

# Create twoclass task
test_make_twoclass_task = function(group = FALSE, coords_as_features = FALSE,
  features = "numeric") {
  data = test_make_sp()
  if ("numeric" %in% features) {
    data$p_1 = c(rnorm(18, 0), rnorm(18, 10))
  }
  if ("factor" %in% features) {
    data$p_2 = as.factor(c(rep("lvl_1", 18), rep("lvl_2", 18)))
  }
  data$response = as.factor(c(rep("A", 18), rep("B", 18)))

  if (group) {
    data$group = rep_len(letters[1:10], 36)
  }

  task = TaskClassifST$new(
    id = "sp_twoclass",
    backend = data,
    target = "response",
    coordinate_names = c("x", "y"),
    crs = "+proj=utm +zone=33 +datum=WGS84 +units=m +no_defs",
    positive = "A",
    coords_as_features = coords_as_features
  )

  if (group) {
    task$col_roles$group = "group"
  }
  task
}

# Create twoclass sf task
# similar to test_make_twoclass_task(), just to check if 'sf' objects work
# as intended
test_make_sf_twoclass_task = function(group = FALSE, coords_as_features = FALSE, features = "numeric") {
  data = test_make_sp()
  if ("numeric" %in% features) {
    data$p_1 = c(rnorm(18, 0), rnorm(18, 10))
  }
  if ("factor" %in% features) {
    data$p_2 = as.factor(c(rep("lvl_1", 18), rep("lvl_2", 18)))
  }
  data$response = as.factor(c(rep("A", 18), rep("B", 18)))

  if (group) {
    data$group = rep_len(letters[1:10], 36)
  }

  data_sf = sf::st_as_sf(data, coords = c("x", "y"), crs = "epsg:4326")

  task = as_task_classif_st(
    data_sf,
    id = "sf_twoclass",
    target = "response",
    positive = "A",
    coords_as_features = coords_as_features
  )

  if (group) {
    task$col_roles$group = "group"
  }
  task
}

# Create twoclass sf task
test_make_sf_twoclass_df = function(group = FALSE,
  coords_as_features = FALSE, features = "numeric") {
  data = test_make_sp()
  if ("numeric" %in% features) {
    data$p_1 = c(rnorm(18, 0), rnorm(18, 10))
  }
  if ("factor" %in% features) {
    data$p_2 = as.factor(c(rep("lvl_1", 18), rep("lvl_2", 18)))
  }
  data$response = as.factor(c(rep("A", 18), rep("B", 18)))

  if (group) {
    data$group = rep_len(letters[1:10], 36)
  }

  data_sf = sf::st_as_sf(data, coords = c("x", "y"), crs = "epsg:4326")

  return(data_sf)
}

# sf DF to use directly with {blockCV} functions
test_make_blockCV_test_df = function() {
  set.seed(123)
  x = runif(1000, -80.4, -74)
  y = runif(1000, 39.6, 41)

  data = data.frame(
    spp = "test",
    label = factor(round(runif(length(x), 0, 1))),
    x = x,
    y = y
  )

  data_sf = sf::st_as_sf(data,
    coords = c("x", "y"),
    crs = "EPSG:4326"
  )

  return(data_sf)
}

# mlr3 task to compare mlr3spatiotempcv results with {blockCV} results
test_make_blockCV_test_task = function() {
  data = test_make_blockCV_test_df()

  task = as_task_classif_st(
    data,
    id = "test",
    target = "label",
    positive = "1"
  )
  return(task)
}

# mlr3 task to compare mlr3spatiotempcv results with {blockCV} results
test_make_knndm_test_task = function() {
  data = test_make_blockCV_test_df()

  task = as_task_classif_st(
    data,
    id = "test",
    target = "label",
    positive = "1"
  )
  return(task)
}

# multiclass tasks -------------------------------------------------------------
#
# Create multiclass task
test_make_multiclass = function() {
  data = test_make_sp()
  data$p_1 = rnorm(36)
  data$response = as.factor(c(rep("A", 9), rep("B", 9), rep("C", 9), rep("D", 9)))

  TaskClassifST$new(
    id = "sp_multiclass",
    backend = data,
    target = "response",
    coordinate_names = c("x", "y"),
    crs = "+proj=utm +zone=33 +datum=WGS84 +units=m +no_defs",
    coords_as_features = FALSE
  )
}

# mlr3pipelines Graph learner --------------------------------------------------

test_graph_learner = function(task, resampling,
  time_var = NULL,
  space_var = NULL,
  learner = "classif.featureless",
  measure = "classif.ce") {
  loadNamespace("mlr3pipelines")

  if (!is.null(time_var)) {
    task$set_col_roles(time_var, "time")
    task$set_col_roles(space_var, "space")
  }

  lgr::get_logger("mlr3")$set_threshold("warn")
  lgr::get_logger("bbotk")$set_threshold("warn")

  lrn = lrn(
    learner
  )
  po_lrn = mlr3pipelines::po("learner", lrn)

  # Create feature filter based on variable importance
  po_filter = mlr3pipelines::po("filter",
    filter = mlr3filters::flt("importance", learner = lrn)
  )

  # Create process (new learner) for filtering the task
  grph = mlr3pipelines::Graph$new()$add_pipeop(po_filter)$add_pipeop(po_lrn)$add_edge("importance", learner) # nolint
  glrn = mlr3pipelines::GraphLearner$new(grph)

  # Create filter parameters
  param_set = paradox::ps(importance.filter.frac = paradox::p_dbl(lower = 0.1, upper = 1))

  # Create filtering instance
  instance = mlr3tuning::TuningInstanceSingleCrit$new(
    task = task,
    learner = glrn,
    resampling = resampling,
    measure = msr(measure),
    search_space = param_set,
    terminator = mlr3tuning::trm("none")
  )

  # Create tuner
  tuner = mlr3tuning::tnr("grid_search", resolution = 2)
  tuner$optimize(instance)

  return(TRUE)
}

# vdiffr::expect_doppelganger = function(title, fig, path = NULL, ...) {
#   testthat::skip_if_not_installed("vdiffr")
#   vdiffr::expect_doppelganger(title = title, fig = fig, path = path, ...)
# }

Try the mlr3spatiotempcv package in your browser

Any scripts or data that you put into this service are public.

mlr3spatiotempcv documentation built on May 29, 2024, 11:30 a.m.