inst/doc/SubSample.R

## ----include = FALSE----------------------------------------------------------
library(ARUtools)
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
options(tibble.print_min = 4L, tibble.print_max = 4L)

## ----code_folding = 'hide'----------------------------------------------------
library(dplyr)
library(stringr)
library(lubridate)

simple_deploy <-
  tidyr::expand_grid(
    site_id = unique(example_sites$Sites),
    doy = seq(121, 191, by = 2),
    times = seq(-30, 120, by = 30)
  ) |>
  tidyr::separate(site_id, into = c("plot", "site"), sep = "_", remove = F) |>
  left_join(example_sites, join_by(site_id == Sites)) |>
  mutate(
    # aru_id = glue::glue("BARLT-000{as.numeric(as.factor(site_id))}"),
    date = ymd("2028-01-01") + doy,
    date_time = ymd_hm(glue::glue("{date} 06:00")) + minutes(times),
    date_time_chr = str_replace(as.character(date_time), "\\s", "T"),
    file_name = glue::glue("{plot}/{site_id}/{ARU}_{date_time_chr}.wav")
  )

simple_deploy

## -----------------------------------------------------------------------------
site_info <- simple_deploy |>
  slice_min(order_by = date_time, n = 1, by = site_id) |>
  dplyr::select(site_id, ARU, lon, lat, date_time)

## -----------------------------------------------------------------------------
sites <- clean_site_index(site_info,
  name_aru_id = "ARU",
  name_site_id = "site_id",
  name_date_time = c("date_time"),
  name_coords = c("lon", "lat")
)
metadata <- clean_metadata(project_files = simple_deploy$file_name) |>
  add_sites(sites) |>
  calc_sun() |>
  dplyr::mutate(doy = lubridate::yday(date))
dplyr::glimpse(metadata)

## ----fig.height=6, out.height=8, warning=FALSE--------------------------------
p <- sim_selection_weights(
  min_range = c(-70, 240),
  day_range = c(120, lubridate::yday(lubridate::ymd("2021-07-20"))),
  min_mean = 30, min_sd = 60,
  day_mean = lubridate::yday(lubridate::ymd("2021-06-10")),
  day_sd = 20, offset = 0,
  return_log = TRUE, selection_fun = "norm"
)

## -----------------------------------------------------------------------------
full_selection_probs <-
  metadata |>
  calc_selection_weights(
    col_site_id = site_id,
    col_min = t2sr,
    col_day = doy,
    params = p
  )

## ----echo=FALSE---------------------------------------------------------------
library(ggplot2)
ggplot(full_selection_probs, aes(doy, t2sr, colour = psel_normalized)) +
  geom_point() +
  scale_colour_viridis_c() +
  facet_wrap(~site_id)

## -----------------------------------------------------------------------------
sample_size <- count(full_selection_probs, site_id) |>
  transmute(site_id,
    n = floor(n * .02),
    n_os = ceiling(n * .3)
  )

## ----echo=F-------------------------------------------------------------------
p_obs <-
  tidyr::expand_grid(
    p_obs = seq(0.01, 0.99, by = 0.01),
    n_obs = 1:20
  ) |>
  mutate(p_total = 1 - exp(-p_obs * n_obs)) |>
  ggplot(aes(n_obs, p_obs, fill = p_total), colour = NA) +
  geom_raster() +
  scale_fill_viridis_c() +
  labs(
    x = "Number of minutes observed", y = "Probably of observing per minute",
    fill = "Total\nprobability\nof\ndetection"
  )
p_obs

## -----------------------------------------------------------------------------
grts_res <- sample_recordings(full_selection_probs,
  n = sample_size,
  col_site_id = site_id,
  seed = 2024,
  col_sel_weights = psel_normalized
)

dplyr::glimpse(grts_res$sites_base)

## -----------------------------------------------------------------------------
withr::with_seed(2024, {
  random_sample <-
    full_selection_probs |>
    dplyr::slice_sample(
      n = 4, by = site_id,
      weight_by = psel_normalized,
      replace = F
    )
})

## ----eval=F-------------------------------------------------------------------
#  withr::with_seed(2024, {
#    random_sample_stratified <-
#      full_selection_probs |>
#      left_join(sample_size, by = join_by(site_id)) |>
#      nest_by(site_id, n) |>
#      rowwise() |>
#      mutate(sample = list(dplyr::slice_sample(
#        .data = data,
#        n = .data$n,
#        weight_by = psel_normalized,
#        replace = F
#      ))) |>
#      dplyr::select(site_id, sample) |>
#      tidyr::unnest(sample)
#  })

## -----------------------------------------------------------------------------
oversample <- filter(
  full_selection_probs,
  !path %in% random_sample$path
) |>
  dplyr::slice_sample(
    n = 2, by = site_id,
    weight_by = psel_normalized,
    replace = F
  )

## ----echo=F-------------------------------------------------------------------
filter(full_selection_probs, path %in% grts_res$sites_base$path) |>
  ggplot(aes(doy, t2sr)) +
  geom_point() +
  xlim(range(full_selection_probs$doy)) +
  ylim(range(full_selection_probs$t2sr)) +
  labs(x = "Day of Year", y = "Time to sunrise", title = "GRTS selection")

## ----echo=F-------------------------------------------------------------------
random_sample |>
  mutate(z = 1) |>
  ggplot(aes(doy, t2sr)) +
  geom_point() +
  xlim(range(full_selection_probs$doy)) +
  ylim(range(full_selection_probs$t2sr)) +
  labs(x = "Day of Year", y = "Time to sunrise", title = "Random selection")

## -----------------------------------------------------------------------------
withr::with_seed(6546, {
  random_sample$length <- sample(
    x = c("5min", "3min", "1min"),
    size = nrow(random_sample), replace = T
  )
})

## ----echo=F-------------------------------------------------------------------
count(random_sample, length, site_id) |>
  tidyr::pivot_wider(
    names_from = site_id,
    values_from = n, values_fill = list(n = 0),
    values_fn = as.numeric
  )

## -----------------------------------------------------------------------------
withr::with_seed(569, {
  sample5min <- slice_sample(random_sample,
    n = 1, by = site_id, weight_by = psel_normalized
  )

  sample3min <- slice_sample(
    random_sample |>
      filter(!path %in% sample5min$path),
    n = 1, by = site_id, weight_by = psel_normalized
  )


  random_sample_with_lengths <- random_sample |>
    mutate(
      Length_group =
        case_when(
          path %in% sample5min$path ~ "5min",
          path %in% sample3min$path ~ "3min",
          TRUE ~ "1min"
        ),
      length_clip = as.numeric(str_extract(Length_group, "^\\d")) * 60
    )
})

## ----echo=F-------------------------------------------------------------------
count(random_sample_with_lengths, Length_group, site_id) |>
  tidyr::pivot_wider(
    names_from = site_id,
    values_from = n, values_fill = list(n = 0),
    values_fn = as.numeric
  )

## ----eval=F-------------------------------------------------------------------
#  random_sample_with_lengths$length_clip <-
#    purrr::map(
#      1:nrow(random_sample_with_lengths),
#      ~ get_wav_length(
#        path = random_sample_with_lengths$path[[.x]],
#        return_numeric = T
#      )
#    )

## -----------------------------------------------------------------------------
random_sample_with_lengths$length <- 5 * 60

## ----eval=T-------------------------------------------------------------------
random_sample_with_lengths <-
  random_sample_with_lengths |>
  rowwise() |>
  mutate(StartTime = case_when(
    Length_group == "5min" ~ 0,
    TRUE ~ runif(
      1, 0,
      pmax(
        0,
        length - length_clip
      )
    )
  )) |>
  ungroup()

## ----echo=F-------------------------------------------------------------------
ggplot(random_sample_with_lengths, aes(StartTime, fill = Length_group)) +
  geom_histogram(binwidth = 10)

## -----------------------------------------------------------------------------
final_selection <- random_sample_with_lengths |>
  add_wildtrax()

final_selection |>
  head() |>
  dplyr::select(path, wildtrax_file_name)

## ----eval=F-------------------------------------------------------------------
#  out_directory <- "/path/to/upload/directory/"
#  dir.create(out_directory, recursive = T)
#  ul_tab <- expand_grid(
#    period = c("Dawn"), # Add 'Dusk' if using more than one time period
#    length = unique(selected_recordings$Length_group)
#  )
#  purrr::map(glue::glue("{out_directory}/{ul_tab$period}/{ul_tab$length}"),
#    dir.create,
#    recursive = T
#  )

## ----eval=F-------------------------------------------------------------------
#  log_output <-
#    format_clip_wave(
#      segment_df = final_selection,
#      in_base_directory = "", out_base_directory = out_directory,
#      length_clip_col = "length_clip",
#      sub_dir_out_col = c("Time_period", "Length_group"),
#      filepath_in_col = "path",
#      out_filename_col = "wildtrax_file_name",
#      use_job = F, filewarn = F
#    )

Try the ARUtools package in your browser

Any scripts or data that you put into this service are public.

ARUtools documentation built on Oct. 9, 2024, 1:07 a.m.