Nothing
###############################################################################
# Function: grts_stratum (not exported)
# Programmers: Tony Olsen
# Date: January 22, 2021
#'
#' For a single stratum, select a spatially balanced sample using generalized random
#' tessellation stratified algorithm. For a point sample frame, the selection is a one-step
#' process. For linear and area sample frames, a two-step process is used. First a systematic
#' point sample is selected from either linear or area sample frame to create a dense point
#' sample. Then the grts algorithm is applied to the systematic point sample. The selection
#' in all cases is based on the survey design specification for the stratum.
#'
#' @param stratum Character value for the stratum name.
#'
#' @param dsgn List of components that specify the survey design. Includes all strata.
#' See grts for contents of dsgn.
#'
#' @param sframe Sample frame as an sf object. If the design is stratified,
#' unequal probability, proportional probability or has legacy sites, then sample frame
#' must include variables that identify the stratum; category, auxiliary and legacy variables
#' for unequal selection; or that identify elements that are legacy sites.
#' The coordinate system for sframe must be one where distance for coordinates is meaningful.
#'
#' @param sf_type The sample frame geometry type: point, linear or area
#'
#' @param wgt_units The units for weights if different from units in the \code{sf} object. Default
#' is \code{NULL}. Unit conversion uses \code{set_unit}s in \code{sf} package.
#'
#' @param pt_density For linear and area sample frame, the point density for the systematic
#' sample. Must be in units of the sframe sf.object. Default is NULL.
#'
#' @param legacy_option Logical variable that when TRUE legacy sites are to be included
#' in the survey design. Default is FALSE
#'
#' @param legacy_sites An sf object of legacy sites to be included in the survey design.
#'
#' @param maxtry Number of maximum attempts to ensure minimum distance between sites.
#' Default is 10.
#'
#' @param warn_ind A logical value where TRUE indicates a warning message.
#' Used for internal collection of messages only.
#'
#' @param warn_df A data frame containing messages warning of potential issues.
#' Used for internal collection of messages only.
#'
#' @return rslts A list consisting of an \code{sf} object for base sites, an \code{sf} object of
#' \code{n_over} sites (\code{NULL} if none) an \code{sf} object of \code{n_near} sites (\code{NULL} if none) where
#' the \code{sf} objects containing the sites selected that meet the survey design requirements
#' and \code{warn_ind} - logical value for warning indicator and \code{warn_df} - a data frame
#' for warning messages.
#'
#' @author Tony Olsen \email{olsen.tony@@epa.gov}
#'
#' @keywords survey
#'
#' @examples
#' \dontrun{
#' test_sample <- grts(stratum, dsgn = test_design, sframe = "test_sf", sf_type = "point")
#' }
#'
#' @noRd
###############################################################################
grts_stratum <- function(stratum, dsgn, sframe, sf_type, wgt_units = NULL, pt_density = NULL,
legacy_option = FALSE, legacy_sites = NULL, maxtry = 10,
warn_ind = FALSE, warn_df = NULL) {
# Sample sizes required
n_base <- dsgn[["n_base"]][[stratum]]
n_over <- sum(dsgn[["n_over"]][[stratum]], na.rm = TRUE)
if (is.null(n_over)) {
n_over <- 0
}
n_near <- dsgn[["n_near"]][[stratum]]
if (is.null(n_near)) {
n_near <- 0
}
n_total <- n_base + n_over
# set number of legacy sites to 0
n_legacy <- 0
# subset sframe to stratum
sftmp <- sframe[sframe$stratum == stratum, , drop = FALSE]
# find legacy site number for points if legacy_var provided
if (legacy_option == TRUE & is.null(legacy_sites)) {
n_legacy <- sum(!is.na(sftmp$legacy))
}
# subset legacy_sites to stratum if present for linear and area option
if (legacy_option == TRUE & (sf_type != "sf_point" | ((sf_type == "sf_point") & !is.null(legacy_sites)))) {
legtmp <- legacy_sites[legacy_sites$stratum == stratum, , drop = FALSE]
n_legacy <- nrow(legtmp)
}
# sf_type equals point
if (sf_type == "sf_point") {
ip_step1 <- 1
sftmp$xcoord <- st_coordinates(sftmp)[, "X"]
sftmp$ycoord <- st_coordinates(sftmp)[, "Y"]
sftmp$idpts <- 10000 + 1:nrow(sftmp)
}
# sf_type equals linear
if (sf_type == "sf_linear") {
# determine sample size from pt_density and total length of sample frame in stratum
stratum_len <- sum(st_length(sftmp))
if (!is.null(wgt_units)) {
stratum_len <- set_units(stratum_len, wgt_units, mode = "standard")
}
# set default equal to 10 population sites per requested sample site
if (is.null(pt_density)) {
pt_density <- 10
}
n_size <- as.integer(ceiling(pmin(1e9, pt_density * (n_base + n_over))))
sfpts <- st_sample(sftmp, size = n_size, type = "regular", exact = TRUE)
sfpts <- st_as_sf(as.data.frame(sfpts), crs = st_crs(sftmp))
sfpts <- st_cast(sfpts, to = "POINT")
# drop features with no points
sfpts <- sfpts[!st_is_empty(sfpts), ]
# join sites with linear features
sfpts <- st_cast(sfpts, to = "POINT")
sftmp <- st_join(sfpts, sftmp, join = st_nearest_feature)
names(sftmp)[names(sftmp) == "sfpts"] <- "geometry"
sftmp$xcoord <- st_coordinates(sftmp)[, "X"]
sftmp$ycoord <- st_coordinates(sftmp)[, "Y"]
sftmp$idpts <- 10000 + 1:nrow(sftmp)
# calculate step 1 inclusion probability based on realized sample size
ip_step1 <- nrow(sftmp) / stratum_len
}
# sf_type equals area
if (sf_type == "sf_area") {
# determine sample size from pt_density and total area of sample frame in stratum
stratum_area <- sum(st_area(sftmp))
if (!is.null(wgt_units)) {
stratum_area <- set_units(stratum_area, wgt_units, mode = "standard")
}
# set default equal to 10 population sites per requested sample site
if (is.null(pt_density)) {
pt_density <- 10
}
n_size <- as.integer(ceiling(pmin(1e9, pt_density * (n_base + n_over))))
sfpts <- st_sample(sftmp, size = n_size, type = "hexagonal", exact = TRUE)
sfpts <- st_as_sf(as.data.frame(sfpts), crs = st_crs(sftmp))
sfpts <- st_cast(sfpts, to = "POINT")
# drop features with no points
sfpts <- sfpts[!st_is_empty(sfpts), ]
sftmp <- st_join(sfpts, sftmp)
sftmp$xcoord <- st_coordinates(sftmp)[, "X"]
sftmp$ycoord <- st_coordinates(sftmp)[, "Y"]
sftmp$idpts <- 10000 + 1:nrow(sftmp)
# calculate step 1 inclusion probability based on realized sample size
ip_step1 <- nrow(sftmp) / stratum_area
}
# Determine number of elements in stratum
Nstratum <- nrow(sftmp)
# Determine if legacy sites are to be included for stratum design
if (legacy_option == TRUE & n_legacy > 0 & (sf_type != "sf_point" | ((sf_type == "sf_point") & !is.null(legacy_sites)))) {
tmp <- legtmp
addtmp <- setdiff(names(tmp), names(sftmp))
addleg <- setdiff(names(sftmp), names(tmp))
sftmp[, addtmp] <- NA
tmp[, addleg] <- NA
sftmp <- rbind(tmp, sftmp)
# Determine number of elements in stratum
Nstratum <- nrow(sftmp)
}
# set legacy that is NA to FALSE
if (legacy_option == TRUE & n_legacy > 0) {
sftmp$legacy <- ifelse(is.na(sftmp$legacy), FALSE, TRUE)
tmp <- sftmp[sftmp$legacy == TRUE, , drop = FALSE]
n_legacy <- nrow(tmp)
}
if (legacy_option == TRUE & n_legacy == 0) {
sftmp$legacy <- FALSE
}
# check that number of legacy sites is less than or equal number of base sites
# stop if not
if (n_legacy > n_base) {
message("Number of legacy sites is greater than number of base sites in at least one\n")
message("stratum. Please check that all strata have fewer legacy sites than base sites.\n")
opt <- options(show.error.messages = FALSE)
on.exit(options(opt))
stop()
}
# Step 2 site selection if linear or area; otherwise Step 1 for points.
# determine overall sample size required from dsgn for stratum
# account for n_over sample option if present
if (dsgn[["seltype"]][[stratum]] == "equal" | dsgn[["seltype"]][[stratum]] == "proportional") {
n_caty <- n_total
} else {
if (n_over == 0) {
n_caty <- dsgn[["caty_n"]][[stratum]]
} else {
base_prop <- dsgn[["caty_n"]][[stratum]] / sum(dsgn[["caty_n"]][[stratum]])
n_caty <- dsgn[["caty_n"]][[stratum]] + dsgn[["n_over"]][[stratum]] * base_prop
}
}
# If seltype is "equal" or "proportional", set caty to same as stratum
if (dsgn[["seltype"]][[stratum]] == "equal" | dsgn[["seltype"]][[stratum]] == "proportional") {
sftmp$caty <- "None"
}
# compute inclusion probabilities
ip <- grtspts_ip(
type = dsgn[["seltype"]][stratum], n_base = n_caty,
Nstratum = Nstratum, caty = sftmp$caty, aux = sftmp$aux,
warn_ind = warn_ind, warn_df = warn_df
)
# save initial inclusion probabilities
sftmp$ip_init <- ip$ip
sftmp$ip <- ip$ip
# accumulate warning messages if any
if (ip$warn_ind) {
warn_ind <- ip$warn_ind
warn_df <- ip$warn_df
warn_df$stratum <- ifelse(is.na(warn_df$stratum), stratum, warn_df$stratum)
}
# If legacy sites, adjust inclusion probabilities to use
# legacy inclusion probabilities
if (legacy_option == TRUE & n_legacy > 0) {
sftmp$ip <- grtspts_ipleg(sftmp$ip_init, sftmp$legacy == TRUE)
# accumulate warning messages if any
if (ip$warn_ind) {
warn_ind <- ip$warn_ind
warn_df <- ip$warn_df
warn_df$stratum <- ifelse(is.na(warn_df$stratum), stratum, warn_df$stratum)
}
}
# select sites if no minimum distance between sites
if (is.null(dsgn[["mindis"]][[stratum]])) {
sites <- sftmp[get_address(sftmp$xcoord, sftmp$ycoord, rand = TRUE), ]
s <- UPpivotal(sites$ip)
sites <- sites[round(s) == 1, ]
sites <- rho(sites)
sites$siteuse <- NA
sites$replsite <- NA
sites <- list(sites = sites, warn_ind = warn_ind, warn_df = warn_df)
}
# If minimum distance between sites, select sites
if (!is.null(dsgn[["mindis"]][[stratum]])) {
sites <- grtspts_mindis(dsgn[["mindis"]][[stratum]], sftmp,
samplesize = n_total,
stratum = stratum, maxtry = maxtry, legacy_option = legacy_option,
legacy_var = dsgn[["legacy_var"]],
warn_ind = warn_ind, warn_df = warn_df
)
}
# check for warning messages
warn_ind <- sites$warn_ind
warn_df <- sites$warn_df
if (warn_ind) {
warn_df$stratum <- ifelse(is.na(warn_df$stratum), stratum, warn_df$stratum)
}
# adjust inclusion probabilities when over sample sites present
sites[["sites"]]$ip_init <- sites[["sites"]]$ip_init * n_base / n_total
# Select replacement sites if n_near not NULL when do not have legacy sites
if (legacy_option == FALSE | n_legacy == 0) {
if (!is.null(dsgn[["n_near"]][[stratum]])) {
sites_near <- replace_near(dsgn[["n_near"]][[stratum]],
sites = sites[["sites"]],
sframe = sftmp
)
# Adjust inclusion probabilities for replacement sites if over sample sites present
if (n_over != 0) {
sites_near$ip_init <- sites_near$ip_init * n_base / n_total
}
}
}
# Select replacement sites if n_near not NULL when have legacy sites
if (legacy_option == TRUE & n_legacy > 0) {
if (!is.null(dsgn[["n_near"]][[stratum]])) {
keep <- sites[["sites"]][sites[["sites"]]$legacy %in% c(TRUE, FALSE), "idpts", drop = TRUE]
sites_near <- replace_near(dsgn[["n_near"]][[stratum]],
sites = sites[["sites"]][sites[["sites"]]$legacy %in% c(TRUE, FALSE), ],
sframe = subset(sftmp, !(sftmp$idpts %in% keep))
)
# Adjust inclusion probabilities for replacement sites if over sample sites present
if (n_over != 0) {
sites_near$ip_init <- sites_near$ip_init * n_base / n_total
}
}
}
# Assign original inclusion probabilities to sites, create weights and drop legacy ip variable
sites[["sites"]]$ip <- sites[["sites"]]$ip_init * ip_step1
sites[["sites"]]$wgt <- 1 / sites[["sites"]]$ip
tmp <- names(sites[["sites"]])
sites[["sites"]] <- subset(sites[["sites"]],
select = tmp[!(tmp %in% c("ip_init", "geometry"))]
)
# Do same for sites_near if any
if (is.null(dsgn[["n_near"]][[stratum]])) {
sites_near <- NULL
}
if (!is.null(dsgn[["n_near"]][[stratum]])) {
sites_near$ip <- sites_near$ip_init * ip_step1
sites_near$wgt <- 1 / sites_near$ip
tmp <- names(sites_near)
sites_near <- subset(sites_near, select = tmp[!(tmp %in% c("ip_init", "geometry"))])
sites_near[, setdiff(names(legacy_sites), names(sites_near))] <- NA
}
# Split sites to have separate sites_base, sites_legacy and sites_over
# save legacy sites if any and reduce sites_base to non legacy sites
sites_legacy <- NULL
if (legacy_option == TRUE & n_legacy > 0) {
sites_legacy <- sites[["sites"]][sites[["sites"]]$legacy == TRUE, ]
sites[["sites"]] <- sites[["sites"]][sites[["sites"]]$legacy == FALSE, ]
n_legacy <- nrow(sites_legacy)
}
# save base sites
sites_base <- NULL
if (n_base > n_legacy) {
sites_base <- sites[["sites"]][1:(n_base - n_legacy), ]
sites_base[, setdiff(names(legacy_sites), names(sites_base))] <- NA
}
# save n_over sample sites if any
sites_over <- NULL
if (n_over != 0) {
sites_over <- sites[["sites"]][(n_base - n_legacy + 1):(n_total - n_legacy), ]
sites_over$siteuse <- "Over"
sites_over[, setdiff(names(legacy_sites), names(sites_over))] <- NA
}
# if no legacy sites match in strata then put in appropriate column
if (legacy_option == TRUE & n_legacy == 0) {
sites_base$legacy <- FALSE
}
# create list for output and return result
rslts <- list(
sites_base = sites_base, sites_legacy = sites_legacy,
sites_over = sites_over, sites_near = sites_near,
warn_ind = warn_ind, warn_df = warn_df
)
invisible(rslts)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.