###############################################################################
# Function: grts (exported)
# Programmers: Tony Olsen, Tom Kincaid
# Date: January 22, 2021
#' Select a generalized random tessellation stratified (GRTS) sample
#'
#' Select a spatially balanced sample from a point (finite), linear / linestring (infinite),
#' or areal / polygon (infinite) sampling frame using the Generalized Random Tessellation
#' Stratified (GRTS) algorithm. The GRTS algorithm accommodates unstratified and
#' stratified sampling designs and allows for equal inclusion probabilities, unequal
#' inclusion probabilities according to a categorical variable, and inclusion
#' probabilities proportional to a positive auxiliary variable. Several additional
#' sampling options are included, such as including legacy (historical) sites,
#' requiring a minimum distance between sites, and selecting replacement sites.
#' For technical details, see Stevens and Olsen (2004).
#'
#'
#' @param sframe A sampling frame as an \code{sf} object. The coordinate
#' system for \code{sframe} must projected (not geographic). If m or z values
#' are in \code{sframe}'s geometry, they are silently dropped (i.e., only x-coordinates
#' and y-coordinates are preserved).
#'
#' @param n_base The base sample size required. If the sampling design is unstratified,
#' this is a single numeric value. If the sampling design is stratified, this is a named
#' vector or list whose names represent each stratum and whose values represent each
#' stratum's sample size. These names must match the values of the stratification
#' variable represented by \code{stratum_var}. Legacy sites are considered part
#' of the base sample, so the value for \code{n_base} should be equal to the number
#' of legacy sites plus the number of desired non-legacy sites.
#'
#' @param stratum_var A character string containing the name of the column from
#' \code{sframe} that identifies stratum membership for each element in \code{sframe}.
#' If stratum equals \code{NULL}, the sampling design is unstratified and all elements in \code{sframe}
#' are eligible to be selected in the sample. The default is \code{NULL}.
#'
#' @param seltype A character string or vector indicating the inclusion probability type,
#' which must be one of following: \code{"equal"} for equal inclusion probabilities;
#' \code{"unequal"} for unequal inclusion probabilities according to a categorical
#' variable specified by \code{caty_var}; and \code{"proportional"} for inclusion
#' probabilities proportional to a positive auxiliary variable specified by
#' \code{aux_var}. If the sampling design is unstratified, \code{seltype} is a single
#' character vector. If the sampling design is stratified, \code{seltype} is a named vector
#' whose names represent each stratum and whose values represent each stratum's
#' inclusion probability type. \code{seltype}'s default value tries to match the
#' intended inclusion probability type: If \code{caty_var} and \code{aux_var} are
#' not specified, \code{seltype} is \code{"equal"}; if \code{caty_var} is specified,
#' \code{seltype} is \code{"unequal"}; and if \code{aux_var} is specified, \code{seltype}
#' is \code{"proportional"}.
#'
#' @param caty_var A character string containing the name of the column from
#' \code{sframe} that represents the unequal probability variable.
#'
#' @param caty_n A character vector indicating the expected sample size for each
#' level of \code{caty_var}, the unequal probability variable. If the sampling design
#' is unstratified, \code{caty_n} is a named vector whose names represent each
#' level of \code{caty_var} and whose values represent each level's expected
#' sample size. The sum of \code{caty_n} must equal \code{n_base}. If the sampling design
#' is stratified and the expected sample sizes are the same among strata, \code{caty_n} is
#' a named vector whose names represent represent each
#' level of \code{caty_var} and whose values represent each level's expected
#' sample size -- these expected sample sizes are applied to all strata. The sum of
#' \code{caty_n} must equal each stratum's value in \code{n_base}.
#' If the sampling design is stratified and the expected sample sizes differ among strata,
#' \code{caty_n} is a list where each element is named as a stratum in \code{n_base}.
#' Each stratum's list element is a named vector whose
#' names represent each level of \code{caty_var} and whose values represent each
#' level's expected sample size (within the stratum). The sum of the values in each stratum's
#' list element must equal that stratum's value in \code{n_base}.
#'
#' @param aux_var A character string containing the name of the column from
#' \code{sframe} that represents the proportional (to size) inclusion probability
#' variable (auxiliary variable). This auxiliary variable must be positive, and the resulting
#' inclusion probabilities are proportional to the values of the auxiliary variable.
#' Larger values of the auxiliary variable result in higher inclusion probabilities.
#'
#' @param legacy_sites An sf object with a \code{POINT} or \code{MULTIPOINT}
#' geometry representing the legacy sites. spsurvey assumes that
#' the legacy sites were selected from a previous sampling design that
#' incorporated randomness into site selection and that the legacy sites
#' are elements of the current sampling frame. If \code{sframe} has a
#' \code{POINT} or \code{MULTIPOINT} geometry, the observations in \code{legacy_sites}
#' should not also be in \code{sframe} (i.e., duplicates are not removed). Thus, \code{sframe}
#' and \code{legacy_sites} together compose the current sampling frame. If m or z values
#' are in \code{legacy_sites}' geometry, they are silently dropped (i.e., only x-coordinates
#' and y-coordinates are preserved).
#'
#' @param legacy_stratum_var A character string containing the name of the column from
#' \code{legacy_sites} that identifies stratum membership for each element of \code{legacy_sites}.
#' This argument is required when the sampling design is stratified and its levels
#' must be contained in the levels of the \code{stratum_var} variable. The default value of \code{legacy_stratum_var}
#' is \code{stratum_var}, so \code{legacy_stratum_var} need only be specified explicitly when
#' the name of the stratification variable in \code{legacy_sites} differs from \code{stratum_var}.
#'
#' @param legacy_caty_var A character string containing the name of the column from
#' \code{legacy_sites} that identifies the unequal probability variable for each element of \code{legacy_sites}.
#' This argument is required when the sampling design uses unequal selection probabilities and its categories
#' must be contained in the levels of the \code{caty_var} variable. The default value of \code{legacy_caty_var}
#' is \code{caty_var}, so \code{legacy_caty_var} need only be specified explicitly when
#' the name of the unequal probability variable in \code{legacy_sites} differs from \code{caty_var}.
#'
#' @param legacy_aux_var A character string containing the name of the column from
#' \code{legacy_sites} that identifies the proportional probability variable for each element of \code{legacy_sites}.
#' This argument is required when the sampling design uses proportional selection probabilities and the values of the
#' \code{legacy_aux_var} variable must be positive. The
#' default value of \code{legacy_aux_var} is \code{aux_var}, so \code{legacy_aux_var} need only be specified explicitly
#' when the name of the proportional probability variable in \code{legacy_sites} differs from \code{aux_var}.
#'
#' @param legacy_var This argument can be used instead of \code{legacy_sites}
#' when \code{sframe} is a \code{POINT} or \code{MULTIPOINT}
#' geometry (i.e. a finite sampling frame),
#' When \code{legacy_var} is used, it is a character string containing the name of the column
#' from \code{sframe} that represents whether each site is a legacy site. For
#' legacy sites, the values of the \code{legacy_var} must contain character strings that
#' act as a legacy site identifier. For non-legacy sites, the values of the
#' \code{legacy_var} column must be \code{NA}. Using this approach,
#' \code{legacy_stratum_var}, \code{legacy_caty_var}, and \code{legacy_aux_var}
#' are not required and should not be used (because \code{legacy_var} represents a column
#' in \code{sframe}). \code{spsurvey} assumes that the legacy sites were selected from
#' a previous sampling design that incorporated randomness into site selection
#' and that the legacy sites are elements of the current sampling frame.
#'
#' @param mindis A numeric value indicating the desired minimum distance between sampled
#' sites. If the sampling design is stratified and \code{mindis} is an numeric value, the minimum
#' distance is applied to all strata. If the sampling design is stratified and different minimum distances
#' are desired among strata, then \code{mindis}
#' is a list whose names match the names of \code{n_base} and whose and values
#' are the minimum distance for the corresponding stratum. If a minimum distance is not desired
#' for a particular stratum, then the corresponding value in \code{mindis} should be \code{0} or
#' \code{NULL} (which is equivalent to \code{0}).
#' The units of \code{mindis} must represent the units in \code{sframe}. A warning is returned if the
#' minimum distance could not be reached after \code{maxtry} attempts. If legacy sites are used, the minimum distance
#' requirement (and subsequent warning if \code{maxtry} attempts are reached) is enforced for all base sites
#' that are not legacy sites (i.e., the minimum distance is enforced for these sites
#' by comparing distances against all base sites (legacy and non-legacy)).
#'
#' @param maxtry The number of maximum attempts to apply the minimum distance algorithm to obtain
#' the desired minimum distance between sites. Each iteration takes roughly as long as the
#' standard GRTS algorithm. Successive iterations will always contain at least as many
#' sites satisfying the minimum distance requirement as the previous iteration. The algorithm stops
#' when the minimum distance requirement is met or there are \code{maxtry} iterations.
#' The default number of maximum iterations is \code{10}.
#'
#' @param n_over The number of reverse hierarchically ordered (rho) replacement sites.
#' If the sampling design is unstratified, then
#' \code{n_over} is an integer specifying the number of rho replacement sites desired.
#' If the sampling design is stratified,
#' then \code{n_over} is a vector (or list) whose names match the names of \code{n_base} and
#' whose values indicate the number of rho replacement sites for each stratum.
#' If replacement sites are not desired for a particular stratum, then the corresponding
#' value in \code{n_over} should be \code{0} or \code{NULL} (which is equivalent to \code{0}).
#' If the sampling design is stratified but the number of \code{n_over} sites is the same in each
#' stratum, \code{n_over} can be a vector which is used for each stratum.
#' If \code{n_over} is an unnamed, length-one vector, it's value is recycled
#' and used for each stratum. Note that if the
#' sampling design has unequal selection probabilities (\code{seltype = "unequal"}), then \code{n_over} sites
#' are given the same proportion of \code{caty_n} values as \code{n_base}.
#'
#'
#' @param n_near The number of nearest neighbor (nn) replacement sites.
#' If the sampling design is unstratified, \code{n_near} is integer from \code{1}
#' to \code{10} specifying the number of
#' nn replacement sites to be selected for each base site. If the sampling design
#' is stratified but the same number of nn replacement sites is desired
#' for each stratum, \code{n_near} is integer from \code{1}
#' to \code{10} specifying the number of
#' nn replacement sites to be selected for each base site. If the sampling design is
#' unstratified and a different number of nn replacement sites is
#' desired for each stratum, \code{n_near} is a vector (or list) whose names represent strata and whose
#' values is integer from \code{1}
#' to \code{10} specifying the number of
#' nn replacement sites to be selected for each base site in the stratum. If replacement sites
#' are not desired for a particular stratum, then the corresponding value in \code{n_over}
#' should be \code{0} or \code{NULL} (which is equivalent to \code{0}). For
#' infinite sampling frames, the distance between a site and its nn
#' depends on \code{pt_density}. The larger \code{pt_density}, the closer the nn neighbors.
#'
#' @param wgt_units The units used to compute the design weights. These
#' units must be standard units as defined by the \code{set_units()} function in
#' the units package. The default units match the units of the sf object.
#'
#' @param pt_density A positive integer controlling the density of the GRTS approximation
#' for infinite sampling frames. The GRTS approximation for infinite sample
#' frames vastly improves computational efficiency by generating many finite points and
#' selecting a sample from the points. \code{pt_density} represents the density
#' of finite points per unit to use in the approximation. More specifically,
#' for each stratum, the number of points used in the approximation equals
#' \code{pt_density * (n_base + n_over)}. A larger value of \code{pt_density}
#' means a closer approximation to the infinite sampling frame but less
#' computational efficiency. The default value of \code{pt_density} is \code{10}. Note that
#' when used with \code{caty_n}, the unequal inclusion probabilities generated from
#' this approach are also approximations.
#'
#' @param DesignID A character string indicating the naming structure for each
#' site's identifier selected in the sample, which is matched with \code{SiteBegin} and
#' included as a variable in the
#' sf object in the function's output. Default is "Site".
#'
#' @param SiteBegin A character string indicating the first number to use to match
#' with \code{DesignID} while creating each site's identifier selected in the sample.
#' Successive sites are given successive integers. The default starting number
#' is \code{1} and the number of digits is equal to number of digits in
#' \code{nbase + nover}.
#' For example, if \code{nbase} is 50 and \code{nover} is 0, then the default
#' site identifiers are \code{Site-01} to \code{Site-50}
#'
#' @param sep A character string that acts as a separator between
#' \code{DesignID} and \code{SiteBegin}. The default is \code{"-"}.
#'
#' @param projcrs_check A check for whether the coordinates are projected. If \code{TRUE},
#' an error is returned if coordinates are not projected (i.e., they are geographic or NA). If \code{FALSE}, the
#' check is not performed, which means that the crs in \code{sframe} (and \code{legacy_sites} if provided) can be projected, geographic, or NA.
#'
#' @details \code{n_base} is the number of sites used to calculate
#' the design weights, which is typically the number of sites used in an analysis. When a panel sampling design is implemented, \code{n_base} is typically the
#' number of sites in all panels that will be sampled in the same temporal period --
#' \code{n_base} is not the total number of sites in all panels. The sum of \code{n_base} and
#' \code{n_over} is equal to the total number of sites to be visited for all panels plus
#' any replacement sites that may be required.
#'
#' @return The sampling design sites and additional information about the sampling design. More specifically, it is, a list with five elements:
#' \itemize{
#' \item \code{sites_legacy} An sf object containing legacy sites. This is
#' \code{NULL} if legacy sites were not included in the sample.
#' \item \code{sites_base} An sf object containing the base sites. This is \code{NULL}
#' if \code{n_base} equals the number of legacy sites.
#' \item \code{sites_over} An sf object containing the reverse hierarchically
#' ordered replacement sites. This is \code{NULL} if no reverse hierarchically
#' ordered replacement sites were included in the sample.
#' \item \code{sites_near} An sf object containing the nearest neighbor
#' replacement sites. This is \code{NULL} if no nearest neighbor replacement
#' sites were included in the sample.
#' \item \code{design} A list documenting the specifications of this sampling design.
#' This can be checked to verify your sampling design ran as intended.
#' \itemize{
#' \item \code{call} The original function call.
#' \item \code{stratum_var} The name of the stratification variable in \code{sframe}.
#' This equals \code{NULL} if no stratification is used.
#' \item \code{stratum} The unique strata. This equals \code{"None"} if
#' the sampling design is unstratified.
#' \item \code{n_base} The base sample size per stratum.
#' \item \code{seltype} The selection type per stratum.
#' \item \code{caty_var} The name of the unequal probability variable in \code{sframe}.
#' This equals \code{NULL} if no unequal probability variable is used.
#' \item \code{caty_n} The expected sample sizes for each level of the
#' unequal probability grouping variable per stratum. This equals
#' \code{NULL} when \code{seltype} is not \code{"unequal"}.
#' \item \code{aux_var} The name of the proportional probability (auxiliary) variable in \code{sframe}.
#' This equals \code{NULL} if no proportional probability variable is used.
#' \item \code{legacy} A logical variable indicating whether legacy sites
#' were included in the sample.
#' \item \code{legacy_stratum_var} The name of the stratification variable in \code{legacy_sites}.
#' Omitted if legacy sites are not used. This equals \code{NULL} if legacy sites were used but
#' no stratification variable is used.
#' \item \code{legacy_caty_var} The name of the unequal probability variable in \code{legacy_sites}.
#' Omitted if legacy sites are not used. This equals \code{NULL} if legacy sites were used but
#' no unequal probability variable is used.
#' \item \code{legacy_aux_var} The name of the proportional probability (auxiliary)
#' variable in \code{legacy_sites}.
#' Omitted if legacy sites are not used. This equals \code{NULL} if legacy sites
#' were used but no proportional probability variable is used.
#' \item \code{mindis} The minimum distance requirement desired. This
#' is \code{NULL} when no minimum distance requirement was applied.
#' \item \code{n_over} The reverse hierarchically ordered replacement
#' site sample sizes per stratum. If \code{seltype} is \code{unequal},
#' this represents the expected sample sizes. This is \code{NULL}
#' when no reverse hierarchically ordered replacement sites were selected.
#' \item \code{n_near} The number of nearest neighbor replacement sites
#' desired. This is \code{NULL} when no nearest neighbor replacement
#' sites were selected.
#' }
#' }
#' When non-\code{NULL}, the \code{sites_legacy}, \code{sites_base},
#' \code{sites_over}, and \code{sites_near} objects contain the original columns
#' in \code{sframe} and include a few additional columns. These additional columns
#' are
#' \itemize{
#' \item \code{siteID} A site identifier (as named using the \code{DesignID}
#' and \code{SiteBegin} arguments to \code{grts()}).
#' \item \code{siteuse} Whether the site is a legacy site (\code{Legacy}), base
#' site (\code{Base}), reverse hierarchically ordered replacement site
#' (\code{Over}), or nearest neighbor replacement site (\code{Near}).
#' \item \code{replsite} The replacement site ordering. \code{replsite} is
#' \code{None} if the site is not a replacement site, \code{Next} if it is
#' the next reverse hierarchically ordered replacement site to use, or
#' \code{Near_}, where the word following \code{_} indicates the ordering of sites closest to
#' the originally sampled site.
#' \item \code{lon_WGS84} Longitude coordinates using the WGS84 coordinate
#' system (EPSG:4326). Only given if coordinates are projected.
#' \item \code{lat_WGS84} Latitude coordinates using the WGS84 coordinate
#' system (EPSG:4326). Only given if coordinates are projected.
#' \item \code{X} Longitude coordinates using the provided coordinate
#' system. Only given if coordinates are not projected (i.e., they are geographic or NA).
#' \item \code{Y} Latitude coordinates using the provided coordinate
#' system. Only given if coordinates are not projected (i.e., they are geographic or NA).
#' \item \code{stratum} A stratum indicator. \code{stratum} is \code{None}
#' if the sampling design was unstratified. If the sampling design was \code{stratified},
#' \code{stratum} indicates the stratum.
#' \item \code{wgt} The design weight.
#' \item \code{ip} The site's original inclusion probability (the reciprocal)
#' of (\code{wgt}).
#' \item \code{caty} An unequal probability grouping indicator. \code{caty}
#' is \code{None} if the sampling design did not use unequal inclusion probabilities.
#' If the sampling design did use unequal inclusion probabilities, \code{caty}
#' indicates the unequal probability level.
#' \item \code{aux} The auxiliary proportional probability variable. This
#' column is only returned if \code{seltype} was \code{proportional} in the
#' original sampling design.
#' }
#' If any columns in \code{sframe} contain these names, those columns
#' from \code{sframe} will be automatically prefixed with \code{sframe_}
#' in the \code{sites} object. When output is printed, a summary of site counts by
#' the levels in \code{stratum_var} and \code{caty_var} is shown.
#'
#' @author Tony Olsen \email{olsen.tony@@epa.gov}
#'
#' @seealso
#' \describe{
#' \item{\code{\link{irs}}}{ to select a sample that is not spatially balanced}
#' }
#'
#' @references
#' Stevens Jr., Don L. and Olsen, Anthony R. (2004). Spatially balanced sampling
#' of natural resources. \emph{Journal of the American Statistical Association}, 99(465), 262-278.
#'
#' @examples
#' \dontrun{
#' samp <- grts(NE_Lakes, n_base = 100)
#' print(samp)
#' strata_n <- c(low = 25, high = 30)
#' samp_strat <- grts(NE_Lakes, n_base = strata_n, stratum_var = "ELEV_CAT")
#' print(samp_strat)
#' samp_over <- grts(NE_Lakes, n_base = 30, n_over = 5)
#' print(samp_over)
#' }
#' @export
################################################################################
grts <- function(sframe, n_base, stratum_var = NULL, seltype = NULL, caty_var = NULL,
caty_n = NULL, aux_var = NULL, legacy_var = NULL,
legacy_sites = NULL, legacy_stratum_var = NULL,
legacy_caty_var = NULL, legacy_aux_var = NULL, mindis = NULL,
maxtry = 10, n_over = NULL, n_near = NULL, wgt_units = NULL,
pt_density = NULL, DesignID = "Site", SiteBegin = 1, sep = "-", projcrs_check = TRUE) {
if (inherits(sframe, c("tbl_df", "tbl"))) { # identify if tibble class elements are present
class(sframe) <- setdiff(class(sframe), c("tbl_df", "tbl"))
# remove tibble class for rownames warning
}
if (!is.null(legacy_sites) & inherits(legacy_sites, c("tbl_df", "tbl"))) { # identify if tibble class elements are present
class(legacy_sites) <- setdiff(class(legacy_sites), c("tbl_df", "tbl"))
# remove tibble class for rownames warning
}
geom_col_name <- attr(sframe, "sf_column")
st_geometry(sframe) <- "geometry"
if (!is.null(legacy_sites)) {
st_geometry(legacy_sites) <- "geometry"
}
# save initial variable specifications for the design list later
initial_stratum_var <- stratum_var
initial_caty_var <- caty_var
initial_aux_var <- aux_var
# Create warning indicator and data frame to collect all potential issues during
# sample selection
warn_ind <- FALSE
warn_df <- data.frame(stratum = "Stratum", func = "Calling Function", warn = "Message")
# Ensure that the geometry types for sframe are consistent
temp <- st_geometry_type(sframe)
tst <- all(temp %in% c("POINT", "MULTIPOINT")) |
all(temp %in% c("LINESTRING", "MULTILINESTRING")) |
all(temp %in% c("POLYGON", "MULTIPOLYGON"))
if (!tst) {
stop(paste("\nThe geometry types for the survey frame object passed to function grts: \n\"",
unique(st_geometry_type(sframe)), "\" are not consistent.",
sep = ""
))
}
# Drop m and z values to ensure no issues with grts functionality with sf object
if (!is.null(st_m_range(sframe)) | !is.null(st_z_range(sframe))) {
# warn_ind <- TRUE
# warn_df$warn <- "\nThe survey frame object passed to function grts contains m or z values - they are being dropped to ensure functionality in grts."
sframe <- st_zm(sframe)
}
# Determine type of sampling frame: point, line, polygon
if (all(temp %in% c("POINT", "MULTIPOINT"))) sf_type <- "sf_point"
if (all(temp %in% c("LINESTRING", "MULTILINESTRING"))) sf_type <- "sf_linear"
if (all(temp %in% c("POLYGON", "MULTIPOLYGON"))) sf_type <- "sf_area"
if (all(is.null(legacy_sites), is.null(legacy_var))) {
legacy_option <- FALSE
} else {
legacy_option <- TRUE
}
if (is.null(stratum_var)) {
stratum <- NULL
} else {
stratum <- names(n_base)
}
# set default seltype if not provided (based on specification of other variables)
if (is.null(seltype)) {
if (is.null(caty_var) & is.null(aux_var)) {
seltype <- "equal"
} else if (!is.null(caty_var)) {
seltype <- "unequal"
} else {
seltype <- "proportional"
}
}
# check input. If errors, dsgn_check will stop grtspts and report errors.
dsgn_check(
sframe = sframe, sf_type = sf_type, legacy_sites = legacy_sites,
legacy_option = legacy_option, stratum = stratum, seltype = seltype,
n_base = n_base, caty_n = caty_n, n_over = n_over, n_near = n_near,
stratum_var = stratum_var, caty_var = caty_var, aux_var = aux_var,
legacy_stratum_var = legacy_stratum_var, legacy_caty_var = legacy_caty_var,
legacy_aux_var = legacy_aux_var,
legacy_var = legacy_var, mindis = mindis, DesignID = DesignID,
SiteBegin = SiteBegin, maxtry = maxtry, projcrs_check = projcrs_check
)
# preserve original sframe names
sframe_names <- names(sframe)
# preserve original legacy_sites names if needed
if (!is.null(legacy_sites)) {
legacy_sites_names <- names(legacy_sites)
}
## Create variables in sampling frame if needed.
# Create unique sampling frame ID values
sframe$id <- 1:nrow(sframe)
# Assign stratum variable or create it if design not stratified and variable not provided.
if (is.null(stratum_var)) {
stratum_var <- "stratum"
sframe$stratum <- "None"
stratum <- c("None") # names of all strata
} else {
# ensure class for stratum variable is character and assign to stratum
sframe$stratum <- as.character(sframe[[stratum_var]])
}
# set caty, aux and legacy variables in sampling frame if needed
if (!is.null(caty_var)) sframe$caty <- as.character(sframe[[caty_var]])
if (!is.null(aux_var)) sframe$aux <- sframe[[aux_var]]
if (!is.null(legacy_var)) sframe$legacy <- sframe[[legacy_var]]
# set stratum, caty, aux and legacy variables in legacy_sites if needed
# add idpts to legacy_sites
if (legacy_option == TRUE & (sf_type != "sf_point" | ((sf_type == "sf_point") & !is.null(legacy_sites)))) {
legacy_names <- names(legacy_sites)
legacy_sites$idpts <- 1:nrow(legacy_sites)
if (stratum[1] == "None") {
legacy_sites$stratum <- "None"
} else {
if (is.null(legacy_stratum_var)) {
legacy_stratum_var <- stratum_var
}
legacy_sites$stratum <- as.character(legacy_sites[[legacy_stratum_var]])
}
if (!is.null(caty_var)) {
if (is.null(legacy_caty_var)) {
legacy_caty_var <- caty_var
}
legacy_sites$caty <- as.character(legacy_sites[[legacy_caty_var]])
}
if (!is.null(aux_var)) {
if (is.null(legacy_aux_var)) {
legacy_aux_var <- aux_var
}
legacy_sites$aux <- legacy_sites[[legacy_aux_var]]
}
if (is.null(legacy_var)) {
legacy_sites$legacy <- TRUE
legacy_var <- "legacy"
} else {
legacy_sites$legacy <- legacy_sites[[legacy_var]]
}
}
# save initial variable specifications for the design list later
initial_legacy_stratum_var <- legacy_stratum_var
initial_legacy_caty_var <- legacy_caty_var
initial_legacy_aux_var <- legacy_aux_var
## Create a dsgn list object
# variable assignments to dsgn list object
dsgn <- list(
stratum_var = stratum_var, caty_var = caty_var, aux_var = aux_var,
legacy_option = legacy_option, legacy_var = legacy_var, stratum = stratum,
wgt_units = wgt_units, seltype = NULL, n_base = NULL, caty_n = NULL,
n_over = NULL, n_near = NULL, mindis = mindis
)
# seltype
if (length(seltype) == length(stratum)) {
dsgn$seltype <- seltype
names(dsgn$seltype) <- stratum
} else {
tmp <- sapply(stratum, function(x, seltype) {
x <- seltype
}, seltype)
names(tmp) <- stratum
dsgn$seltype <- tmp
}
# n_base
if (length(n_base) == length(stratum)) {
dsgn$n_base <- n_base
names(dsgn$n_base) <- stratum
} else {
tmp <- sapply(stratum, function(x, n_base) {
x <- n_base
}, n_base)
names(tmp) <- stratum
dsgn$n_base <- tmp
}
# caty_n
if (is.list(caty_n)) {
dsgn$caty_n <- caty_n
} else {
tmp <- lapply(stratum, function(x, caty_n) {
x <- caty_n
}, caty_n)
names(tmp) <- stratum
dsgn$caty_n <- tmp
}
# n_over
if (!is.null(n_over)) {
if (is.list(n_over)) {
n_over <- lapply(n_over, function(x) if (all(x == 0)) NULL else x)
dsgn$n_over <- n_over
} else if (!is.null(names(n_over)) && all(sort(stratum) == sort(names(n_over)))) {
tmp <- lapply(stratum, function(x, n_over) {
x <- n_over[[x]]
}, n_over)
names(tmp) <- stratum
dsgn$n_over <- tmp
} else {
tmp <- lapply(stratum, function(x, n_over) {
x <- n_over
}, n_over)
names(tmp) <- stratum
dsgn$n_over <- tmp
}
}
# n_near
if (!is.null(n_near)) {
if (is.list(n_near)) {
n_near <- lapply(n_near, function(x) if (all(x == 0)) NULL else x)
dsgn$n_near <- n_near
} else if (!is.null(names(n_near)) && all(sort(stratum) == sort(names(n_near)))) {
tmp <- lapply(stratum, function(x, n_near) {
x <- n_near[[x]]
}, n_near)
names(tmp) <- stratum
dsgn$n_near <- tmp
} else {
tmp <- lapply(stratum, function(x, n_near) {
x <- n_near
}, n_near)
names(tmp) <- stratum
dsgn$n_near <- tmp
}
}
# mindis
if (!is.null(mindis)) {
if (is.list(mindis)) {
mindis <- lapply(mindis, function(x) if (all(x == 0)) NULL else x)
dsgn$mindis <- mindis
} else {
tmp <- lapply(stratum, function(x, mindis) {
x <- mindis
}, mindis)
names(tmp) <- stratum
dsgn$mindis <- tmp
}
}
# legacy_option
if (legacy_option == TRUE) {
tmp <- sapply(stratum, function(x, legacy_option) {
x <- legacy_option
}, legacy_option)
names(tmp) <- stratum
dsgn$legacy_option <- tmp
}
## select sites for each stratum
rslts <- lapply(dsgn$stratum, grts_stratum,
dsgn = dsgn, sframe = sframe, sf_type = sf_type, wgt_units = wgt_units,
pt_density = pt_density, legacy_option = legacy_option,
legacy_sites = legacy_sites, maxtry = maxtry, warn_ind = warn_ind, warn_df = warn_df
)
names(rslts) <- stratum
# combine across strata
sites_legacy <- NULL
sites_base <- NULL
sites_over <- NULL
sites_near <- NULL
warn_ind <- FALSE
warn_df <- NULL
for (i in 1:length(rslts)) {
sites_legacy <- rbind(sites_legacy, rslts[[i]]$sites_legacy)
sites_base <- rbind(sites_base, rslts[[i]]$sites_base)
sites_over <- rbind(sites_over, rslts[[i]]$sites_over)
sites_near <- rbind(sites_near, rslts[[i]]$sites_near)
if (rslts[[i]]$warn_ind) {
warn_ind <- TRUE
warn_df <- rbind(warn_df, rslts[[i]]$warn_df)
}
}
# spelling change to avoid clash with analysis warnings
if (!is.null(warn_df) && "warning" %in% names(warn_df)) {
names(warn_df)[which(names(warn_df) == "warning")] <- "Warning"
}
# Create a siteID for all sites
ntot <- NROW(sites_legacy) + NROW(sites_base) + NROW(sites_over) + NROW(sites_near)
siteID <- gsub(" ", "0", paste0(DesignID, sep, format(SiteBegin - 1 + 1:ntot, sep = "")))
nlast <- 0
# Create siteID for legacy sites if present using DesignID and SiteBegin
if (!is.null(sites_legacy)) {
row.names(sites_legacy) <- 1:nrow(sites_legacy)
sites_legacy$siteID <- siteID[1:nrow(sites_legacy)]
nlast <- nrow(sites_legacy)
# set siteuse and replsite
sites_legacy$siteuse <- "Legacy"
sites_legacy$replsite <- "None"
}
# Create siteID for base sites using DesignID and SiteBegin
if (!is.null(sites_base)) {
row.names(sites_base) <- 1:nrow(sites_base)
sites_base$siteID <- siteID[(nlast + 1):(nlast + nrow(sites_base))]
nlast <- nlast + nrow(sites_base)
# set siteuse and replsite for base sites
sites_base$siteuse <- "Base"
sites_base$replsite <- "None"
}
# create siteID for n_over sites if any
if (!is.null(n_over)) {
row.names(sites_over) <- 1:nrow(sites_over)
sites_over$siteID <- siteID[(nlast + 1):(nlast + nrow(sites_over))]
nlast <- nlast + nrow(sites_over)
# set siteuse and replsite for n_over sites
sites_over$siteuse <- "Over"
sites_over$replsite <- "Next"
}
# if n_near sample sites, assign base ids to the replacement sites. then add siteIDs
if (!is.null(n_near)) {
tst <- match(paste(sites_near$stratum, sites_near$replsite, sep = "_"),
paste(sites_legacy$stratum, sites_legacy$idpts, sep = "_"),
nomatch = 0
)
sites_near$replsite[tst > 0] <- sites_legacy$siteID[tst]
tst <- match(paste(sites_near$stratum, sites_near$replsite, sep = "_"),
paste(sites_base$stratum, sites_base$idpts, sep = "_"),
nomatch = 0
)
sites_near$replsite[tst > 0] <- sites_base$siteID[tst]
tst <- match(paste(sites_near$stratum, sites_near$replsite, sep = "_"),
paste(sites_over$stratum, sites_over$idpts, sep = "_"),
nomatch = 0
)
sites_near$replsite[tst > 0] <- sites_over$siteID[tst]
# sort by id so that sites_near in same order as sites in sites_base and sites_over
sites_near <- sites_near[order(sites_near$replsite, sites_near$siteuse), ]
row.names(sites_near) <- 1:nrow(sites_near)
# assign siteIDs
sites_near$siteID <- siteID[(nlast + 1):(nlast + nrow(sites_near))]
}
# Add lat/lon in WGS84 or X/Y coordinates
if ((!is.null(sites_base) && is.na(st_crs(sites_base))) | (!is.null(sites_base) && st_is_longlat(st_crs(sites_base))) | (!is.null(sites_legacy) && is.na(st_crs(sites_legacy))) | (!is.null(sites_legacy) && st_is_longlat(st_crs(sites_legacy)))) {
if (!is.null(sites_legacy)) {
sites_legacy$X <- st_coordinates(sites_legacy)[, "X"]
sites_legacy$Y <- st_coordinates(sites_legacy)[, "Y"]
}
if (!is.null(sites_base)) {
sites_base$X <- st_coordinates(sites_base)[, "X"]
sites_base$Y <- st_coordinates(sites_base)[, "Y"]
}
if (!is.null(sites_over)) {
sites_over$X <- st_coordinates(sites_over)[, "X"]
sites_over$Y <- st_coordinates(sites_over)[, "Y"]
}
if (!is.null(sites_near)) {
sites_near$X <- st_coordinates(sites_near)[, "X"]
sites_near$Y <- st_coordinates(sites_near)[, "Y"]
}
# reorder sf object variables by first specifying design names excluding unique
# feature ID id and idpts as they are internal
dsgn_names <- c(
"siteID", "siteuse", "replsite", "X", "Y",
"stratum", "wgt", "ip", "caty", "aux"
)
} else {
if (!is.null(sites_legacy)) {
sites_legacy$lon_WGS84 <- st_coordinates(st_transform(sites_legacy, crs = 4326))[, "X"]
sites_legacy$lat_WGS84 <- st_coordinates(st_transform(sites_legacy, crs = 4326))[, "Y"]
}
if (!is.null(sites_base)) {
sites_base$lon_WGS84 <- st_coordinates(st_transform(sites_base, crs = 4326))[, "X"]
sites_base$lat_WGS84 <- st_coordinates(st_transform(sites_base, crs = 4326))[, "Y"]
}
if (!is.null(sites_over)) {
sites_over$lon_WGS84 <- st_coordinates(st_transform(sites_over, crs = 4326))[, "X"]
sites_over$lat_WGS84 <- st_coordinates(st_transform(sites_over, crs = 4326))[, "Y"]
}
if (!is.null(sites_near)) {
sites_near$lon_WGS84 <- st_coordinates(st_transform(sites_near, crs = 4326))[, "X"]
sites_near$lat_WGS84 <- st_coordinates(st_transform(sites_near, crs = 4326))[, "Y"]
}
# reorder sf object variables by first specifying design names excluding unique
# feature ID id and idpts as they are internal
dsgn_names <- c(
"siteID", "siteuse", "replsite", "lon_WGS84", "lat_WGS84",
"stratum", "wgt", "ip", "caty", "aux"
)
}
dsgn_names_extra <- c(dsgn_names, "xcoord", "ycoord", "idpts")
if (geom_col_name != "geometry") {
# sframe prefix if necessary
if (geom_col_name %in% dsgn_names_extra) {
geom_col_name <- paste("sframe", geom_col_name, sep = "_")
}
# restore original column names
if (!is.null(sites_legacy)) {
st_geometry(sites_legacy) <- geom_col_name
legacy_sites_names[legacy_sites_names == "geometry"] <- geom_col_name
st_geometry(legacy_sites) <- geom_col_name
}
if (!is.null(sites_base)) {
sframe_names[sframe_names == "geometry"] <- geom_col_name
st_geometry(sites_base) <- geom_col_name
}
if (!is.null(sites_over)) {
st_geometry(sites_over) <- geom_col_name
}
if (!is.null(sites_near)) {
st_geometry(sites_near) <- geom_col_name
}
}
# sites_legacy
if (!is.null(sites_legacy)) {
if (sf_type != "sf_point") {
add_names <- dsgn_names[dsgn_names %in% names(sites_legacy)]
legacy_sites_names_good <- legacy_sites_names[!legacy_sites_names %in% dsgn_names_extra]
if (all(legacy_sites_names %in% legacy_sites_names_good)) {
sites_legacy <- subset(sites_legacy, select = c(add_names, legacy_sites_names))
} else {
legacy_sites_names_bad <- legacy_sites_names[legacy_sites_names %in% dsgn_names_extra]
legacy_sites_temp <- legacy_sites[, legacy_sites_names_bad, drop = FALSE]
temp_geometry_col <- which(names(legacy_sites_temp) == attr(legacy_sites_temp, "sf_column"))
legacy_sites_geometry_col <- which(names(legacy_sites) == attr(legacy_sites, "sf_column"))
names(legacy_sites_temp)[-temp_geometry_col] <- paste("legacy_sites", names(legacy_sites_temp)[-temp_geometry_col], sep = "_")
sites_legacy <- st_join(sites_legacy, legacy_sites_temp, join = st_nearest_feature)
sites_legacy <- subset(sites_legacy, select = c(add_names, legacy_sites_names_good[-legacy_sites_geometry_col], names(legacy_sites_temp)[-temp_geometry_col]))
for (i in names(sites_legacy)) {
if (i %in% c("legacy_sites_xcoord", "legacy_sites_ycoord", "legacy_sites_idpts")) {
names(sites_legacy)[which(names(sites_legacy) == i)] <- substring(i, first = 14)
}
}
}
}
if (sf_type == "sf_point") {
add_names <- dsgn_names[dsgn_names %in% names(sites_legacy)]
sframe_names_good <- sframe_names[!sframe_names %in% dsgn_names_extra]
if (all(sframe_names %in% sframe_names_good)) {
sites_legacy <- subset(sites_legacy, select = c(add_names, sframe_names))
} else {
sframe_names_bad <- sframe_names[sframe_names %in% dsgn_names_extra]
sframe_temp <- sframe[, sframe_names_bad, drop = FALSE]
temp_geometry_col <- which(names(sframe_temp) == attr(sframe_temp, "sf_column"))
sframe_geometry_col <- which(names(sframe) == attr(sframe, "sf_column"))
names(sframe_temp)[-temp_geometry_col] <- paste("sframe", names(sframe_temp)[-temp_geometry_col], sep = "_")
sites_legacy <- st_join(sites_legacy, sframe_temp, join = st_nearest_feature)
sites_legacy <- subset(sites_legacy,
select = c(add_names, sframe_names_good[-sframe_geometry_col], names(sframe_temp)[-temp_geometry_col])
)
for (i in names(sites_legacy)) {
if (i %in% c("sframe_xcoord", "sframe_ycoord", "sframe_idpts")) {
names(sites_legacy)[which(names(sites_legacy) == i)] <- substring(i, first = 8)
}
}
}
}
}
# sites_base
# check what design variables are present in sf objects and add if missing
if (!is.null(sites_base)) {
add_names <- dsgn_names[dsgn_names %in% names(sites_base)]
sframe_names_good <- sframe_names[!sframe_names %in% dsgn_names_extra]
if (all(sframe_names %in% sframe_names_good)) {
sites_base <- subset(sites_base, select = c(add_names, sframe_names))
} else {
sframe_names_bad <- sframe_names[sframe_names %in% dsgn_names_extra]
sframe_temp <- sframe[, sframe_names_bad, drop = FALSE]
temp_geometry_col <- which(names(sframe_temp) == attr(sframe_temp, "sf_column"))
sframe_geometry_col <- which(names(sframe) == attr(sframe, "sf_column"))
names(sframe_temp)[-temp_geometry_col] <- paste("sframe", names(sframe_temp)[-temp_geometry_col], sep = "_")
sites_base <- st_join(sites_base, sframe_temp, join = st_nearest_feature)
sites_base <- subset(sites_base, select = c(add_names, sframe_names_good[-sframe_geometry_col], names(sframe_temp)[-temp_geometry_col]))
for (i in names(sites_base)) {
if (i %in% c("sframe_xcoord", "sframe_ycoord", "sframe_idpts")) {
names(sites_base)[which(names(sites_base) == i)] <- substring(i, first = 8)
}
}
}
}
# sites_over
if (!is.null(sites_over)) {
add_names <- dsgn_names[dsgn_names %in% names(sites_over)]
sframe_names_good <- sframe_names[!sframe_names %in% dsgn_names_extra]
if (all(sframe_names %in% sframe_names_good)) {
sites_over <- subset(sites_over, select = c(add_names, sframe_names))
} else {
sframe_names_bad <- sframe_names[sframe_names %in% dsgn_names_extra]
sframe_temp <- sframe[, sframe_names_bad, drop = FALSE]
temp_geometry_col <- which(names(sframe_temp) == attr(sframe_temp, "sf_column"))
sframe_geometry_col <- which(names(sframe) == attr(sframe, "sf_column"))
names(sframe_temp)[-temp_geometry_col] <- paste("sframe", names(sframe_temp)[-temp_geometry_col], sep = "_")
sites_over <- st_join(sites_over, sframe_temp, join = st_nearest_feature)
sites_over <- subset(sites_over, select = c(add_names, sframe_names_good[-sframe_geometry_col], names(sframe_temp)[-temp_geometry_col]))
for (i in names(sites_over)) {
if (i %in% c("sframe_xcoord", "sframe_ycoord", "sframe_idpts")) {
names(sites_over)[which(names(sites_over) == i)] <- substring(i, first = 8)
}
}
}
}
# sites_near
if (!is.null(sites_near)) {
add_names <- dsgn_names[dsgn_names %in% names(sites_near)]
sframe_names_good <- sframe_names[!sframe_names %in% dsgn_names_extra]
if (all(sframe_names %in% sframe_names_good)) {
sites_near <- subset(sites_near, select = c(add_names, sframe_names))
} else {
sframe_names_bad <- sframe_names[sframe_names %in% dsgn_names_extra]
sframe_temp <- sframe[, sframe_names_bad, drop = FALSE]
temp_geometry_col <- which(names(sframe_temp) == attr(sframe_temp, "sf_column"))
sframe_geometry_col <- which(names(sframe) == attr(sframe, "sf_column"))
names(sframe_temp)[-temp_geometry_col] <- paste("sframe", names(sframe_temp)[-temp_geometry_col], sep = "_")
sites_near <- st_join(sites_near, sframe_temp, join = st_nearest_feature)
sites_near <- subset(sites_near, select = c(add_names, sframe_names_good[-sframe_geometry_col], names(sframe_temp)[-temp_geometry_col]))
for (i in names(sites_near)) {
if (i %in% c("sframe_xcoord", "sframe_ycoord", "sframe_idpts")) {
names(sites_near)[which(names(sites_near) == i)] <- substring(i, first = 8)
}
}
}
}
# add function call to dsgn list
# dsgn <- c(list(Call = match.call()), dsgn)
dsgn <- list(
call = match.call(), stratum_var = initial_stratum_var, stratum = dsgn$stratum, n_base = dsgn$n_base,
seltype = dsgn$seltype, caty_var = initial_caty_var, caty_n = dsgn$caty_n, aux_var = initial_aux_var, legacy = dsgn$legacy_option,
mindis = dsgn$mindis, n_over = dsgn$n_over, n_near = dsgn$n_near
)
if (any(dsgn$legacy)) {
dsgn <- c(dsgn, list(legacy_stratum_var = initial_legacy_stratum_var, legacy_caty_var = initial_legacy_caty_var,
legacy_aux_var = initial_legacy_aux_var))
dsgn <- dsgn[c("call", "stratum_var", "stratum", "n_base", "seltype", "caty_var",
"caty_n", "aux_var", "legacy", "legacy_stratum_var", "legacy_caty_var", "legacy_aux_var",
"mindis", "n_over", "n_near")]
}
# create output list
sites <- list(
sites_legacy = sites_legacy, sites_base = sites_base,
sites_over = sites_over, sites_near = sites_near,
design = dsgn
)
# As necessary, output a message indicating that warning messages were generated
# during execution of the program
if (warn_ind) {
warn_df <<- warn_df
if (nrow(warn_df) == 1) {
message("During execution of the program, a warning message was generated. The warning \nmessage is stored in a data frame named 'warn_df'. Enter the following command \nto view the warning message: warnprnt()\n")
} else {
message(paste("During execution of the program,", nrow(warn_df), "warning messages were generated. The warning \nmessages are stored in a data frame named 'warn_df'. Enter the following \ncommand to view the warning messages: warnprnt() \nTo view a subset of the warning messages (say, messages number 1, 3, and 5), \nenter the following command: warnprnt(m=c(1,3,5))\n"))
}
}
# constructor for design class
sites <- structure(sites, class = "sp_design")
# return the survey design sf object
invisible(sites)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.