Nothing
# BAS.R
#' @name BAS
#'
#' @title Balanced Acceptance Sampling (BAS).
#'
#' @description BAS draws spatially balanced samples from areal resources. To draw BAS samples,
#' spbal requires a study region shapefile and the region’s bounding box. An initial sample size
#' is also needed, which can be easily increased or decreased within spbal for master sampling
#' applications
#'
#' @author This function was first written by Paul van Dam-Bates for the
#' package BASMasterSample and later simplified by Phil Davies.
#'
#' @param shapefile Shape file as a polygon (sp or sf) to select sites for.
#' @param n Number of sites to select. If using stratification it is a named vector containing
#' sample sizes of each group.
#' @param boundingbox Bounding box around the study area. If a bounding box is not supplied
#' then spbal will generate a bounding box for the shapefile.
#' @param minRadius If specified, the minimum distance, in meters, allowed between sample
#' points. This is applied to the $sample points. Points that meet the minRadius criteria
#' are retuned in the minRadius output variable.
#' @param panels A list of integers that define the size of each panel in a
#' non-overlapping panels design. The length of the list determines the number of
#' panels required. The sum of the integers in the panels parameter will determine
#' the total number of samples selected, n. The default value for panels is NULL,
#' this indicates that a non-overlapping panel design is not wanted.
#' @param panel_overlap A list of integers that define the overlap into the previous
#' panel. Is only used when the panels parameter is not NULL. The default value for
#' panel_overlap is NULL. The length of panel_overlap must be equal to the length
#' of panels. The first value is always forced to zero as the first panel never
#' overlaps any region.
#' @param stratum The name of a column in the data.frame attached to shapefile that defines
#' the strata of interest.
#' @param seeds A vector of 2 seeds, u1 and u2. If not specified, the default is NULL and will
#' be defined randomly using function \code{generateUVector}.
#' @param verbose Boolean if you want to see any output printed to screen. Helpful if taking a
#' long time. Default is FALSE i.e. no informational messages are displayed.
#'
#' @return A list containing three variables, \code{$sample} containing locations in the BAS sample,
#' in BAS order, \code{$seeds}, the u1 and u2 seeds used to generate the sample and \code{$minRadius}
#' containing points from $sample that meet the minRadius criteria. If the minRadius
#' parameter is NULL then the $minRadius returned will also be NULL.
#'
#' The sample points are returned in the form of a simple feature collection of POINT objects.
#' They have the following attributes:
#' \itemize{
#' \item \code{SiteID} A unique identifier for every sample point. This
#' encodes the BAS order.
#' \item \code{spbalSeqID} A unique identifier for every sample point. This
#' encodes the BAS sample order.
#' \item \code{geometry} The XY co-ordinates of the sample point in the CRS of the original
#' shapefile.
#' }
#' @examples
#' # Equal probability BAS sample ----------------------------------------------
#'
#' # Use the North Carolina shapefile supplied in the sf R package.
#' shp_file <- sf::st_read(system.file("shape/nc.shp", package="sf"))
#' shp_gates <- shp_file[shp_file$NAME == "Gates",]
#'
#' # Vertically aligned master sample bounding box.
#' bb <- spbal::BoundingBox(shapefile = shp_gates)
#'
#' set.seed(511)
#' n_samples <- 20
#' # Equal probability BAS sample.
#' result <- spbal::BAS(shapefile = shp_gates,
#' n = n_samples,
#' boundingbox = bb)
#' BAS20 <- result$sample
#' # display first three sample points.
#' BAS20[1:3,]
#'
#' # Increase the BAS sample size ----------------------------------------------
#' n_samples <- 50
#' result2 <- spbal::BAS(shapefile = shp_gates,
#' n = n_samples,
#' boundingbox = bb,
#' seeds = result$seed)
#' BAS50 <- result2$sample
#' BAS50[1:3,]
#'
#' # Check, first n_samples points in both samples must be the same.
#' all.equal(BAS20$geometry, BAS50$geometry[1:20])
#'
#' @export
BAS <- function(shapefile = NULL,
n = 100,
boundingbox = NULL,
minRadius = NULL,
panels = NULL,
panel_overlap = NULL,
stratum = NULL,
seeds = NULL,
verbose = FALSE){
# initialise.
bb.new <- NULL
# validate shapefile and other BAS parameters.
# validate the shapefile parameter.
if(base::is.null(shapefile)){
base::stop("spbal(BAS) The shapefile parameter must be used. Please specify a shapefile.")
}
shp_geometry <- sf::st_geometry_type(shapefile)
if (!base::all(shp_geometry %in% c("MULTIPOLYGON", "POLYGON"))){
msg <- "spbal(BAS) Unsupported geometry in shapefile, %s."
msgs <- base::sprintf(msg, shp_geometry)
base::stop(msgs)
}
# A bounding box must be specified.
# If user has not specified a bb then we will generate one.
if(base::is.null(boundingbox)){
boundingbox <- spbal::BoundingBox(shapefile = shapefile)
bb.new <- boundingbox
}
if(!base::is.null(minRadius)){
# must be numeric, greater >= 1.
validate_parameters("minRadius", c(minRadius))
}
# validate panel design if we are using one.
res <- ValidatePanelDesign(panels, panel_overlap, n)
panel_design <- res$panel_design
number_panels <- res$number_panels
panel_overlap <- res$panel_overlap
n <- res$n
# stratification wanted?
if(base::is.null(stratum)){
# no stratification, just a simple sample wanted.
result <- getBASSampleDriver(shapefile = shapefile,
n = n,
bb = boundingbox,
seeds = seeds,
verbose = verbose)
smp <- result$sample
seeds <- result$seed
}else{
if(base::is.null(base::names(n))) base::stop("spbal(BAS) Need design sample size as n = named vector")
strata.levels <- base::names(n)
smp <- NULL
# for every stratum...
for(k in 1:base::length(n)){
if(verbose){
msg <- "spbal(BAS) Stratum: %s."
msgs <- base::sprintf(msg, strata.levels[k])
base::message(msgs)
}
k.indx <- base::which(shapefile[, stratum, drop = TRUE] == strata.levels[k])
shp.stratum <- shapefile[k.indx,]
result <- getBASSampleDriver(shapefile = shp.stratum,
n = n[k],
bb = boundingbox,
seeds = seeds,
verbose = verbose)
smp.s <- result$sample
seeds <- result$seed
smp.s[stratum] <- strata.levels[k]
smp <- base::rbind(smp, smp.s)
} # end for k.
} # end is.null(stratum).
# go assign panelid's if required.
res <- PanelDesignAssignPanelids(smp, panels, panel_overlap, panel_design, number_panels)
# go filter res$sample if user has specified a minimum radius.
S <- NULL
if(!is.null(minRadius)){
S <- filterOnDistance(res$sample, minRadius)
}
# specific column ordering in res$sample.
fixed_order <- base::c("SiteID", "spbalSeqID")
remaining_cols <- base::names(res$sample)[-base::match(fixed_order, base::names(res$sample))]
new_col_order <- base::c(fixed_order, remaining_cols)
sample <- res$sample[, new_col_order]
# add the "Count" feature for NZ_DOC (acts as a unique ID for backward compatibility purposes).
Count <- sample$SiteID
# assign the spbal attribute to the sample being returned, i.e. the function that created it.
base::attr(sample, "spbal") <- "BAS"
# return the sample and the u1, u2 seeds used.
result <- base::list(sample = sample,
seed = seeds,
minRadius = S,
bb = bb.new,
Count = Count)
return(result)
}
#' @name getBASSampleDriver
#'
#' @title Manage BAS sampling.
#'
#' @description This function repeatedly calls function spbal::getBASSample to generate the BAS
#' sample. Once the requested number of points within the intersection of the shapefile and the
#' study area have been obtained, the sample and seeds are returned to the caller.
#'
#' @author This function was written by Phil Davies based on origin code by Paul van Dam-Bates
#' from the BASMasterSample package.
#'
#' @param shapefile sf shape file as a polygon to select sites from.
#' @param bb Bounding box which defines the area around the study area. A bounding box must be
#' supplied.
#' @param n Number of sites to select. If using stratification it is a named vector containing
#' sample sizes of each group.
#' @param seeds A vector of 2 seeds, u1 and u2. If not specified, the default is NULL and will
#' be defined randomly.
#' @param verbose Boolean if you want to see any output printed to screen. Helpful if taking a
#' long time. Default is FALSE i.e. no informational messages are displayed.
#'
#' @return A list containing two variables, \code{$sample} containing locations in the BAS sample,
#' in BAS order and \code{$seeds}, the u1 and u2 seeds used to generate the sample.
#'
#' @keywords internal
getBASSampleDriver <- function(shapefile, bb, n, seeds, verbose = FALSE){
# The assumption is that if seeds are not null then user has previously run this to get
# a sample and associated seeds.
if(base::is.null(seeds)){
# seeds <- generateUVector()
# find the first point in the study region (picked at random), specifically we want to
# find the seeds that give us the first point in the study region.
# first.pt <- findFirstStudyRegionPoint(shapefile = shapefile, bb = bb, seeds = seeds, verbose = verbose)
seeds <- findBASSeed(shapefile = shapefile, bb = bb, n = 1, verbose = verbose)
if(verbose){
msg <- "spbal(getBASSampleDriver) Seeds u1 = %s, u2 = %s gave first study region point."
msgs <- base::sprintf(msg, seeds[1], seeds[2])
base::message(msgs)
}
}
# Check bounding box and find efficient Halton indices (boxes)
BASInfo <- setBASIndex(shapefile, bb, seeds)
boxes <- BASInfo$boxes
# number of samples required.
draw <- n * 6
# just the first point so far, need n.
num_samples <- 0
n_samples <- 0
## Track indices to match boxes.
boxdraw <- 0
# count number of times we call spbal::getBASSample.
call.getBASSample.cnt <- 0
# keep generating BAS samples until we find n sample points in the study area.
while(num_samples < n){
# double the number of points to find to try and reduce number of loops.
# draw <- draw * 2 ## Can lead to slow results...
# Go to next set of boxes if repeating loop.
boxes <- BASInfo$boxes + (BASInfo$B * boxdraw)
# Create indices repeating every Bth for each box until a full draw is taken.
while(base::length(boxes) < draw){
boxdraw <- boxdraw + 1
boxes <- base::c(boxes, BASInfo$boxes + (boxdraw * BASInfo$B))
}
boxdraw <- boxdraw + 1
# go get sample.
pts.sample <- getBASSample(shapefile = shapefile, bb = bb , n = draw, seeds = seeds, boxes = boxes)
n_samples <- base::length(pts.sample$sample$SiteID)
# First time create ret_sample
if(n_samples > 0 & num_samples == 0) ret_sample <- pts.sample$sample
# If some samples are found, and samples were previously found, bind them.
if(n_samples > 0 & num_samples > 0) ret_sample <- base::rbind(ret_sample, pts.sample$sample)
if(verbose){
msg <- "spbal(getBASSampleDriver) after getBASSample n_samples = %s. num_samples = %s"
msgs <- base::sprintf(msg, n_samples, num_samples)
base::message(msgs)
}
call.getBASSample.cnt <- call.getBASSample.cnt + 1
num_samples <- num_samples + n_samples
} # end while num_samples < n
if(verbose){
msg <- "spbal(getBASSampleDriver) Needed %s call(s) to obtain %s samples."
msgs <- base::sprintf(msg, call.getBASSample.cnt, n)
base::message(msgs)
}
# add a sample ID for the user.
ret_sample$spbalSeqID <- base::seq(1, base::length(ret_sample$SiteID))
# add the "Count" feature for NZ_DOC (acts as a unique ID for backward compatibility purposes).
#ret_sample$Count <- ret_sample$SiteID
# return sample and seeds to caller.
result <- base::list(sample = ret_sample[1:n,],
seeds = seeds)
return(result)
}
#' @name getBASSample
#'
#' @title Generate the BAS sample.
#'
#' @description This function is repeatedly called from function spbal::getBASSampleDriver
#' to generate a BAS sample.
#'
#' @author This function was written by Phil Davies.
#'
#' @param shapefile Shape file as a polygon (sp or sf) to select sites for.
#' @param bb Bounding box which defines the area around the study area. A bounding box must be
#' supplied.
#' @param n Number of sites to select. If using stratification it is a named vector containing
#' sample sizes of each group.
#' @param seeds A vector of 2 seeds, u1 and u2. seeds must have a value when this function is called.
#' @param boxes A vector of integers for which points along the Halton random starting point to sample from.
#'
#' @return A list containing two variables, \code{$sample} containing locations in the BAS sample,
#' in BAS order and \code{$seeds}, the u1 and u2 seeds used to generate the sample.
#'
#' @keywords internal
getBASSample <- function(shapefile, bb, n, seeds, boxes = NULL){
if(base::is.null(seeds)){
msg <- "spbal(getBASSample) The seeds parameter must not be NULL."
msgs <- base::sprintf(msg)
base::stop(msgs)
}
seedshift <- seeds
if(is.null(boxes)) {
siteid <- base::seq(from = 1, to = n, by = 1)
boxes <- siteid
}else{
siteid <- boxes
}
# Scale and shift Halton to fit into bounding box
bb.bounds <- sf::st_bbox(bb)
scale.bas <- bb.bounds[3:4] - bb.bounds[1:2]
shift.bas <- bb.bounds[1:2]
bases <- base::c(2, 3)
# go generate n Halton points.
res <- cppBASptsIndexed(n = n, seeds = seeds, bases = bases, boxes = boxes)
pts <- res$pts
xy <- base::cbind(pts[,1]*scale.bas[1] + shift.bas[1], pts[,2]*scale.bas[2] + shift.bas[2])
pts.coord <- sf::st_as_sf(base::data.frame(SiteID = siteid, xy), coords = base::c(2, 3))
sf::st_crs(pts.coord) <- sf::st_crs(bb)
# find the intersection. Generates the same as sf::st_intersection(pts.coord, shapefile)
pts.intersect <- pts.coord[shapefile,]
# return the point to the driver.
result <- base::list(sample = pts.intersect,
seeds = seedshift)
return(result)
}
#' @name setBASIndex
#'
#' @title Finds a set of Halton indices that will create BAS points within a shape bounding box.
#'
#' @description This function is designed to be called internally for efficiency in site selection.
#'
#' @details To be used when doing a Master Sample and the bounding box of the greater frame is potentially much
#' larger than the the polygon being sampled. In this case, we don't want to generate points across the
#' entire larger bounding box region and then clip them. Instead, we can make use of the Halton sequence
#' and only generate BAS points near to the shape being sampled. This function finds returns those indices.
#'
#' @param shapefile Shape file as a polygon (sp or sf) to select sites for.
#' @param bb Bounding box which defines the area around the study area. A bounding box must be
#' supplied.
#' @param seeds A vector of 2 seeds, u1 and u2. seeds must have a value when this function is called.
#'
#' @return A list containing two variables, \code{$boxes} containing indices of the BAS sample that fall
#' into the bounding box, \code{$J}, the number of subdivision powers taken to find those boxes, \code{$B},
#' the number of boxes that the indices relate to \code{(1-B)}, \code{$xlim}, the ylimit of the
#' bounding box of the shapefile, shifted to the \code{base[1]^J[1]} coordinates on the unit box
#' \code{[0,1)}, \code{$ylim}, the ylimit of the bounding box of the shapefile, shifted to the
#' \code{base[2]^J[2]} coordinates on the unit box \code{[0,1)}.
#'
#' @keywords internal
setBASIndex <- function(shapefile, bb, seeds = base::c(0,0)){
# Scale and shift Halton to fit into bounding box
bb.bounds <- sf::st_bbox(bb)
inner.bb <- sf::st_bbox(shapefile)
## Check if bb and st_bbox are equivalent, return if they are for efficiency.
if(base::all(inner.bb == bb.bounds)) return(base::list(boxes = 1, B = 1, J = base::c(0, 0), xlim = base::c(0, 1), ylim = base::c(0, 1)))
scale.bas <- bb.bounds[3:4] - bb.bounds[1:2]
shift.bas <- bb.bounds[1:2]
bases <- base::c(2, 3)
inner.pts.scaled <- base::cbind((inner.bb[base::c('xmin', 'xmax')] - shift.bas[1])/scale.bas[1],
(inner.bb[base::c('ymin', 'ymax')] - shift.bas[2])/scale.bas[2])
inner.area <- base::diff(inner.pts.scaled[,1]) * base::diff(inner.pts.scaled[,2])
J <- base::c(0, 0)
box.area <- 1
# 25% area seems like a good rule of thumb from prev code.
while(inner.area/box.area < 0.25){
if(bases[1]^J[1] <= bases[2]^J[2]){
J[1] <- J[1] + 1
}else{
J[2] <- J[2] + 1
}
xlim <- base::c(base::floor(inner.pts.scaled[1,1] / (1/bases[1]^J[1]))/(bases[1]^J[1]),
base::ceiling(inner.pts.scaled[2,1] / (1/bases[1]^J[1]))/(bases[1]^J[1]))
ylim <- base::c(base::floor(inner.pts.scaled[1,2] / (1/bases[2]^J[2]))/(bases[2]^J[2]),
base::ceiling(inner.pts.scaled[2,2] / (1/bases[2]^J[2]))/(bases[2]^J[2]))
box.area <- base::diff(xlim) * base::diff(ylim)
}
B <- base::prod(bases^J)
if(base::all(J == 0)) return(base::list(boxes = 1,
B = 1,
J = base::c(0, 0),
xlim = base::c(0, 1),
ylim = base::c(0, 1)))
## Intersect first B BAS points in the boxes
ptsx <- cppBASptsIndexed(n = B, seeds = seeds[1], bases = bases[1])$pts
indx <- base::which(ptsx[,1] >= xlim[1] & ptsx[,1] < xlim[2])
ptsy <- cppBASptsIndexed(n = 1, seeds = seeds[2], bases = bases[2], boxes = indx)$pts
boxes <- indx[ptsy[,1] >= ylim[1] & ptsy[,1] < ylim[2]]
result <- base::list(boxes = boxes,
J = J,
B = B,
xlim = xlim,
ylim = ylim)
return(result)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.