R/sampling_design.R

Defines functions xy_sample xy_sample_random xy_sample_regular

Documented in xy_sample xy_sample_random xy_sample_regular

#' Sample locations in a study area of rectangular shape
#'
#' @param sp_poly An object of class \code{\link[sp]{SpatialPolygons}} from the
#'   \code{sp} package.
#' @param n Sample size, i.e. number of sample locations.
#' @param M Number of independent samples of size \code{n}.
#' @param method character string; 'random' for completely random placement of
#'   points; 'regular' for systematic sampling (equal distance between points in
#'   x and y direction )
#' @param ... optional argument, passed to the appropriate function for
#'   systematic sampling: optional use of \code{cell_size} to specify the area
#'   of a grid cell instead of the number of points \code{n}; use
#'   \code{random_rot = TRUE} to rotate the sampling grid in a random direction.
#'
#' @return A \code{\link[data.table]{data.table}} object with \code{M} times
#'   \code{n} rows holding an identifier and xy-coordinates.
#' @export
xy_sample <- function(sp_poly, n, M = 1, method = 'random', ...) {
  if (tolower(method) == 'random') {
    return(sample_loc(data = xy_sample_random(sp_poly = sp_poly,
                                              n = n,
                                              M = M),
                      n = n,
                      M = M,
                      method = tolower(method)));
  } else if (tolower(method) == 'regular') {
    return(sample_loc(data = xy_sample_regular(sp_poly = sp_poly,
                                               n = n,
                                               M = M,
                                               ...),
                      n = n,
                      M = M,
                      method = tolower(method)))
  }
}


#' Place a number of points randomly in a polygon of arbitrary shape
#'
#' @param sp_poly An object of class \code{\link[sp]{SpatialPolygons}} from the
#'   \code{sp} package.
#' @param n Sample size
#' @param M Number of independent samples of size \code{n}
#'
#' @details Points are selected following a solution that was presented at a
#'   discussion on stackoverflow.com
#'   (https://stackoverflow.com/questions/11178414/algorithm-to-generate-equally-distributed-points-in-a-polygon).
#'
#'   In a first step the provided polygon is divided into non-overlapping
#'   triangles and the area of each triangle is calculated. To select one point
#'   randomly, one triangle with points P1, P2, P3 is sampled proportional to
#'   its area. Once the triangle is selected, a point P4 is placed randomly
#'   along an arbitrary edge of the triangle using uniform probabilities (in
#'   this implementation it's always the edge between P1 and P2). The final
#'   random point within the triangle is generated by chosing a random point
#'   along the edge between P3 and P4 using non-uniform probabilities.
#'
#'   This procedure is repreated until the desired number of points was
#'   generated.
#'
#' @return A \code{\link[data.table]{data.table}} object with \code{M} times
#'   \code{n} rows holding an identifier and xy-coordinates.
xy_sample_random <- function(sp_poly, n, M = 1) {
  tri <- spatstat::triangulate.owin(spatstat::as.owin(sp_poly));
  a <- unlist(lapply(tri$tiles, spatstat::area));
  w <- a/sum(a);

  x <- unlist(lapply(tri$tiles, function(x) x$bdry[[1]]$x));
  y <- unlist(lapply(tri$tiles, function(x) x$bdry[[1]]$y));

  s <- sample(length(a), n*M, prob = w, replace = TRUE);
  r1 <- runif(n*M);
  r2 <- runif(n*M);

  n_vtc <- 3; # Number of vertices in a triangle
  p4_x <- (1 - r1)*x[(s*n_vtc - 2)] + r1*x[(s*n_vtc - 1)];
  p4_y <- (1 - r1)*y[(s*n_vtc - 2)] + r1*y[(s*n_vtc - 1)];

  p5_x <- (1 - sqrt(r2))*x[(s*n_vtc)] + sqrt(r2)*p4_x;
  p5_y <- (1 - sqrt(r2))*y[(s*n_vtc)] + sqrt(r2)*p4_y;

  return(data.table(id_sample = rep(1:M, each = n),
                    id_point = rep(1:n, M),
                    x_s = p5_x,
                    y_s = p5_y));
}


#' Place a regular sampling grid over a polygon of arbitrary shape
#'
#' @param sp_poly An object of class \code{\link[sp]{SpatialPolygons}} from the
#'   \code{sp} package.
#' @param n Sample size
#' @param M Number of independent samples of size \code{n}
#' @param cell_size If missing, a cell size is derived from the sample size
#'   \code{n}; otherwise the cell size is used to calculate the grid spacing
#'   (assuming a square grid).
#' @param random_rot Logical, if TRUE the grid is rotated randomly
#'
#' @details Due to the systematic nature of the generated samples, sample size
#'   is not constant but will on average reach the expected sample size
#'   \code{n}.
#'
#'   Grids are generated with a random starting point choosen from an area in
#'   the lower left corner of the polygon's bounding box. The size of this area
#'   corresponds to the cell size of the grid as set by \code{cell_size} or as
#'   inferred from \code{n} if \code{cell_size} is \code{NULL}.
#'
#'   Sample grids are initially generated for the entire bounding box that
#'   encompasses \code{sp_poly}, and are refined to the actual outline of the
#'   study region in a second step. If \code{M} becomes large (say > 10000), the
#'   computation of the \code{M} samples will take while.
#' @return A \code{\link[data.table]{data.table}} object with approximately
#'   \code{M} times \code{n} rows holding an identifier and xy-coordinates.
xy_sample_regular <- function(sp_poly, n, M = 1, cell_size = NULL, random_rot = TRUE) {
  e <- sp::bbox(sp_poly);
  a_ext <- prod(apply(e, 1, diff));
  a <- sum(extract_area(sp_poly));

  if (is.null(cell_size)) {
    n_os <- n*a_ext/a; # Oversampling
    cell_size <- a_ext/n_os;
  }

  d <- sqrt(cell_size);
  n_os <- a_ext/d^2;

  dt_res <- data.table(id_sample = rep(0L, M*n_os*2), # 100% extra rows because of random sample size, empty rows removed later on
                       id_point = 0L,
                       x_s = 0.0,
                       y_s = 0.0);

  # Center coordinates
  e_cent <- e;
  e_cent["x", ] <- e["x", ] - e["x", "max"]/2;
  e_cent["y", ] <- e["y", ] - e["y", "max"]/2;
  r_start <- 0L;
  r_end <- 0L;
  if (random_rot) {
    alpha <- runif(M, min = 0, max = pi);
  }
  u_d <- cbind(runif(M)*d, runif(M)*d);
  for (m in seq.int(M)) {
    # Enlarge grid to account for possible rotation
    x <- seq(1.5*min(e_cent["x", "min"]) + u_d[m, 1], 1.5*max(e_cent["x", "max"]), d);
    y <- seq(1.5*min(e_cent["y", "min"]) + u_d[m, 2], 1.5*max(e_cent["y", "max"]), d);
    xy <- expand.grid(x, y);
    # Random rotation
    if (random_rot) {
      rot <- matrix(c(cos(alpha[m]), -sin(alpha[m]),
                      sin(alpha[m]), cos(alpha[m])),
                    ncol = 2, byrow = TRUE);
      xy <- t(rot %*% t(as.matrix(xy)));
    }
    # Back to original coordinates
    xy[, 1] <- xy[, 1] + e["x", "max"]/2;
    xy[, 2] <- xy[, 2] + e["y", "max"]/2;
    sp_xy <- sp::SpatialPoints(xy);

    # Reject points outside the study area
    idx_over <- is.na(sp::over(sp_xy, sp_poly)) == FALSE;
    xy_sub <- sp::coordinates(sp_xy[idx_over]);

    # Collect results
    n_m <- nrow(xy_sub);
    r_start <- r_end + 1L;
    r_end <- r_start + n_m - 1L;
    set(dt_res,
        r_start:r_end,
        1:4,
        data.table(id_sample = rep(m, n_m),
                   id_point = 1:n_m,
                   x_s = xy_sub[, 1],
                   y_s = xy_sub[, 2]));
  }
  return(dt_res[id_sample != 0]);
}
AWF-GAUG/fisim documentation built on May 28, 2019, 11:02 a.m.