R/sandwich.model.r
In sandwichr: Spatial Prediction Based on Spatial Stratified Heterogeneity

Documented in sandwich.model

#' Perform Sandwich model-based mapping
#'
#' @description
#' Estimate the mean and standard error for each reporting unit using SSH-based spatial prediction.
#'
#' @usage sandwich.model(object,
#'        sampling.attr,
#'        type="shp",
#'        ssh.id.col=NULL,
#'        ssh.weights=NULL)
#'
#' @param object When \code{type="shp"}, \code{object} is a list of three \code{sf} objects generated by \code{\link{load.data.shp}}, including a point \code{sf} object used as the sampling layer, a polygon \code{sf} object used as the SSH layer, and a polygon \code{sf} object used as the SSH layer. When \code{type="txt"}, \code{object} is a list of two data frames generated by \code{\link{load.data.txt}}, including a file linking sampling and SSH layers and a file linking reporting and SSH layers.
#' @param sampling.attr Text for the name of the attribute to be predicted in the sampling layer.
#' @param type Text for the type of input data. \code{type="shp"} denotes shapefiles, and \code{type="txt"} denotes text files. By default, \code{type="shp"}.
#' @param ssh.id.col Text for the column that specifies which stratum each sampling unit falls into (see \code{\link{load.data.txt}}). Set to \code{NULL} when \code{type="shp"}.
#' @param ssh.weights A \code{list} that specifies the strata in the SSH layer and their corresponding columns of weights in \code{reporting_ssh.file} (see \code{\link{load.data.txt}}).
#'
#' @return A \code{sandwich.ci} object that contains the estimated mean and standard deviation for each reporting unit.
#'
#' @references
#' Wang, J. F., Haining, R., Liu, T. J., Li, L. F., & Jiang, C. S. (2013). Sandwich estimation for multi-unit reporting on a stratified heterogeneous surface. \emph{Environment and Planning A}, 45(10), 2515-2534.
#'
#' @import sf lwgeom
#' @importFrom stats var
#' @name sandwich.model
#' @export
#'
#' @seealso \code{\link{load.data.shp}}, \code{\link{load.data.txt}}
#'
#' @examples
#' data(sim.data)
#' sim.sw <- sandwich.model(object=sim.data, sampling.attr="Value", type="shp")
#'
# ---- End of roxygen documentation ----


sandwich.model <- function(object, sampling.attr, type="shp", ssh.id.col=NULL, ssh.weights=NULL){
  if (type == "shp"){

    sampling.lyr = object[[1]]
    ssh.lyr = object[[2]]
    reporting.lyr = object[[3]]

    #--------------------------- Check inputs ----------------------------------
    if (st_geometry_type(sampling.lyr, by_geometry=FALSE) != "POINT"){
      stop("Geometry type of the sampling layer should be POINT.")
    }
    if (st_geometry_type(ssh.lyr, by_geometry=FALSE) != "POLYGON" &
        st_geometry_type(ssh.lyr, by_geometry=FALSE) != "MULTIPOLYGON"){
      stop("Geometry type of the SSH layer should be POLYGON or MULTIPOLYGON.")
    }
    if (st_geometry_type(reporting.lyr, by_geometry=FALSE) != "POLYGON" &
        st_geometry_type(reporting.lyr, by_geometry=FALSE) != "MULTIPOLYGON"){
      stop("Geometry type of the reporting layer should be POLYGON or MULTIPOLYGON.")
    }
    if (!is.element(sampling.attr, names(sampling.lyr))){
      stop("Attribute name not found in the sampling layer.")
    }

    #---------------- Calculating sample means and SEs for SSH layer -------------
    ssh.lyr$mean = 0
    ssh.lyr$se = 0
    ssh.lyr$df = 0
    for (i in 1:(nrow(ssh.lyr))){
      z.pts = suppressMessages(st_intersection(sampling.lyr, ssh.lyr[i,]))
      if (nrow(z.pts) > 1){
        ssh.lyr[i,]$mean = mean(z.pts[[sampling.attr]])
        z.v = var(z.pts[[sampling.attr]])
        ssh.lyr[i,]$se = sqrt(z.v / nrow(z.pts))
        ssh.lyr[i,]$df = nrow(z.pts) - 1
      }
    }

    #---------------- Calculating values and SEs for reporting layer -------------
    reporting.lyr$mean = 0
    reporting.lyr$se = 0
    reporting.lyr$df = 0
    for (i in 1:(nrow(reporting.lyr))){
      for (j in 1:(nrow(ssh.lyr))){
        if (suppressMessages(st_intersects(st_geometry(ssh.lyr[j,]), st_geometry(reporting.lyr[i,]), sparse = FALSE)[1])){
          r.poly = suppressMessages(st_intersection(st_geometry(ssh.lyr[j,]), st_geometry(reporting.lyr[i,])))
          r.w = as.numeric(st_area(r.poly)) / as.numeric(st_area(reporting.lyr[i,]))
          reporting.lyr[i,]$mean = reporting.lyr[i,]$mean + r.w * ssh.lyr[j,]$mean
          reporting.lyr[i,]$se = reporting.lyr[i,]$se + r.w ^2 * ssh.lyr[j,]$se ^2
          reporting.lyr[i,]$df = reporting.lyr[i,]$df + ssh.lyr[j,]$df
        }
        reporting.lyr[i,]$se = sqrt(reporting.lyr[i,]$se)
      }
    }

    output = list(object=reporting.lyr, sample_size=nrow(sampling.lyr), ssh_num=nrow(ssh.lyr), reporting_num=nrow(reporting.lyr))
    class(output) <- "sandwich.model"
    return(output)}
  else if (type == "txt"){

    sampling_ssh = object[[1]]
    reporting_ssh = object[[2]]

    #--------------------------- Check inputs ----------------------------------
    if (!ssh.id.col %in% names(sampling_ssh) | is.null(ssh.id.col)){
      stop("Column name ssh.id.col not exists in the file linking sampling and SSH layers.")
    }
    if (!all(ssh.weights[[2]] %in% names(reporting_ssh)) | is.null(ssh.weights)){
      stop("Some columns in ssh.weights not exist in the file linking reporting and SSH layers.")
    }
    if (!all(sort(ssh.weights[[1]]) == sort(unique(sampling_ssh[[ssh.id.col]])))){
      stop("ssh.weights not matches with the values in column ssh.id.col")
    }
    if (!is.element(sampling.attr, names(sampling_ssh))){
      stop("Attribute name not found in the file linking sampling and SSH layers.")
    }

    #---------------- Calculating sample means and SEs for SSH layer -------------
    ssh = unique(sampling_ssh[ssh.id.col])
    ssh$mean = 0
    ssh$se = 0
    ssh$df = 0
    for (i in 1:(nrow(ssh))){
      z.pts = sampling_ssh[sampling_ssh[[ssh.id.col]]==ssh[i,][[ssh.id.col]],]
      if (nrow(z.pts) > 1){
        ssh[i,]$mean = mean(z.pts[[sampling.attr]])
        z.v = var(z.pts[[sampling.attr]])
        ssh[i,]$se = sqrt(z.v / nrow(z.pts))
        ssh[i,]$df = nrow(z.pts) - 1
      }
    }

    #---------------- Calculating values and SEs for reporting layer -------------
    reporting_ssh$mean = 0
    reporting_ssh$se = 0
    reporting_ssh$df = 0
    for (i in 1:(nrow(reporting_ssh))){
      for (j in 1:(length(ssh.weights[[2]]))){
        k = which(ssh[[ssh.id.col]] == ssh.weights[[1]][j])
        r.w = reporting_ssh[i,][[ssh.weights[[2]][j]]]
        if (r.w != 0 & !is.na(r.w != 0)){
          reporting_ssh[i,]$mean = reporting_ssh[i,]$mean + r.w * ssh[k,]$mean
          reporting_ssh[i,]$se = reporting_ssh[i,]$se + r.w ^2 * ssh[k,]$se ^2
          reporting_ssh[i,]$df = reporting_ssh[i,]$df + ssh[k,]$df
        }
        reporting_ssh[i,]$se = sqrt(reporting_ssh[i,]$se)
      }
    }

    output = list(object=reporting_ssh, sample_size=nrow(sampling_ssh), ssh_num=nrow(ssh), reporting_num=nrow(reporting_ssh))
    class(output) <- "sandwich.model"
    return(output)}
}