################################################################################
#
#'
#' Select sampling units from a set of potential sampling units using propensity
#' score matching (PSM)
#'
#' @param x A data.frame of all potential primary sampling units (PSUs) to
#' sample from each uniquely identified and with corresponding populations.
#' @param svy A data.frame of survey data drawn via probability proportional
#' to population size (PPS) approach from all the potential PSUs in *x*.
#' @param psu A single character value or vector of values corresponding to
#' the variable name/s for the primary sampling unit in *x* and *svy*.
#' Default is *"psu"*.
#' @param match A single character value corresponding to the variable name
#' for the primary sampling unit in *x* that matches the primary sampling
#' unit in *svy*. Default is NULL which indicates that primary sampling unit
#' in *x* is the same as primary sampling unit in *svy*.
#' @param pop A single character value corresponding to the variable name for
#' the population figures in *x*. Default is *"pop"*.
#' @param sampling_type Which sampling type to use. Choice between *simple
#' random sample (simple)* or *systematic sample (systematic)*. Default is
#' *simple*.
#'
#' @return A data.frame drawn from *svy* that contains the primary sampling
#' units selected by propensity score matching.
#'
#' @author Mark Myatt and Ernest Guevarra
#'
#' @examples
#' create_sample_psm(
#' x = village_list,
#' svy = sample_data,
#' psu = c("id", "psu"),
#' match = "cluster",
#' pop = "population"
#' )
#'
#' @export
#'
#
################################################################################
create_sample_psm <- function(x, svy,
psu = "psu",
match = NULL,
pop = "pop",
sampling_type = c("simple", "systematic")) {
## Re-create psu parameter
if (length(psu) == 1) {
psu <- c(psu, psu)
}
## Process full list of sampling units
if (!is.null(match)) {
sample_x <- x[ , c(psu[1], match, pop)]
names(sample_x) <- c("id", "psu", "pop")
} else {
sample_x <- data.frame(
id = x[[psu[1]]],
psu = x[[psu[1]]],
pop = x[[pop]]
)
}
## Process survey data
sample_y <- merge(svy, sample_x, by.x = psu[2], by.y = "psu", all.x = TRUE)
names(sample_y)[1] <- "psu"
sample_y <- stats::aggregate(pop ~ psu, data = sample_y, FUN = unique)
sample_y <- data.frame(
id = sample_x$id[sample_x$psu %in% sample_y$psu], sample_y
)
## Get sampling type
samp <- match.arg(sampling_type)
## Determine number of clusters to select
n_clusters <- nrow(sample_y)
if (samp == "systematic") {
## Process cluster list dataset to produce a systematic sample of n_clusters
## clusters.
sample_x <- get_sample_systematic(x = sample_x, n_sample = n_clusters)
} else {
## Process cluster list dataset to produce a systematic sample of n_clusters
## clusters.
selected_row_numbers <- sample(seq_len(nrow(sample_x)), size = n_clusters)
sample_x <- sample_x[selected_row_numbers, ]
}
## Match using PSM -----------------------------------------------------------
# Assign TRUE or FALSE to group variable for each of the processed dataset to
# signify SMART dataset (TRUE) and systematic sample of clusters dataset
# (FALSE)
sample_x$group <- FALSE
sample_y$group <- TRUE
# Combine rows of sample list and rows of actual data list
xy <- rbind(sample_y, sample_x)
row.names(xy) <- seq_len(nrow(xy))
# Perform propensity score matching based on population using nearest
# neighbour algorithm
matched_sample <- MatchIt::matchit(group ~ pop, data = xy,
method = "nearest", ratio = 1)
# Get matched data
z <- MatchIt::match.data(matched_sample)
# Subset to the SMART dataset and the full list sample
z1 <- z[z$group, ]
z2 <- z[!z$group, ]
# Merge two datasets to get rows of matched psus (merging by subclass)
z_wide <- merge(z1, z2, by = "subclass")
# Extract matched psus with available data
selected_psus <- with(z_wide,
ifelse(is.na(psu.y), psu.x, psu.y)
)
# Get simulated survey dataset using selected psus
simulated_svy <- svy[svy[[psu[2]]] %in% selected_psus, ]
## Return simulated_svy
simulated_svy
}
################################################################################
#
#'
#' Select sampling units from a set of potential sampling units using propensity
#' score matching (PSM)
#'
#' @param x A data.frame of all potential primary sampling units (PSUs) to
#' sample from each uniquely identified and with corresponding populations.
#' @param psu A single character value or vector of values corresponding to
#' the variable name/s for the primary sampling unit in *x* and *svy*.
#' Default is *"psu"*.
#' @param pop A single character value corresponding to the variable name for
#' the population figures in *x*. Default is *"pop"*.
#' @param sampling_type Which sampling type to use. Choice between *simple
#' random sample (simple)* or *systematic sample (systematic)*. Default is
#' *simple*.
#'
#' @return A data.frame drawn from *x* that contains the primary sampling
#' units selected by propensity score matching.
#'
#' @author Mark Myatt and Ernest Guevarra
#'
#' @examples
#' get_sampling_clusters(
#' x = village_list,
#' psu = "cluster",
#' pop = "population"
#' )
#'
#' @export
#'
#
################################################################################
get_sampling_clusters <- function(x, psu = NULL, pop = NULL,
sampling_type = c("simple", "systematic")) {
##
x$group <- ifelse(is.na(x[[psu]]), FALSE, TRUE)
# Perform propensity score matching based on population using nearest
# neighbour algorithm
matched_sample <- MatchIt::matchit(
group ~ get(pop), data = x, method = "nearest", ratio = 1
)
# Get matched data
y <- MatchIt::match.data(matched_sample)
## Determine sampling_type
samp <- match.arg(sampling_type)
## Determine number of clusters needed
n_clusters <- nrow(x[!is.na(x$cluster), ])
## Select a sample
if (samp == "systematic") {
## Process cluster list dataset to produce a systematic sample of n_clusters
## clusters.
z <- get_sample_systematic(x = y, n_sample = n_clusters)
} else {
## Process cluster list dataset to produce a systematic sample of n_clusters
## clusters.
selected_row_numbers <- sample(seq_len(nrow(x)), size = n_clusters)
z <- y[selected_row_numbers, ]
}
## Return
z
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.