#' Calculate the Horvitz-Thompson mean of an adaptive cluster sample.
#'
#' @template pi_i_values
#' @template N
#' @template n1
#' @template m_vec
#' @template y
#' @param sampling A vector (\code{character} format) describing whether units were included in the initial sample or subsequent ACS sample. Units selected in the initial sample should be given the value "Initial_Sample" in the \code{sampling} vector.
#' @template criterion
#' @description This calculate the Horvitz-Thompson mean of an adaptive cluster sample done by sampling without replacement.
#'
#'where \eqn{v} is the number of distinct units in the sample and
#'\eqn{J_k} is an indicator variable, equalling 0 if the \eqn{k^{th}} unit in the sample does not satisfy the condition and was not selected in the initial sample; otherwise, \eqn{J_k = 1}.
#'
#' @return The Horvitz-Thompson mean.
#' @references
#' \insertRef{thompson1990adaptive}{ACSampling}
#' @examples
#' library(magrittr)
#' library(plyr)
#' library(dplyr)
#' library(ggplot2)
#'
#' # EXAMPLE 1: Sampling of population from Figure 1, Thompson (1990)
#'
#' data(Thompson1990Fig1Pop)
#' data(Thompson1990Figure1Sample)
#'
#' # plot sample overlaid onto population
#' ggplot() +
#' geom_point(data=Thompson1990Fig1Pop, aes(x,y, size=factor(y_value),
#' shape=factor(y_value))) +
#' scale_shape_manual(values=c(1, rep(16, length(2:13)))) +
#' geom_point(data=Thompson1990Figure1Sample, aes(x,y), shape=0, size=7)
#'
#' Z <- createACS(popdata=Thompson1990Fig1Pop,
#' n1=dim(Thompson1990Figure1Sample)[1], yvar="y_value",
#' initsample=Thompson1990Figure1Sample)
#' # CALCULATE y_HT
#' y_HT(
#' N = dim(Thompson1990Fig1Pop)[1],
#' n1 = dim(Thompson1990Figure1Sample)[1],
#' m_vec = Z$m,
#' y = Z$y_value,
#' sampling = Z$Sampling,
#' criterion=0
#' )
#'
#' # EXAMPLE 2: Table 1 from Thompson (1990)
#' #data(Thompson1990Table1data)
#' #(Thompson1990Table1 = Thompson1990Table1data %>%
#' #group_by(sampling_effort) %>%
#' #summarise(
#'# `y (added through SRSWOR)` = toString(y_value[which(sampling=="SRSWOR")]),
#' # `y (added through ACS)` = toString(y_value[which(sampling=="ACS")]),
#' # y_bar_1 = mean(y_value[which(sampling=="SRSWOR")]),
#' # y_HT = round(y_HT(N=dim(Thompson1990Fig1Pop)[1], n1=2, m_vec=m, y=y_value, sampling, 5), 2),
#' # y_bar = round(mean(y_value),2)
#' # )
#' #)
#'
#' # EXAMPLE 3:
#' # data(cactus_realizations)
#' # realization = cactus_realizations %>% filter(n.networks==40)
#' # Ch. 24, Exercise #2, p. 307, from Thompson (2002)
#' N = 1000
#' n1 = 100
#' m_vec = c(2,3, rep(1,98))
#' y = c(3,6, rep(0, 98))
#' sampling = "SRSWOR"
#' criterion = 0
#' round(
#' y_HT(N,n1,m_vec,y,sampling,criterion)*1000, 0
#' )
#' @export
y_HT <- function(y, N, n1, pi_i_values=NULL, m_vec=NULL, sampling=NULL, criterion=NULL) {
if (!(is.null(sampling)) & !(is.null(criterion))) {
J = ifelse(y >= criterion | sampling=="Initial_Sample", 1, 0)
} else {
J = 1
}
if (is.null(pi_i_values)) {
pi_i_values = pi_i(N, n1, m_vec)
}
y_HT = sum(y*J/pi_i_values, na.rm=T)/N
return(y_HT)
}
#' Calculate the Horvitz-Thompson mean of an adaptive cluster sample, NEW FORMULA.
#'
#' @template pi_i_values
#' @template N
#' @template n1
#' @template m_vec
#' @template y
#' @param sampling A vector (\code{character} format) describing whether units were included in the initial sample or subsequent ACS sample. Units selected in the initial sample should be given the value "Initial_Sample" in the \code{sampling} vector.
#' @template criterion
#' @template m_threshold
#' @description This calculate the Horvitz-Thompson mean of an adaptive cluster sample done by sampling without replacement.
#'
#'where $v$ is the number of distinct units in the sample and
#'$J_k$ is an indicator variable, equalling 0 if the $k$ th unit in the sample does not satisfy the condition and was not selected in the initial sample; otherwise, $J_k = 1$.
#'
#' @return The Horvitz-Thompson mean.
#' @references
#' \insertRef{saubyadaptive}{ACSampling}
#' \insertRef{thompson1990adaptive}{ACSampling}
#'
#' @export
#' @examples
#' # Ch. 24, Exercise #2, p. 307, from Thompson (2002)
#' N = 1000
#' n1 = 100
#' m_vec = c(2,3, rep(1,98))
#' y = c(3,6, rep(0, 98))
#' sampling = "SRSWOR"
#' criterion = 0
#' round(
#' y_HT(N,n1,m_vec,y,sampling,criterion)*1000, 0
#' )
new_y_HT <- function(y, N, n1, m_threshold, pi_i_values=NULL, m_vec=NULL, sampling=NULL, criterion=NULL) {
if (!(is.null(sampling)) & !(is.null(criterion))) {
J = ifelse(y >= criterion | sampling=="Initial_Sample", 1, 0)
} else {
J = 1
}
Z = data.frame(y=y, m_vec=m_vec)
# with replacement inclusion probability for cluster units
# pi_i = 1 - ( 1 - ((m + a)/N) )^ n
# how to make this work for a network
A <- Z %>% filter(m_vec <= m_threshold)
B <- Z %>% filter(m_vec > m_threshold)
if (dim(A)[1] > 0) {
A$pi_i_values = pi_i(N, n1, A$m_vec)
}
if (dim(B)[1] > 0) {
B$pi_i_values = pi_i(N, n1, m_threshold)
}
Z <- bind_rows(A, B) %>% as.data.frame
y_HT = sum(unlist(Z$y)*J/unlist(Z$pi_i_values), na.rm=T)/N
return(y_HT)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.