Nothing
#' Optimal sample size allocation
#'
#' @description The function computes optimal sample size allocation over strata.
#'
#' @section Details:
#' If \code{s2h} and \code{Rh} is not defined, the sample allocation will be calculated as proportional allocation (proportional to the population size).
#' If \code{Rh} is not defined, the sample allocation will be calculated as Neyman allocation.
#'
#' @param H The stratum variable. One dimensional object convertible to one-column \code{data.table}, variable name as character, or column number.
#' @param n Total sample size. One dimensional object with length one.
#' @param poph Population size in each stratum. One dimensional object convertible to one-column \code{data.table}, variable name as character, or column number.
#' @param s2h The expected population variance \eqn{S^2} for variables of interest in each stratum (optional). If not defined, it is assumed to be 1 in each stratum. Object convertible to \code{data.table}, variable name as character vector, or column numbers.
#' @param Rh The expected response rate in each stratum (optional). If not defined, it is assumed to be 1 in each stratum (full-response). Object convertible to one-column \code{data.table}, variable name as character, or column number.
#' @param deffh The expected design effect for the estimate of variable (optional). If not defined, it is assumed to be 1 for each variable in each stratum. If is defined, then variables is defined the same arrangement as \code{Yh}. Object convertible to \code{data.table}, variable name as character vector, or column numbers.
#' @param fullsampleh Variable for detection fully surveyed stratum (optinal). If not defined, it is assumed to be 1 in each stratum (full-response). Object convertible to one-column \code{data.table}, variable name as character, or column number.
#' @param dataset Optional survey data object convertible to \code{data.table} with one row for each stratum.
#'
#' @return An object as \code{data.table}, with variables:\cr
#' \code{H} - stratum, \cr
#' \code{variable} - the name of variable for population variance \eqn{S^2}, \cr
#' \code{s2h} - population variance \eqn{S^2}, \cr
#' \code{Rh} - the expectedresponse rate, \cr
#' \code{deffh} - the expected design effect, \cr
#' \code{poph} - population size, \cr
#' \code{deffh} - design effect, \cr
#' \code{fullsampleh} - full sample indicator, \cr
#' \code{nh} - sample size.
#'
#' @seealso \code{\link{expsize}}, \code{\link{dom_optimal_allocation}}
#'
#' @keywords surveysampling
#'
#' @examples
#' library("data.table")
#' data <- data.table(H = 1 : 3,
#' s2h=10 * runif(3),
#' s2h2 = 10 * runif(3),
#' poph = 8 * 1 : 3,
#' Rh = rep(1, 3),
#' dd = c(1, 1, 1))
#'
#' vars <- optsize(H = "H",
#' s2h = c("s2h", "s2h2"),
#' n = 10, poph = "poph",
#' Rh = "Rh",
#' fullsampleh = NULL,
#' dataset = data)
#' vars
#'
#' @import data.table stats
#' @export optsize
#'
optsize <- function(H, n, poph,
s2h = NULL,
Rh = NULL,
deffh = NULL,
fullsampleh = NULL,
dataset = NULL) {
### Checking
if( length(n) != 1 | !any(n>0 | abs(n - round(n)) < .Machine$double.eps)) stop("'n' must be a integer value greater than 0")
if(!is.null(dataset)) {
dataset <- data.table(dataset)
if(!is.null(H)) {
if (min(H %in% names(dataset)) != 1) stop("'H' does not exist in 'dataset'!")
if (min(H %in% names(dataset)) == 1) H <- dataset[, H, with = FALSE] }
if(!is.null(s2h)) {
if (min(s2h %in% names(dataset)) != 1) stop("'s2h' does not exist in 'dataset'!")
if (min(s2h %in% names(dataset)) == 1) s2h <- dataset[, s2h, with = FALSE] }
if(!is.null(poph)) {
if (min(poph %in% names(dataset)) != 1) stop("'poph' does not exist in 'dataset'!")
if (min(poph %in% names(dataset)) == 1) poph <- dataset[, poph, with = FALSE] }
if(!is.null(Rh)) {
if (min(Rh %in% names(dataset)) != 1) stop("'Rh' does not exist in 'dataset'!")
if (min(Rh %in% names(dataset)) == 1) Rh <- dataset[, Rh, with = FALSE] }
if(!is.null(deffh)) {
if (min(deffh %in% names(dataset)) != 1) stop("'deffh' does not exist in 'dataset'!")
if (min(deffh %in% names(dataset)) == 1) deffh <- dataset[, deffh, with = FALSE] }
if(!is.null(fullsampleh)) {
if (min(fullsampleh %in% names(dataset)) != 1) stop("'fullsampleh' does not exist in 'dataset'!")
if (min(fullsampleh %in% names(dataset)) == 1) fullsampleh <- dataset[, fullsampleh, with = FALSE] }
}
# s2h
if (is.null(s2h)) s2h <- rep(1, nrow(H))
s2h <- data.table(s2h, check.names = TRUE)
m <- ncol(s2h)
if (any(is.na(s2h))) stop("'s2h' has unknown values")
if (!all(sapply(s2h, is.numeric))) stop("'s2h' must be numeric values")
if (is.null(names(s2h))) stop("'s2h' must be colnames")
# H
H <- data.table(H)
if (nrow(H) != nrow(s2h)) stop("'H' length must be equal with 'S2h' row count")
if (ncol(H) != 1) stop("'H' must be 1 column data.frame, matrix, data.table")
if (any(is.na(H))) stop("'H' has unknown values")
if (is.null(names(H))) stop("'H' must be colnames")
# poph
poph <- data.frame(poph)
if (nrow(poph) != nrow(s2h)) stop("'poph' must be equal with 's2h' row count")
if (ncol(poph) != 1) stop("'poph' must be vector or 1 column data.frame, matrix, data.table")
poph <- poph[, 1]
if (!is.numeric(poph)) stop("'poph' must be numerical")
if (any(is.na(poph))) stop("'poph' has unknown values")
# Rh
if (is.null(Rh)) Rh <- rep(1, nrow(s2h))
Rh <- data.frame(Rh)
if (nrow(Rh) != nrow(s2h)) stop("'Rh' must be equal with 's2h' row count")
if (ncol(Rh) != 1) stop("'Rh' must be vector or 1 column data.frame, matrix, data.table")
Rh <- Rh[, 1]
if (!is.numeric(Rh)) stop("'Rh' must be numerical")
if (any(is.na(Rh))) stop("'Rh' has unknown values")
# fullsampleh
if (is.null(fullsampleh)) fullsampleh <- rep(0, length(Rh))
fullsampleh <- data.frame(fullsampleh)
if (nrow(fullsampleh) != nrow(s2h)) stop("'fullsampleh' must be equal with 's2h' row count")
if (ncol(fullsampleh) != 1) stop("'fullsampleh' must be vector or 1 column data.frame, matrix, data.table")
fullsampleh <- fullsampleh[, 1]
if (!is.numeric(fullsampleh)) stop("'fullsampleh' must be numerical")
if (any(is.na(fullsampleh))) stop("'fullsampleh' has unknown values")
if (any(!(fullsampleh %in% c(0, 1)))) stop("'fullsampleh' must be two values: 0 or 1")
# deffh
if (!is.null(deffh)) {
deffh <- data.table(deffh, check.names=TRUE)
if (nrow(deffh) != nrow(s2h)) stop("'deffh' length must be equal with 's2h' row count")
if (ncol(deffh) != m) stop("'deffh' and 'Yh' must be equal column count")
if (any(is.na(deffh))) stop("'deffh' has unknown values")
if (!all(sapply(deffh, is.numeric))) stop("'deffh' must be numeric values")
if (is.null(names(deffh))) stop("'deffh' must be colnames")
}
variable <- pnh <- nh <- NULL
resulth <- data.table(H, Rh = Rh, poph = poph, fullsampleh = fullsampleh)
nh_cor <- n - resulth[get("fullsampleh") == 1, sum(poph)]
if (nh_cor < 0) stop("'n' need larger!")
s2h <- data.table(H, s2h)
ns2h <- names(s2h)
s2h <- melt(s2h, id = c(names(H)))
setnames(s2h, "value", "s2h")
resulth <- merge(s2h, resulth, all = TRUE, by = names(H))
if (!is.null(deffh)) {
deffh <- data.table(H, deffh)
setnames(deffh, names(deffh), ns2h)
deffh <- melt(deffh, id = names(H))
setnames(deffh, "value", "deffh")
resulth <- merge(resulth, deffh, all = TRUE, by = c(names(H), "variable"))
} else resulth[, deffh := 1]
resulth[, pnh := ifelse(fullsampleh == 0, poph * sqrt(s2h * deffh / Rh), 0)]
resulth[fullsampleh == 0, nh := nh_cor * pnh / sum(pnh), by = "variable"]
resulth[fullsampleh == 1, nh := poph]
resulth[, pnh := NULL]
return(resulth)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.