# R/E.UC.R In TeachingSampling: Selection of Samples and Parameter Estimation in Finite Population

#### Documented in E.UC

#' @importFrom magrittr %>%
#' @importFrom dplyr group_by
#' @importFrom dplyr summarise
#' @importFrom dplyr left_join
#' @export
#'
#' @title
#' Estimation of the Population Total and its variance using the Ultimate Cluster technique
#' @description
#' This function computes a weighted estimator of the population total and
#' estimates its variance by using the Ultimate Cluster technique. This approximation
#' performs well in many sampling designs. The user specifically needs to
#' declare the variables of interest, the primary sampling units, the strata, and
#' the sampling weights for every singlt unit in the sample.
#' @return
#' This function returns the estimation of the population total of
#' every single variable of interest, its estimated standard error
#' and its estimated coefficient of variation.
#' @details
#' The function returns a data matrix whose columns correspond to
#' the estimated parameters of the variables of interest.
#' @author Hsugo Andres Gutierrez Rojas <hugogutierrez at gmail.com>
#' @param S Vector identifying the membership to the strata of each unit in selected sample.
#' @param PSU Vector identifying the membership to the strata of
#' each unit in the population.
#' @param dk Sampling weights of the units in the sample.
#' @param y Vector, matrix or data frame containig the recollected
#' information of the variables of interest for every unit in the
#' selected sample.
#'
#' @references
#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr
#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. Editorial Universidad Santo Tomas
#'
#'
#' @examples
#'
#' #############################
#' ## Example 1:              ##
#' ## Stratified Two-stage SI ##
#' #############################
#'
#' data('BigCity')
#' FrameI <- BigCity %>% group_by(PSU) %>%
#' summarise(Stratum = unique(Stratum),
#'           Persons = n(),
#'           Income = sum(Income),
#'           Expenditure = sum(Expenditure))
#'
#' attach(FrameI)
#'
#' sizes = FrameI %>% group_by(Stratum) %>%
#'         summarise(NIh = n(),
#'         nIh = 2,
#'         dI = NIh/nIh)
#'
#' NIh <- sizes$NIh #' nIh <- sizes$nIh
#'
#' samI <- S.STSI(Stratum, NIh, nIh)
#' UI <- levels(as.factor(FrameI$PSU)) #' sampleI <- UI[samI] #' #' FrameII <- left_join(sizes, BigCity[which(BigCity$PSU %in% sampleI), ])
#' attach(FrameII)
#'
#' HHdb <- FrameII %>%
#'         group_by(PSU) %>%
#'         summarise(Ni = length(unique(HHID)))
#'
#' Ni <- as.numeric(HHdb$Ni) #' ni <- ceiling(Ni * 0.1) #' ni #' sum(ni) #' #' sam = S.SI(Ni[1], ni[1]) #' clusterII = FrameII[which(FrameII$PSU == sampleI[1]), ]
#' sam.HH <- data.frame(HHID = unique(clusterII$HHID)[sam]) #' clusterHH <- left_join(sam.HH, clusterII, by = "HHID") #' clusterHH$dki <- Ni[1]/ni[1]
#' clusterHH$dk <- clusterHH$dI * clusterHH$dki #' data = clusterHH #' for (i in 2:length(Ni)) { #' sam = S.SI(Ni[i], ni[i]) #' clusterII = FrameII[which(FrameII$PSU == sampleI[i]), ]
#'       sam.HH <- data.frame(HHID = unique(clusterII$HHID)[sam]) #' clusterHH <- left_join(sam.HH, clusterII, by = "HHID") #' clusterHH$dki <- Ni[i]/ni[i]
#'       clusterHH$dk <- clusterHH$dI * clusterHH$dki #' data1 = clusterHH #' data = rbind(data, data1) #' } #' #' sum(data$dk)
#' attach(data)
#' estima <- data.frame(Income, Expenditure)
#' area <- as.factor(PSU)
#' stratum <- as.factor(Stratum)
#'
#' E.UC(stratum, area, dk, estima)
#'
#' ################################
#' ## Example 2:                 ##
#' ## Self weighted Two-stage SI ##
#' ################################
#'
#' data('BigCity')
#' FrameI <- BigCity %>% group_by(PSU) %>%
#' summarise(Stratum = unique(Stratum),
#'           Households = length(unique(HHID)),
#'           Income = sum(Income),
#'           Expenditure = sum(Expenditure))
#'
#' attach(FrameI)
#'
#' sizes = FrameI %>% group_by(Stratum) %>%
#'         summarise(NIh = n(),
#'         nIh = 2)
#'
#' NIh <- sizes$NIh #' nIh <- sizes$nIh
#'
#' resI <- S.STpiPS(Stratum, Households, nIh)
#' samI <- resI[, 1]
#' piI <- resI[, 2]
#' UI <- levels(as.factor(FrameI$PSU)) #' sampleI <- data.frame(PSU = UI[samI], dI = 1/piI) #' #' FrameII <- left_join(sampleI, #' BigCity[which(BigCity$PSU %in% sampleI[,1]), ])
#'
#' attach(FrameII)
#'
#' HHdb <- FrameII %>%
#'         group_by(PSU) %>%
#'         summarise(Ni = length(unique(HHID)))
#' Ni <- as.numeric(HHdb$Ni) #' ni <- 5 #' #' sam = S.SI(Ni[1], ni) #' clusterII = FrameII[which(FrameII$PSU == sampleI$PSU[1]), ] #' sam.HH <- data.frame(HHID = unique(clusterII$HHID)[sam])
#' clusterHH <- left_join(sam.HH, clusterII, by = "HHID")
#' clusterHH$dki <- Ni[1]/ni #' clusterHH$dk <- clusterHH$dI * clusterHH$dki
#' data = clusterHH
#' for (i in 2:length(Ni)) {
#'       sam = S.SI(Ni[i], ni)
#'       clusterII = FrameII[which(FrameII$PSU == sampleI$PSU[i]), ]
#'       sam.HH <- data.frame(HHID = unique(clusterII$HHID)[sam]) #' clusterHH <- left_join(sam.HH, clusterII, by = "HHID") #' clusterHH$dki <- Ni[i]/ni
#'       clusterHH$dk <- clusterHH$dI * clusterHH$dki #' data1 = clusterHH #' data = rbind(data, data1) #' } #' #' sum(data$dk)
#' attach(data)
#' estima <- data.frame(Income, Expenditure)
#' area <- as.factor(PSU)
#' stratum <- as.factor(Stratum)
#'
#' E.UC(stratum, area, dk, estima)

E.UC <- function(S, PSU, dk, y) {
y <- cbind(1, y)
y <- as.data.frame(y)
names(y)[1] <- "N"
PSU <- as.factor(PSU)
S <- as.factor(S)

Total <- matrix(NA, nrow = 4, ncol = dim(y)[2])
rownames(Total) = c("Estimation", "Standard Error", "CVE",
"DEFF")
colnames(Total) <- names(y)

for (k in 1:dim(y)[2]) {
yk <- tyi <- NULL
matriz <- data.frame(y[, k], dk, PSU, S)
colnames(matriz)[1] <- "yk"

P1 <- matriz %>%
group_by(PSU) %>%
summarise(tyi = sum(yk * dk),
S = unique(S))

P2 <- P1 %>%
group_by(S) %>%
summarise(barty = mean(tyi))

P3 <- matriz %>%
group_by(S) %>%
summarise(mi = length(unique(PSU)))

T1 <- left_join(left_join(P1, P2, by="S"), P3, by="S")
T1$s2 <- (T1$mi/(T1$mi-1))*(T1$tyi - T1$barty)^2 Vty <- sum(T1$s2, na.rm = T)
ty <- sum(matriz\$yk * dk)

CVe <- 100 * sqrt(Vty)/ty
n <- length(y[, k])
N <- sum(dk)
VMAS <- (N^2) * (1 - (n/N)) * var(y[, k])/(n)
DEFF <- Vty/VMAS
Total[, k] <- c(ty, sqrt(Vty), CVe, DEFF)
}
return(Total)
}


## Try the TeachingSampling package in your browser

Any scripts or data that you put into this service are public.

TeachingSampling documentation built on April 22, 2020, 1:05 a.m.