# R/categorical.R In BayesSampling: Bayes Linear Estimators for Finite Population

#### Documented in BLE_Categorical

#' Bayes Linear Method for Categorical Data
#'
#' Creates the Bayes Linear Estimator for Categorical Data
#'
#' @param ys k-vector of sample proportion for each category.
#' @param n sample size.
#' @param N total size of the population.
#' @param m k-vector with the prior proportion of each strata. If \code{NULL}, sample proportion for each strata will be used (non-informative prior).
#' @param rho matrix with the prior correlation coefficients between two different units within categories. It must be a symmetric square matrix of dimension k (or k-1). If \code{NULL}, non-informative prior will be used.
#'
#' @return A list containing the following components: \itemize{
#' \item \code{est.prop} - BLE for the sample proportion of each category
#' \item \code{Vest.prop} - Variance associated with the above
#' \item \code{Vs.Matrix} - Vs matrix, as defined by the BLE method (should be a positive-definite matrix)
#' \item \code{R.Matrix} - R matrix, as defined by the BLE method (should be a positive-definite matrix)
#' }
#'
#' @source \url{https://www150.statcan.gc.ca/n1/en/catalogue/12-001-X201400111886}
#' @references GonÃ§alves, K.C.M, Moura, F.A.S and  Migon, H.S.(2014). Bayes Linear Estimation for Finite Population with emphasis on categorical data. Survey Methodology, 40, 15-28.
#'
#' @examples
#' # 2 categories
#' ys <- c(0.2614, 0.7386)
#' n <- 153
#' N <- 15288
#' m <- c(0.7, 0.3)
#' rho <- matrix(0.1, 1)
#'
#' Estimator <- BLE_Categorical(ys,n,N,m,rho)
#' Estimator
#'
#' ys <- c(0.2614, 0.7386)
#' n <- 153
#' N <- 15288
#' m <- c(0.7, 0.3)
#' rho <- matrix(0.5, 1)
#'
#' Estimator <- BLE_Categorical(ys,n,N,m,rho)
#' Estimator
#'
#'
#' # 3 categories
#' ys <- c(0.2, 0.5, 0.3)
#' n <- 100
#' N <- 10000
#' m <- c(0.4, 0.1, 0.5)
#' mat <- c(0.4, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1, 0.6)
#' rho <- matrix(mat, 3, 3)
#'
#' @export
BLE_Categorical <- function(ys, n, N, m=NULL, rho=NULL){

mes_1 <- "parameter 'm' (prior proportions) not informed, sample proportions used in estimations"
mes_2 <- "parameter 'rho' not informed, non informative prior correlation coefficients used in estimations"

k <- length(ys)
if( k == 1 ){stop("only 1 category defined")}

if( prod(ys >= 0) != 1 ){stop("all sample proportions must be non-negative numbers")}
if( sum(ys) != 1 ){stop("sum of sample proportions should be 1")}

if( is.null(m) ){
message(mes_1)
m <- ys
}

if( length(m) != length(ys) ){stop("length of parameters 'ys' and 'm' must coincide")}
if( prod(m > 0) != 1 ){stop("all prior proportions must be positive numbers")}
if( sum(m) != 1 ){stop("sum of prior proportions should be 1")}

rho_informed <- 1
if( is.null(rho) ){
rho_informed <- 0
message(mes_2)
rho <- diag(x = 1-1e-10, nrow=k)
}

if( ! is.symmetric.matrix(rho) | dim(rho)[1] < k-1 ){stop("'rho' must be a symmetric square matrix of dimension k")}
if( max(abs(rho)) >= 1 ){stop("all values in 'rho' must be between -1 and 1")}

ys <- ys[-k]
m <- m[-k]

# Matrix 'm_ij'

m_ij <- matrix(0, nrow = k-1, ncol = k-1)

for (i in 1:(k-1)) {
for (j in 1:(k-1)) {
m_ij[i,j] <- (m[i]*m[j] + rho[j,i]*sqrt( m[i]*(1-m[i])*m[j]*(1-m[j]) ))/m[j]
}
}

a <- m
v <- m*(1-m)
c <- m * (diag(m_ij) - m)
sigma <- sqrt(v - c)

xs <- diag(k-1)
x_nots <- diag(k-1)

# Matrix 'R'

R_d <- diag(c, nrow = k-1)                    # elements in the diagonal

R_out <- matrix(0, nrow = k-1, ncol = k-1)
for (i in 1:(k-1)) {
for (j in 1:(k-1)) {
R_out[i,j] <- m[i] * (m_ij[j,i] - m[j])   # elements outside the diagonal
}
}
R_out <- R_out * (1 - diag(nrow = k-1))

R <- R_d + R_out

if( rho_informed == 1 ){
if( ! is.symmetric.matrix(R) ){stop("'R' must be a symmetric matrix. Review parameter 'rho'")}
if( ! is.positive.definite(R, tol=1e-15) ){warning("'R' should be a positive-definite matrix. Possible problem with parameter 'rho'")}
}

# Matrix 'Vs'

Vs_d <- diag(sigma^2, nrow = k-1)             # elements in the diagonal

Vs_out <- matrix(0, nrow = k-1, ncol = k-1)
for (i in 1:(k-1)) {
for (j in 1:(k-1)) {
Vs_out[i,j] <- (-m[i])*m_ij[j,i]          # elements outside the diagonal
}
}
Vs_out <- Vs_out * (1 - diag(nrow = k-1))

Vs <- (1/n)*(Vs_d + Vs_out)

if( rho_informed == 1 ){
if( ! is.symmetric.matrix(Vs) ){stop("'Vs' must be a symmetric matrix. Review parameter 'rho'")}
if( ! is.positive.definite(Vs, tol=1e-15) ){warning("'Vs' should be a positive-definite matrix. Possible problem with parameter 'rho'")}
}

V_nots <- Vs*n/(N-n)

C_inv <- ginv(R) + ginv(Vs)
C <- ginv(C_inv)
Beta <- C%*%(ginv(Vs)%*%ys + ginv(R)%*%a)

p_aux <- (n*ys + (N-n)*Beta)/N
p_k <- 1 - sum(p_aux)
p <- c(p_aux,p_k)

V_aux <- (V_nots + C) * ((N-n)/N)^2
V_k <- sum(V_aux)
Cov_k <- c
for (i in 1:k-1) {
Cov_k[i] <- -sum(V_aux[i,])
}
V_p <- rbind(cbind(V_aux, Cov_k[]), c(Cov_k, V_k))

if( prod(diag(V_p) > 0) != 1 ){warning("'Vest.prop' should have only positive diagonal values. Review prior specification and verify calculated matrices 'R' and 'Vs'.")}

return(list(est.prop = p, Vest.prop = V_p, Vs.Matrix = Vs, R.Matrix = R))

}


## Try the BayesSampling package in your browser

Any scripts or data that you put into this service are public.

BayesSampling documentation built on May 2, 2021, 1:06 a.m.