R/rss.design.R
In generalRSS: Statistical Tools for Balanced and Unbalanced Ranked Set Sampling

Documented in rss.design

#' @details This function offers a systematic approach for determining efficient sample allocations in unbalanced RSS. Given either an initial RSS data or design (but not both), the function optimizes sample allocation to ensure higher efficiency compared to the initial RSS design. For quantitative data, the function calculates the integer Neyman allocation proposed by Wright (2012), as well as, the adjusted Neyman allocation and local ratio consistent (LRC) allocation introduced by Ahn et al. (2022), which guarantees efficiency improvements compared to SRS and BRSS. For binary data, it computes the optimal Neyman allocation by Chen et al. (2006).
#' @title Calculate efficient sample allocations for ranked set sampling
#' @name rss.design
#' @description The rss.design function calculates three efficient sample allocations for each stratum using given either initial RSS data or design. These allocations include Integer Neyman, adjusted Neyman, and local ratio consistent allocations for quantitative data and optimal Neyman for binary data to improve sampling efficiency.
#'
#' @param data A numeric data frame of ranked set samples with columns 'rank' for ranks and 'y' for data values.
#' @param H The set size
#' @param org.n The initial sample allocation vector
#' @param var.h A vector of strata variances
#' @param prop logical indicating the sampling design for proportions
#'
#' @return
#' \item{original.n}{The initial RSS sample allocation.}
#' \item{Integer.Neyman}{The integer Neyman allocation.}
#' \item{Adj.Neyman}{The adjusted Neyman allocation.}
#' \item{LRC.allocation}{The local ratio consistent allocation.}
#' \item{Neyman.proportion}{The optimal Neyman allocation for proportions.}
#'
#' @references
#'
#' S. Ahn, X. Wang, and J. Lim. (2022) Efficient sample allocation by local adjustment for unbalanced ranked set sampling. In Recent Advances on Sampling Methods and Educational Statistics, Springer.
#'
#' H. Chen, E.A. Stasny, and D.A. Wolfe. (2006). Unbalanced ranked set sampling for estimating a population proportion. Biometrics, 62(1), 150-158.
#'
#' T. Wright (2012) The equivalence of Neyman optimum allocation for sampling and equal proportions for apportioning the U.S. house of representatives. The American Statistician, 66(4), 217-224.
#'
#' @seealso
#' \code{\link{rss.simulation}}: used for simulating Ranked Set Samples (RSS), which can serve as input.
#'
#' \code{\link{rss.sampling}}: used for sampling Ranked Set Samples (RSS) from a population data set, providing input data.
#'
#' @examples
#' ## Unbalanced RSS with a set size 3 and different sample sizes of 3, 10, and 5 for each stratum,
#' ## using perfect ranking from a t distribution with a mean of 0.
#' rss.data=rss.simulation(H=3,nsamp=c(3,10,5), dist="t", rho=1,delta=0)
#'
#' # Check the structure of the RSS data
#' colnames(rss.data) # Should include "y" and "rank"
#' head(rss.data$y)
#' head(rss.data$rank)
#'
#' ## RSS allocation calculation for a given pilot RSS data
#' rss.design(rss.data)
#'
#'
#' ## Unbalanced RSS with a set size 3 and different sample sizes of 10, 15, and 15 for each stratum,
#' ## using perfect ranking for proportions with a population proportion of 0.5.
#' rss.prop.data=rss.prop.simulation(H=3,nsamp=c(10,15,20),p=0.5)
#'
#' ## RSS allocation calculation for a given pilot RSS binary data
#' rss.design(rss.prop.data,prop=TRUE)
#'
#' @export
rss.design <- function(data=NULL,H=NULL,org.n=NULL,var.h=NULL,prop=FALSE){

  if (isTRUE(prop)) { # sample allocation for proportion cases
    if (is.null(data)) {
      stop("Please provide data.")
    } else if (!is.null(H) || !is.null(org.n) || !is.null(var.h)) {
      stop("H, org.n, and var.h are not needed for proportion tests.")
    } else{
      H = length(unique(data$rank))
      nsamp = table(data$rank)
      phat = mean(tapply(data$y,data$rank,mean))

      phat.h=rep(NA,H)
      for (h in 1:H){
        phat.h[h]=1-stats::pbinom(H-h+1,H,phat)+stats::dbinom(H-h+1,H,phat)
      }

      temp = sqrt(phat.h*(1-phat.h))
      Neyman.proportion = sum(nsamp)*temp/sum(temp)

      names(nsamp)=names(Neyman.proportion)=paste0("n", 1:H)

      return(list(original.n = nsamp, Neyman.proportion=Neyman.proportion))
    }
  } else{ # sample allocation for non-proportion cases
    if (!is.null(data) && (!is.null(H) || !is.null(org.n) || !is.null(var.h))) {
      stop("Please provide either data or (H, org.n, and var.h), not both.")
    }

    if(!is.null(data)){

      if(!all(c("rank", "y") %in% colnames(data))) {
        stop("The input data must contain 'rank' and 'y' variables.")
      }

      data_H = length(unique(data$rank))
      org.n = table(data$rank)
      var.h = tapply(data$y, data$rank, stats::var)

    }else{
      if(is.null(H) || is.null(org.n) || is.null(var.h)) {
        stop("Error: Please provide either data or all of H, org.n, and var.h.")
      }
      if(length(org.n) != H || length(var.h) != H) {
        stop("Error: Length of org.n and var.h must match the set size H.")
      }
      data_H = H
    }
    H = data_H
    ord=order(var.h)
    ord.org=org.n[ord]
    ord.var.h=var.h[ord]
    sd.bh=c(sqrt(ord.var.h))
    den=sqrt((1:sum(ord.org))*(2:(sum(ord.org)+1)))
    mat=t(matrix(rep(1/den,H),sum(ord.org),H))

    elm=NULL
    elm[order(c(sd.bh*mat),decreasing=T)]=1:(sum(ord.org)*H)
    mat_ord=matrix(elm,H,sum(ord.org))
    neyman=apply(mat_ord<=sum(ord.org)-H,1,sum)+1
    neyman[ord]=neyman

    adj.neyman=apply(cbind(neyman,org.n),1,max)

    uh = ord.org[2:H]/ord.org[1:(H-1)] * ord.var.h[1:(H-1)]/ord.var.h[2:H]
    lh = ord.org[2:H]/ord.org[1:(H-1)]
    LRC.n = ord.org
    while( (max(uh) > 1) || (min(lh) < 1) ){
      if(sum(uh>1)>0){
        LRC.n[which.max(uh)]=LRC.n[which.max(uh)]+1
      }
      if(sum(lh<1)>0){
        LRC.n[which.min(lh)+1]=LRC.n[which.min(lh)+1]+1
      }
      uh = LRC.n[2:H]/LRC.n[1:(H-1)] * ord.var.h[1:(H-1)]/ord.var.h[2:H]
      lh = LRC.n[2:H]/LRC.n[1:(H-1)]
    }
    LRC.n[ord]=LRC.n

    names(org.n)=names(neyman)=names(adj.neyman)=names(LRC.n)=paste0("n", 1:H)

    return(list(original.n = org.n, Integer.Neyman = neyman, Adj.Neyman = adj.neyman, LRC.allocation = LRC.n))
  }
}