# R/computeOptimalSampleSizeRiskGroups.R In FFD: Freedom from Disease

#### Documented in computeOptimalSampleSizeRiskGroups

```## Ian Kopacka
## 2011-07-05
##
## Function: computeOptimalSampleSizeRiskGroups
##
## Computes the optimal sample size (for each risk group) for a survey to
## substantiate freedom from disease for a population stratified into risk groups.
## The optimal sample size is the smallest sample size that produces
## an alpha-error less than or equal to a prediscribed value for alpha.
## The population is considered as diseased if at least one individual has a
## positive test result. The sample size is computed using a bisection method.
##
## The sample size can be fixed for a subset of the risk groups via the input
## parameter 'nSampleFixVec' (vector containing sample sizes for the risk groups
## with fixed values and NA for the risk groups for which the sample size is to
## be computed). For those risk groups for which the sample size is to be
## computed a vector specifying the proportional distribution among the risk
## groups ('nSamplePropVec') needs to be specified.
##
## Example: We have 3 risk groups. For the 2nd risk group we want 20 farms
## to be sampled. For the other  risk groups we specify that the sample size
## for risk group 1 should be double the sample size of risk group 3. We
## then set:
##     nSampleFixVec <- c(NA, 20, NA)
##     nSamplePropVec <- c(2,1)
##
## Input parameters:
##     nPopulationVec...Integer vector. Population sizes of the risk groups.
##     nRelRiskVec......Numeric vector. (Relative) infection risks of the
##                      risk groups.
##     nSampleFixVec.......Numeric vector containing NAs (optional argument).
##                      For risk groups for which the sample size is fixed
##                      specify the sample size. For the risk groups for which
##                      the sample size should be computed set NA (order of the
##                      risk groups must be the same order as in 'nPopulationVec'
##                      and 'nRelRiskVec') .
##     nSamplePropVec...Numeric vector. For those risk groups for which the
##                      sample size should be computed a proportional
##                      distribution of the overall sample size must be specified.
##                      The vector must have the same length as the number of
##                      NA entries in nSampleFixVec or if nSampleFixVec is not
##                      specified, nSamplePropVec must have the same length as
##                      nPopulationVec.
##     prevalence.......Numeric between 0 and 1. Prevalence of the disease
##                      under the null hypothesis.
##     alpha............Numeric between 0 and 1. Type one error for the
##                      statistical test.
##     sensitivity......Numeric between 0 and 1. Sensitivity of test (diagnostic
##                      test for one stage sampling, herd test for two stage
##                      sampling).
##     specificity......Numeric between 0 and 1. Specificity of test (diagnostic
##                      test for one stage sampling, herd test for two stage
##                      sampling).
##     lookupTable......Logical. TRUE if a lookup table of sample sizes for
##                      individual sampling (see, Ziller et al., 2002) should
##                      be produced. FALSE if the sample size is desired
##                      for a fixed population size (default).
##
## Calls:
##    computePValueRiskGroups.R
##
## Is called by:
##    -
##
computeOptimalSampleSizeRiskGroups <- function(nPopulationVec, nRelRiskVec,
nSampleFixVec = NULL, nSamplePropVec = NULL, prevalence, alpha = 0.05,
sensitivity = 1, specificity = 1){
## Fixed sample sizes:
if (is.null(nSampleFixVec)) nSampleFixVec <- rep(NA, length(nPopulationVec))

## Error check:
if (length(nRelRiskVec) != length(nPopulationVec)){
stop ("'nPopulationVec' and 'nRelRiskVec' must have the same length.")
}
if (length(nSampleFixVec) != length(nPopulationVec)){
stop ("'nSampleFixVec' and 'nRelRiskVec' must have the same length.")
}
if ((sum(is.na(nSampleFixVec)) > 1) & (length(nSamplePropVec) != sum(is.na(nSampleFixVec)))){
stop ("'nSamplePropVec' does not have the correct length.")
}
if ((sum(is.na(nSampleFixVec)) == 1) & (!is.null(nSamplePropVec))){
warning ("Argument 'nSamplePropVec' is ignored.")
}
if (min(nSamplePropVec) <= 0){
stop ("'nSamplePropVec' must have positive values.")
}
if (sum(is.na(nSampleFixVec)) == 0){
stop ("'nSampleFixVec' must contain NA values.")
}

## Calculate the number of diseased individuals in the population:
nDiseased <- max(round(sum(nPopulationVec)*prevalence),1)
## Proportion for sample sizes:
if (sum(is.na(nSampleFixVec)) > 1){
nSamplePropVec <- nSamplePropVec/sum(nSamplePropVec)
} else {
nSamplePropVec <- 1
}

# Initialize the parameters for the bisection method:
if (sum(nSampleFixVec, na.rm = TRUE) > 0){
lowerBoundSampleSize <- 0
} else {
lowerBoundSampleSize <- 1
}
upperBoundSampleSize <- min(floor(nPopulationVec[is.na(nSampleFixVec)] /
nSamplePropVec))
nSampleArgVec <- nSampleFixVec
## Compute alpha errors:
probabilityLowerUpper <- sapply(c(lowerBoundSampleSize,
upperBoundSampleSize), function(sampleSize){
nSampleArgVec[is.na(nSampleFixVec)] <-
roundConstantSum(sampleSize*nSamplePropVec, output = 0)
out <- computePValueRiskGroups(nPopulationVec = nPopulationVec,
nSampleVec = nSampleArgVec, nRelRiskVec = nRelRiskVec,
nDiseased = nDiseased, sensitivity = sensitivity,
specificity = specificity)
return(out)
})
## Check if upper bound satisfies the desired accuracy:
if (probabilityLowerUpper[2] > alpha) return(Inf)
## Check if lower bound satisfies the desired accuracy:
if (probabilityLowerUpper[1] <= alpha){
nSampleOutVec <- nSampleFixVec
nSampleOutVec[is.na(nSampleFixVec)] <-
roundConstantSum(lowerBoundSampleSize*nSamplePropVec, output = 0)
return(nSampleOutVec)
}
## Bisection method: Due to the discrete nature of the variable
## (= optimal sample size) the bisection method is executed
## until the width of the search interval falls below a certain
## threshold. The probabilities for the remaining cases are
## explicitly computed and the optimal sample size is chosen:
minIntervalWidth <- 4 #10
while (upperBoundSampleSize - lowerBoundSampleSize > minIntervalWidth){
midSampleSize <- round((lowerBoundSampleSize + upperBoundSampleSize)/2)
nSampleArgVec[is.na(nSampleFixVec)] <-
roundConstantSum(midSampleSize*nSamplePropVec, output = 0)
probabilityMid <- computePValueRiskGroups(nPopulationVec = nPopulationVec,
nSampleVec = nSampleArgVec, nRelRiskVec = nRelRiskVec,
nDiseased = nDiseased, sensitivity = sensitivity,
specificity = specificity)
if (probabilityMid > alpha){
lowerBoundSampleSize <- midSampleSize
} else {
upperBoundSampleSize <- midSampleSize
}
}
## Explicitly compute the probabilities for the remaining cases
## and compare the cases. Return the smallest sample size that
## has a probability of returning no test positives smaller than
## or equal to alpha:
sampleSizeVector <- lowerBoundSampleSize : upperBoundSampleSize
probabilityVector <- sapply(sampleSizeVector,
function(sampleSize){
nSampleArgVec <- nSampleFixVec
nSampleArgVec[is.na(nSampleFixVec)] <-
roundConstantSum(sampleSize*nSamplePropVec, output = 0)
out <- computePValueRiskGroups(nPopulationVec = nPopulationVec,
nSampleVec = nSampleArgVec, nRelRiskVec = nRelRiskVec,
nDiseased = nDiseased, sensitivity = sensitivity,
specificity = specificity)
return(out)
})
indexProbablityAlpha <- which(probabilityVector <= alpha)
if (length(indexProbablityAlpha) > 0){
sampleSizeOpt <- sampleSizeVector[min(indexProbablityAlpha)]
} else {
sampleSizeOpt <- upperBoundSampleSize
}
## Return value = optimal sample size:
nSampleOutVec <- nSampleFixVec
nSampleOutVec[is.na(nSampleFixVec)] <-
roundConstantSum(sampleSizeOpt*nSamplePropVec, output = 0)
return(nSampleOutVec)
}
```

## Try the FFD package in your browser

Any scripts or data that you put into this service are public.

FFD documentation built on May 2, 2019, 5:25 p.m.