R/estimate_participant.R

Defines functions estimate_participant

Documented in estimate_participant

#' Estimate the extra participants needed, and the expected number of unique
#' properties, for a target coverage based on the estimated norms
#'
#' For every row (concept) of `est_norms` this computes how many additional
#' participants would be required to reach `target_cover`, and the number of
#' unique properties expected to be listed with that many extra participants.
#'
#' @param est_norms a data frame with the estimated norms (as produced by
#'   `generate_norms()` in the example below). The columns read by this
#'   function are `Concept`, `U`, `Q1`, `Q2`, `T`, `S_obs` and `S_hat`.
#' @param target_cover float between 0 and 1, corresponding to coverage (the
#'   fraction of the total incidence probabilities of the reported properties
#'   that are in the reference sample)
#' @return a data frame with one row per row of `est_norms` and the columns
#'   `Concept`, `T_star` (the extra number of participants needed to achieve
#'   the specified coverage, rounded up, capped at `2 * T`, and set to 0 when
#'   the computed value is not positive), `S_hat_star` (the estimate of the
#'   number of unique properties listed with the suggested number of extra
#'   participants) and `Warning` (a note when `T_star` was capped or when
#'   `Q2 == 0`, otherwise an empty string). `T_star` can be non-finite or
#'   `NA`/`NaN` for rows where the formula is undefined (for example
#'   `Q1 == 0` or `Q2 == 0`); such rows no longer abort the whole call.
#' @export
#' @examples
#' estimated_norms <- generate_norms(data_test)
#' estimate_participant(estimated_norms, 0.8)


estimate_participant <- function(est_norms, target_cover) {

    q1 <- est_norms$Q1
    q2 <- est_norms$Q2
    n_part <- est_norms$T

    # Estimating the new number of participants to achieve the coverage,
    # rounded up to the next integer
    weighted_q1 <- (n_part - 1) * q1
    t_star <- log((est_norms$U / q1) * (1 - target_cover)) /
        log(weighted_q1 / (weighted_q1 + 2 * q2)) - 1
    t_star <- ceiling(t_star)

    # Masks for the special cases. `%in% TRUE` maps NA comparisons (t_star is
    # NaN when the log ratio is undefined) to FALSE; an NA inside a logical
    # subscript would otherwise make the assignments below fail.
    over_cap <- (t_star > 2 * n_part) %in% TRUE
    t_star[over_cap] <- 2 * n_part[over_cap]
    already_met <- (t_star <= 0 & t_star > -Inf) %in% TRUE
    q2_is_zero <- (q2 == 0) %in% TRUE

    # Estimating Q0 (the properties that have not been listed)
    q0_hat <- est_norms$S_hat - est_norms$S_obs

    # Estimating the expected number of unique listed properties
    s_hat_star <- est_norms$S_obs +
        q0_hat * (1 - (1 - q1 / (n_part * q0_hat + q1))^t_star)

    # Non-positive participant counts mean the objective is already
    # fulfilled: report 0 extra participants and S_hat as the estimate
    t_star[already_met] <- 0
    s_hat_star[already_met] <- est_norms$S_hat[already_met]

    # Warnings: the Q2 = 0 message is written last, so it takes precedence
    # over the cap message when both apply to the same row
    warning_txt <- rep("", nrow(est_norms))
    warning_txt[over_cap] <- "t_star > 2T, t_star = 2T"
    warning_txt[q2_is_zero] <- "Q2 = 0, cannot calculate t_star"

    data.frame(
        Concept = est_norms$Concept,
        T_star = t_star,
        S_hat_star = s_hat_star,
        Warning = warning_txt,
        stringsAsFactors = FALSE
    )
}

Try the CPNCoverageAnalysis package in your browser

Any scripts or data that you put into this service are public.

CPNCoverageAnalysis documentation built on Oct. 9, 2021, 5:06 p.m.