R/CI_boot.R

Defines functions CI_scoreBoot

Documented in CI_scoreBoot

##' Confidence interval for the estimated genetic model
##' @title confidence interval for the estimated genetic model
##' @param data  a data set consists of phenotype, genotype under additive genetic model assumption, genotype expanded with grid interval, and covariate if applicable.
##' @param scoreF_data  a list of score functions generated by effScore.
##' @param boot_Num a numeric value of bootrapping replications.
##' @param method_se a method to compute standard error (default='percentile'). Users must choose a method in c('standard', 'percentile', 'BC', 'BCa').
##' 'standard' calculates a standard error of the bootstrap samples. 'percentile' uses percentiles of the bootstrap distribution to define the percentile confidence limits. 'percentile' method corrects for non-normality of the estimator of interest.
##' 'BC' is a Bias-Corrected percentile method which corrects for narrowness bias of percentile CIs. The bias-correction factor isestimated using the inverse cdf of standard normal distribution. 'BCa' is a bias-corrected and accelerated
##' percentaile method which corrects for narrowness bias and for nonconstant standard error of the estimator of interest (acceleration). The acceleration factor estimates
##' the rate of change of the standard error of the estimator.
##' @param alpha_CI a significance level (default=0.05).
##' @param grid an interval of grid search (default=0.01; range: 0 < grid < 1). A lower value of grid gives denser intervals.
##' @import stats
##' @return a list of estimated genetic model and its confidence interval.
##' @author Yeonil Kim
##' @export

CI_scoreBoot <- function(data, scoreF_data, boot_Num = NULL, method_se = "percentile", alpha_CI = NULL, 
    grid) {
    # 
    U_V <- scoreF_data
    
    # grid intervals
    seqC = seq(0, 1, grid)
    
    ## dimension
    n = dim(U_V[[1]])[1]
    m = length(seqC)
    
    ## significance level
    if (!is.null(boot_Num)) {
        
        boot_Num = boot_Num
        
    } else {
        
        boot_Num = 2000
        
    }
    
    ## significance level
    if (!is.null(alpha_CI)) {
        
        a_level = alpha_CI
        
    } else {
        
        a_level = 0.05
        
    }
    lower_a <- a_level/2
    upper_a <- 1 - a_level/2
    
    score_boot = do.call(rbind, lapply(1:boot_Num, function(bb) {
        (colSums(U_V[[3]][, , bb])^2)/U_V[[4]][bb, ]
    }))
    max_score_boot = apply(score_boot, 1, which.max)
    score_max = which.max((colSums(U_V[[1]])^2)/U_V[[2]])
    
    est = seqC[score_max]
    est_boot = seqC[max_score_boot]
    
    
    # standard
    if (method_se == "standard") {
        
        sd_est_output = c(est - abs(qnorm(lower_a)) * sd(est_boot), est + abs(qnorm(lower_a)) * sd(est_boot))
        names(sd_est_output) <- c(paste0(lower_a * 100, "%"), paste0(upper_a * 100, "%"))
        
        
    } else if (method_se == "percentile") {
        
        # percentile
        sd_est_output <- c(quantile(est_boot, lower_a), quantile(est_boot, upper_a))
        names(sd_est_output) <- c(paste0(lower_a * 100, "%"), paste0(upper_a * 100, "%"))
        
        
    } else if (method_se == "BC") {
        
        # BC
        p0 = sum(est_boot <= est)/boot_Num
        z0 = qnorm(p0)
        lo = quantile(est_boot, pnorm(2 * z0 + qnorm(lower_a)))
        hi = quantile(est_boot, pnorm(2 * z0 + qnorm(upper_a)))
        
        sd_est_output <- c(lo, hi)
        names(sd_est_output) <- c(paste0(lower_a * 100, "%"), paste0(upper_a * 100, "%"))
        
        
    } else if (method_se == "BCa") {
        
        # read data
        y = data[[1]]
        geno_grid = data[[5]]
        z = data[[3]]
        
        # BCa
        max_score_i = numeric(n)
        for (i in 1:n) {
            y_i = y[-i]
            X_i = geno_grid[-i, ]
            z_i = z[-i]
            
            glm_null_i = glm(y_i ~ z_i, family = "binomial")
            ans.score2 = effScore(y_i, X_i, z_i, glm_null_i$fitted)
            
            score_i = (colSums(ans.score2[[1]])^2)/ans.score2[[2]]
            max_score_i[i] = which.max(score_i)
            
        }
        est_i = seqC[max_score_i]
        est_i = n * est - (n - 1) * est_i
        
        if (sum((est_i - mean(est_i))^2) == 0) {
            a = 0
        } else {
            a = sum((est_i - mean(est_i))^3)/(6 * sum((est_i - mean(est_i))^2)^1.5)
        }
        p0 = sum(est_boot <= est)/boot_Num
        z0 = qnorm(p0)
        z_alpha = c(qnorm(lower_a), qnorm(upper_a))
        BCa = z0 + {
            z0 + z_alpha
        }/{
            1 - a * (z0 + z_alpha)
        }
        CI = quantile(est_boot, pnorm(BCa))
        sd_est_output <- CI
        names(sd_est_output) <- c(paste0(lower_a * 100, "%"), paste0(upper_a * 100, "%"))
        
    }
    
    
    return(list(est.gmodel = est, CI = sd_est_output, method_se = method_se))
}
ykim03517/reSNP documentation built on Nov. 5, 2019, 1:20 p.m.