inst/doc/ciee.R

## ---- echo=FALSE---------------------------------------------------------
generate_data <- function(setting = "GLM", n = 1000, maf = 0.2, cens = 0.3,
                          a = NULL, b = NULL, aXK = 0.2, aXY = 0.1, aXL = 0,
                          aKY = 0.3, aLK = 0, aLY = 0, aUY = 0, aUL = 0,
                          mu_X = NULL, sd_X = NULL, X_orth_U = TRUE, mu_U = 0,
                          sd_U = 1, mu_K = 0, sd_K = 1, mu_L = 0, sd_L = 1,
                          mu_Y = 0, sd_Y = 1) {
    U_out <- rnorm(n, mean = mu_U, sd = sd_U)
    if (setting == "AFT" & (is.null(a) | is.null(b))) {
        stop("a and b have to be specified under the AFT setting.")
    }
    if (X_orth_U == TRUE) {
        X_out <- rbinom(n, size = 2, prob = maf)
    }
    if (X_orth_U == FALSE) {
        X_out <- pnorm(U_out, mean = mu_X, sd = sd_X)
        p <- 1 - maf
        for (j in 1:length(X_out)) {
            if (X_out[j] < p^2) {
                X_out[j] <- 0
                next
            }
            if (X_out[j] >= p^2 & X_out[j] < p^2 + 2 * p * (1 - p)) {
                X_out[j] <- 1
                next
            }
            if (X_out[j] >= p^2 + 2 * p * (1 - p)) {
                X_out[j] <- 2
                next
            }
        }
    }
    L_out <- aUL * U_out + aXL * X_out + rnorm(n, mean = mu_L, sd = sd_L)
    K_out <- aXK * X_out + aLK * L_out + rnorm(n, mean = mu_K, sd = sd_K)
    Y_out <- aUY * U_out + aKY * K_out + aXY * X_out + aLY * L_out +
             rnorm(n, mean = mu_Y, sd = sd_Y)
    data <- data.frame(Y = Y_out, K = K_out, X = X_out, L = L_out, U = U_out)
    if (setting == "AFT") {
        T_help <- exp(Y_out)
        ### Create censoring indicator and censored times no censoring
        if (cens == 0) {
            T_out <- T_help
            C_out <- rep(1, n)  # C_out==0 is censored, C_out==1 is uncensored
        }
        if (!cens == 0) {
            # there is censoring; cens is the percentage of censored data
            T_cens <- runif(n, min = a, max = b)  # a, b for desired censoring rate
            C_out <- as.numeric(T_help < T_cens)  # C==0 censored, C==1 uncensored
            T_out <- pmin(T_help, T_cens)
            Y_out <- log(T_out)
        }
        cens_out <- sum(abs(C_out - 1))/n
        data <- data.frame(Y = Y_out, K = K_out, X = X_out, L = L_out, U = U_out,
                           T = T_out, C = C_out)
        print(paste("The empirical censoring rate obtained through the specified parameters a=", a, " and b=", b, " is ", cens_out, ".", sep = ""))
        if (abs(cens_out - cens) > 0.1) {
            warning(paste("This obtained empirical censoring rate is quite different from the desired censoring rate cens=", cens, ". Please check and adapt values for a and b.",
                sep = ""))
        }
    }
    return(data)
}

## ---- echo=FALSE---------------------------------------------------------
mult_reg <- function(setting = "GLM", Y = NULL, X = NULL, K = NULL,
                     L = NULL, C = NULL) {
    if (!requireNamespace("survival", quietly = TRUE)) {
        stop("Pkg needed for this function to work. Please install it.", call. = FALSE)
    }
    if (is.null(setting)) {
        stop("setting has to be supplied.")
    }
    if (is.null(Y) | is.null(X) | is.null(K) | is.null(L)) {
        stop("Data of one or more variables are not supplied.")
    }
    if (setting == "AFT" & is.null(C)) {
        stop("C has to be supplied for the AFT setting.")
    }
    if (setting == "GLM") {
        fit_mult_reg <- lm(Y ~ K + X + L)
        point_estimates <- c(summary(fit_mult_reg)$coefficients[1, 1],
                             summary(fit_mult_reg)$coefficients[2, 1],
                             summary(fit_mult_reg)$coefficients[3, 1],
                             summary(fit_mult_reg)$coefficients[4, 1])
        SE_estimates <- c(summary(fit_mult_reg)$coefficients[1, 2],
                          summary(fit_mult_reg)$coefficients[2, 2],
                          summary(fit_mult_reg)$coefficients[3, 2],
                          summary(fit_mult_reg)$coefficients[4, 2])
        pvalues <- c(summary(fit_mult_reg)$coefficients[1, 4],
                     summary(fit_mult_reg)$coefficients[2, 4],
                     summary(fit_mult_reg)$coefficients[3, 4],
                     summary(fit_mult_reg)$coefficients[4, 4])
        names(point_estimates) <- names(SE_estimates) <- names(pvalues) <-
          c("alpha_0", "alpha_1", "alpha_XY", "alpha_2")
    }
    if (setting == "AFT") {
        fit_mult_reg <- survival::survreg(survival::Surv(Y, C) ~ K + X + L,
                                          dist = "gaussian")
        point_estimates <- c(summary(fit_mult_reg)$table[1, 1],
                             summary(fit_mult_reg)$table[2, 1],
                             summary(fit_mult_reg)$table[3, 1],
                             summary(fit_mult_reg)$table[4, 1])
        SE_estimates <- c(summary(fit_mult_reg)$table[1, 2],
                          summary(fit_mult_reg)$table[2, 2],
                          summary(fit_mult_reg)$table[3, 2],
                          summary(fit_mult_reg)$table[4, 2])
        pvalues <- c(summary(fit_mult_reg)$table[1, 4],
                     summary(fit_mult_reg)$table[2, 4],
                     summary(fit_mult_reg)$table[3, 4],
                     summary(fit_mult_reg)$table[4, 4])
        names(point_estimates) <- names(SE_estimates) <- names(pvalues) <-
          c("alpha_0", "alpha_1", "alpha_XY", "alpha_2")
    }
    return(list(point_estimates = point_estimates,
                SE_estimates = SE_estimates, pvalues = pvalues))
}

## ---- echo=FALSE---------------------------------------------------------
res_reg <- function(Y = NULL, X = NULL, K = NULL, L = NULL) {
    if (is.null(Y) | is.null(X) | is.null(K) | is.null(L)) {
        stop("Data of one or more variables is not supplied.")
    }
    data_help <- data.frame(Y = Y, X = X, K = K, L = L)
    data_help <- data_help[complete.cases(data_help), ]
    fit_res_reg_1 <- lm(data_help$Y ~ data_help$K + data_help$L)
    res <- fit_res_reg_1$residuals
    fit_res_reg_2 <- lm(res ~ data_help$X)

    point_estimates <- c(summary(fit_res_reg_1)$coefficients[1, 1],
                         summary(fit_res_reg_1)$coefficients[2, 1],
                         summary(fit_res_reg_1)$coefficients[3, 1],
                         summary(fit_res_reg_2)$coefficients[1, 1],
                         summary(fit_res_reg_2)$coefficients[2, 1])
    SE_estimates <- c(summary(fit_res_reg_1)$coefficients[1, 2],
                      summary(fit_res_reg_1)$coefficients[2, 2],
                      summary(fit_res_reg_1)$coefficients[3, 2],
                      summary(fit_res_reg_2)$coefficients[1, 2],
                      summary(fit_res_reg_2)$coefficients[2, 2])
    pvalues <- c(summary(fit_res_reg_1)$coefficients[1, 4],
                 summary(fit_res_reg_1)$coefficients[2, 4],
                 summary(fit_res_reg_1)$coefficients[3, 4],
                 summary(fit_res_reg_2)$coefficients[1, 4],
                 summary(fit_res_reg_2)$coefficients[2, 4])
    names(point_estimates) <- names(SE_estimates) <- names(pvalues) <-
      c("alpha_0", "alpha_1", "alpha_2", "alpha_3", "alpha_XY")
    return(list(point_estimates = point_estimates,
                SE_estimates = SE_estimates, pvalues = pvalues))
}

## ------------------------------------------------------------------------
dat <- generate_data(setting="GLM", n = 1000, maf = 0.2, cens = 0.3, a = NULL, b = NULL, 
                     aUL = 0, aXL = 0, aXK = 0.2, aLK = 0, aUY = 0, aKY = 0.3, aXY = 0.1,
                     aLY = 0, mu_U = 0, sd_U = 1, X_orth_U = TRUE, mu_X = NULL, 
                     sd_X = NULL, mu_L = 0, sd_L = 1, mu_K = 0, sd_K = 1, mu_Y = 0, 
                     sd_Y = 1)
head(dat)
mult_reg(setting = "GLM", Y = dat$Y, X = dat$X, K = dat$K, L = dat$L)
res_reg(Y = dat$Y, X = dat$X, K = dat$K, L = dat$L)

## ---- echo=FALSE---------------------------------------------------------
sem_appl <- function(setting = "GLM", Y = NULL, X = NULL, K = NULL, L = NULL) {
    if (!requireNamespace("lavaan", quietly = TRUE)) {
        stop("Pkg needed for this function to work. Please install it.", call. = FALSE)
    }
    if (is.null(setting)) {
        stop("setting has to be supplied.")
    }
    if (setting == "AFT") {
        stop("Only GLM setting is implemented.")
    }
    if (is.null(Y) | is.null(X) | is.null(K) | is.null(L)) {
        stop("Data of one or more variables are not supplied.")
    }
    data_help <- data.frame(Y = Y, X = X, K = K, L = L)
    data_help <- data_help[complete.cases(data_help), ]
    model <- "
              L ~ X
              K ~ X + L
              Y ~ K + X
             "
    fit <- lavaan::sem(model, data = data_help)

    point_estimates <- c(fit@Fit@est[1], fit@Fit@est[2], fit@Fit@est[3],
                         fit@Fit@est[4], fit@Fit@est[5])
    SE_estimates <- c(fit@Fit@se[1], fit@Fit@se[2], fit@Fit@se[3],
                      fit@Fit@se[4], fit@Fit@se[5])
    pvalues <- 2 * pnorm(-abs(point_estimates/SE_estimates))
    names(point_estimates) <- names(SE_estimates) <- names(pvalues) <-
      c("alpha_1", "alpha_3", "alpha_4", "alpha_6", "alpha_XY")
    return(list(point_estimates = point_estimates,
                SE_estimates = SE_estimates, pvalues = pvalues))
}

## ------------------------------------------------------------------------
sem_appl(Y = dat$Y, X = dat$X, K = dat$K, L = dat$L)

## ---- echo=FALSE---------------------------------------------------------
est_funct_expr <- function(setting = "GLM") {
    if (is.null(setting)) {
        stop("setting has to be supplied.")
    }
    if (setting == "GLM") {
        logL1 <- expression(log((1/sqrt(sigma1sq)) * dnorm((y_i - alpha0 -
                            alpha1 * k_i - alpha2 * x_i - alpha3 * l_i)/
                            sqrt(sigma1sq), mean = 0, sd = 1)))
        logL2 <- expression(log((1/sqrt(sigma2sq)) * dnorm((y_i - y_bar -
                            alpha1 * (k_i - k_bar) - alpha4 - alphaXY * x_i)/
                            sqrt(sigma2sq), mean = 0, sd = 1)))
    }
    if (setting == "AFT") {
        logL1 <- expression(-c_i * log(sigma1) + c_i * log(dnorm((y_i -
                            alpha0 - alpha1 * k_i - alpha2 * x_i -
                            alpha3 * l_i)/sigma1, mean = 0, sd = 1)) +
                            (1 - c_i) * log(1 - pnorm((y_i - alpha0 -
                            alpha1 * k_i - alpha2 * x_i - alpha3 * l_i)/
                            sigma1, mean = 0, sd = 1)))
        logL2 <- expression(log((1/sqrt(sigma2sq)) * dnorm(((c_i * y_i +
                            (1 - c_i) * ((alpha0 + alpha1 * k_i +
                            alpha2 * x_i + alpha3 * l_i) + (sigma1 *
                            dnorm((y_i - alpha0 - alpha1 * k_i -
                            alpha2 * x_i - alpha3 * l_i)/sigma1, mean = 0,
                            sd = 1)/(1 - pnorm((y_i - alpha0 - alpha1 * k_i -
                            alpha2 * x_i - alpha3 * l_i)/sigma1, mean = 0,
                            sd = 1))))) - y_adj_bar - alpha1 * (k_i - k_bar) -
                            alpha4 - alphaXY * x_i)/sqrt(sigma2sq),
                            mean = 0, sd = 1)))
    }
    return(list(logL1 = logL1, logL2 = logL2))
}

## ------------------------------------------------------------------------
estfunct <- est_funct_expr(setting="GLM")
estfunct
est_funct_expr(setting = "AFT")

## ---- echo=FALSE---------------------------------------------------------
get_estimates <- function(setting = "GLM", Y = NULL, X = NULL, K = NULL,
                          L = NULL, C = NULL) {
    if (!requireNamespace("survival", quietly = TRUE)) {
        stop("Pkg needed for this function to work. Please install it.", call. = FALSE)
    }
    if (is.null(setting)) {
        stop("setting has to be supplied.")
    }
    if (is.null(Y) | is.null(X) | is.null(K) | is.null(L)) {
        stop("Data of one or more variables are not supplied.")
    }
    if (setting == "AFT" & is.null(C)) {
        stop("C has to be supplied for the AFT setting.")
    }
    n <- length(Y)

    if (setting == "GLM") {
        data_help <- data.frame(Y = Y, X = X, K = K, L = L)
        data_help <- data_help[complete.cases(data_help), ]
        ######### Stage 1 #########
        fit_stage_1 <- lm(data_help$Y ~ data_help$K + data_help$X + data_help$L)

        alpha_0_out <- summary(fit_stage_1)$coefficients[1, 1]
        alpha_1_out <- summary(fit_stage_1)$coefficients[2, 1]
        alpha_2_out <- summary(fit_stage_1)$coefficients[3, 1]
        alpha_3_out <- summary(fit_stage_1)$coefficients[4, 1]
        sigma_1_sq_out <- (n - 4)/n * summary(fit_stage_1)$sigma^2

        ######### Stage 2 #########
        Y_tilde <- data_help$Y - mean(data_help$Y) -
                   alpha_1_out * (data_help$K - mean(data_help$K))
        fit_stage_2 <- lm(Y_tilde ~ data_help$X)

        alpha_4_out <- summary(fit_stage_2)$coefficients[1, 1]
        alpha_XY_out <- summary(fit_stage_2)$coefficients[2, 1]
        sigma_2_sq_out <- (n - 2)/n * summary(fit_stage_2)$sigma^2

        point_estimates <- c(alpha_0_out, alpha_1_out, alpha_2_out, alpha_3_out,
                             sigma_1_sq_out, alpha_4_out, alpha_XY_out, sigma_2_sq_out)
        names(point_estimates) <- c("alpha_0", "alpha_1", "alpha_2", "alpha_3",
                                    "sigma_1_sq", "alpha_4", "alpha_XY", "sigma_2_sq")
    }
    if (setting == "AFT") {
        data_help <- data.frame(Y = Y, X = X, K = K, L = L, C = C)
        data_help <- data_help[complete.cases(data_help), ]
        ######### Stage 1 #########
        fit_stage_1 <- survival::survreg(survival::Surv(data_help$Y, data_help$C) ~
                                         data_help$K + data_help$X + data_help$L,
                                         dist = "gaussian")

        alpha_0_out <- summary(fit_stage_1)$table[1, 1]
        alpha_1_out <- summary(fit_stage_1)$table[2, 1]
        alpha_2_out <- summary(fit_stage_1)$table[3, 1]
        alpha_3_out <- summary(fit_stage_1)$table[4, 1]
        sigma_1_out <- fit_stage_1$scale

        ######### Stage 2 #########
        mu <- fit_stage_1$linear.predictors
        Y_adj <- data_help$C * data_help$Y + (1 - data_help$C) * (mu +
                 (sigma_1_out * dnorm((data_help$Y - mu)/sigma_1_out,
                 mean = 0, sd = 1)/(1 - pnorm((data_help$Y - mu)/sigma_1_out,
                 mean = 0, sd = 1))))
        Y_tilde <- Y_adj - mean(Y_adj) -
                   alpha_1_out * (data_help$K - mean(data_help$K))
        fit_stage_2 <- lm(Y_tilde ~ data_help$X)

        alpha_4_out <- summary(fit_stage_2)$coefficients[1, 1]
        alpha_XY_out <- summary(fit_stage_2)$coefficients[2, 1]
        sigma_2_sq_out <- (n - 2)/n * summary(fit_stage_2)$sigma^2

        point_estimates <- c(alpha_0_out, alpha_1_out, alpha_2_out, alpha_3_out,
                             sigma_1_out, alpha_4_out, alpha_XY_out, sigma_2_sq_out,
                             mean(Y_adj))
        names(point_estimates) <- c("alpha_0", "alpha_1", "alpha_2", "alpha_3",
                                    "sigma_1", "alpha_4", "alpha_XY", "sigma_2_sq",
                                    "y_adj_bar")
    }
    return(point_estimates)
}

## ------------------------------------------------------------------------
estimates <- get_estimates(setting = "GLM", Y = dat$Y, X = dat$X, K = dat$K, L = dat$L)
estimates

## ---- echo=FALSE---------------------------------------------------------
deriv_obj <- function(setting = "GLM", logL1 = NULL, logL2 = NULL, Y = NULL,
                      X = NULL, K = NULL, L = NULL, C = NULL,
                      estimates = NULL) {
    if (is.null(setting) | is.null(logL1) | is.null(logL2) |
        is.null(estimates)) {
        stop("One or more arguments of the function are missing.")
    }
    if (is.null(Y) | is.null(X) | is.null(K) | is.null(L)) {
        stop("Data of one or more variables are not supplied.")
    }
    if (setting == "AFT" & is.null(C)) {
        stop("C has to be supplied for the AFT setting.")
    }
    n <- length(Y)
    if (setting == "GLM") {
        data_help <- data.frame(Y = Y, X = X, K = K, L = L)
        data_help <- data_help[complete.cases(data_help), ]
        U12345_i <- deriv(expr = logL1, namevec = c("alpha0", "alpha1", "alpha2",
                          "alpha3", "sigma1sq", "alpha4", "alphaXY", "sigma2sq"),
                          function.arg = c("y_i", "k_i", "x_i", "l_i", "alpha0",
                          "alpha1", "alpha2", "alpha3", "sigma1sq"), func = T,
                          hessian = T)
        U678_i <- deriv(expr = logL2, namevec = c("alpha0", "alpha1", "alpha2",
                        "alpha3", "sigma1sq", "alpha4", "alphaXY", "sigma2sq"),
                        function.arg = c("y_i", "k_i", "x_i", "y_bar", "k_bar",
                        "alpha1", "alpha4", "alphaXY", "sigma2sq"), func = T,
                        hessian = T)
        logL1_deriv <- attributes(U12345_i(y_i = data_help$Y, k_i = data_help$K,
                                  x_i = data_help$X, l_i = data_help$L,
                                  alpha0 = estimates[names(estimates) == "alpha_0"],
                                  alpha1 = estimates[names(estimates) == "alpha_1"],
                                  alpha2 = estimates[names(estimates) == "alpha_2"],
                                  alpha3 = estimates[names(estimates) == "alpha_3"],
                                  sigma1sq = estimates[names(estimates) == "sigma_1_sq"]))
        logL2_deriv <- attributes(U678_i(y_i = data_help$Y, k_i = data_help$K,
                                  x_i = data_help$X, y_bar = mean(data_help$Y),
                                  k_bar = mean(data_help$K),
                                  alpha1 = estimates[names(estimates) == "alpha_1"],
                                  alpha4 = estimates[names(estimates) == "alpha_4"],
                                  alphaXY = estimates[names(estimates) == "alpha_XY"],
                                  sigma2sq = estimates[names(estimates) == "sigma_2_sq"]))
        deriv_obj <- list(logL1_deriv = logL1_deriv, logL2_deriv = logL2_deriv)
    }
    if (setting == "AFT") {
        data_help <- data.frame(Y = Y, X = X, K = K, L = L, C = C)
        data_help <- data_help[complete.cases(data_help), ]
        U12345_i <- deriv(expr = logL1, namevec = c("alpha0", "alpha1", "alpha2",
                          "alpha3", "sigma1", "alpha4", "alphaXY", "sigma2sq"),
                          function.arg = c("y_i", "c_i", "k_i", "x_i", "l_i",
                          "alpha0", "alpha1", "alpha2", "alpha3", "sigma1"),
                          func = T, hessian = T)
        U678_i <- deriv(expr = logL2, namevec = c("alpha0", "alpha1", "alpha2",
                        "alpha3", "sigma1", "alpha4", "alphaXY", "sigma2sq"),
                        function.arg = c("y_i", "c_i", "k_i", "x_i", "l_i",
                        "y_adj_bar", "k_bar", "alpha0", "alpha1", "alpha2",
                        "alpha3", "sigma1", "alpha4", "alphaXY", "sigma2sq"),
                        func = T, hessian = T)
        logL1_deriv <- attributes(U12345_i(y_i = data_help$Y, c_i = data_help$C,
                                  k_i = data_help$K, x_i = data_help$X, l_i = data_help$L,
                                  alpha0 = estimates[names(estimates) == "alpha_0"],
                                  alpha1 = estimates[names(estimates) == "alpha_1"],
                                  alpha2 = estimates[names(estimates) == "alpha_2"],
                                  alpha3 = estimates[names(estimates) == "alpha_3"],
                                  sigma1 = estimates[names(estimates) == "sigma_1"]))
        logL2_deriv <- attributes(U678_i(y_i = data_help$Y, c_i = data_help$C,
                                  k_i = data_help$K, x_i = data_help$X, l_i = data_help$L,
                                  y_adj_bar = estimates[names(estimates) == "y_adj_bar"],
                                  k_bar = mean(data_help$K),
                                  alpha0 = estimates[names(estimates) == "alpha_0"],
                                  alpha1 = estimates[names(estimates) == "alpha_1"],
                                  alpha2 = estimates[names(estimates) == "alpha_2"],
                                  alpha3 = estimates[names(estimates) == "alpha_3"],
                                  sigma1 = estimates[names(estimates) == "sigma_1"],
                                  alpha4 = estimates[names(estimates) == "alpha_4"],
                                  alphaXY = estimates[names(estimates) == "alpha_XY"],
                                  sigma2sq = estimates[names(estimates) == "sigma_2_sq"]))
        deriv_obj <- list(logL1_deriv = logL1_deriv, logL2_deriv = logL2_deriv)
    }
    return(deriv_obj)
}

## ---- echo=FALSE---------------------------------------------------------
scores <- function(derivobj = NULL) {
    if (is.null(derivobj)) {
        stop("derivobj has to be supplied.")
    }
    scores_out <- cbind(derivobj[[1]]$gradient[, 1:5],
                        derivobj[[2]]$gradient[, 6:8])
    return(scores_out)
}

## ---- echo=FALSE---------------------------------------------------------
hessian <- function(derivobj = NULL) {
    if (is.null(derivobj)) {
        stop("derivobj has to be supplied.")
    }
    hessian_out <- derivobj[[1]]$hessian
    hessian_out[, 6:8, ] <- derivobj[[2]]$hessian[, 6:8, ]
    return(hessian_out)
}

## ------------------------------------------------------------------------
derivobj <- deriv_obj(setting = "GLM", logL1 = estfunct$logL1, logL2 = estfunct$logL2, 
                      Y = dat$Y, X = dat$X, K = dat$K, L = dat$L, estimates = estimates)
names(derivobj)
head(derivobj$logL1_deriv$gradient)
score_matrix <- scores(derivobj)
head(score_matrix)
hessian_matrix <- hessian(derivobj)
str(hessian_matrix)

## ---- echo=FALSE---------------------------------------------------------
sandwich_se <- function(setting = "GLM", scores = NULL, hessian = NULL) {
    if (is.null(scores) | is.null(hessian)) {
        stop("scores and hessian have to be supplied.")
    }
    n <- dim(scores)[1]

    ### A_n matrix ###
    A_n <- matrix(, nrow = 8, ncol = 8)
    for (A_n_i in 1:8) {
        for (A_n_j in 1:8) {
            A_n[A_n_i, A_n_j] <- sum(hessian[, A_n_i, A_n_j])
        }
    }
    A_n <- -(1/n) * A_n

    ### B_n matrix ###
    B_n <- matrix(, nrow = 8, ncol = 8)
    for (B_n_i in 1:8) {
        for (B_n_j in 1:8) {
            B_n[B_n_i, B_n_j] <- sum(scores[, B_n_i] * scores[, B_n_j])
        }
    }
    B_n <- (1/n) * B_n

    ### C_n matrix ###
    C_n <- solve(A_n) %*% B_n %*% t(solve(A_n))

    ### Variance estimates of coefficients ###
    theta_EE_se <- (1/n) * diag(C_n)
    theta_EE_se <- sqrt(theta_EE_se)
    if (setting == "GLM") {
        names(theta_EE_se) <- c("alpha_0", "alpha_1", "alpha_2", "alpha_3",
                                "sigma_1_sq", "alpha_4", "alpha_XY", "sigma_2_sq")
    }
    if (setting == "AFT") {
        names(theta_EE_se) <- c("alpha_0", "alpha_1", "alpha_2", "alpha_3",
                                "sigma_1", "alpha_4", "alpha_XY", "sigma_2_sq")
    }
    return(theta_EE_se)
}

## ------------------------------------------------------------------------
sandwich_se(scores = score_matrix, hessian = hessian_matrix)

## ---- echo=FALSE---------------------------------------------------------
bootstrap_se <- function(setting = "GLM", BS_rep = 1000, Y = NULL, X = NULL,
                         K = NULL, L = NULL, C = NULL) {
    if (is.null(setting)) {
        stop("setting has to be supplied.")
    }
    if (is.null(Y) | is.null(X) | is.null(K) | is.null(L)) {
        stop("Data of one or more variables are not supplied.")
    }
    if (setting == "AFT" & is.null(C)) {
        stop("C has to be supplied for the AFT setting.")
    }
    n <- length(Y)

    if (setting == "GLM") {
        alpha_0_SE_BS_help <- alpha_1_SE_BS_help <-
          alpha_2_SE_BS_help <- alpha_3_SE_BS_help <-
          sigma_1_sq_SE_BS_help <- alpha_4_SE_BS_help <-
          alpha_XY_SE_BS_help <- sigma_2_sq_SE_BS_help <- NULL
    }
    if (setting == "AFT") {
        alpha_0_SE_BS_help <- alpha_1_SE_BS_help <-
          alpha_2_SE_BS_help <- alpha_3_SE_BS_help <-
          sigma_1_SE_BS_help <- alpha_4_SE_BS_help <-
          alpha_XY_SE_BS_help <- sigma_2_sq_SE_BS_help <- NULL
    }
    for (rep in 1:BS_rep) {
        id <- sample(1:n, n, replace = TRUE)
        if (setting == "GLM") {
            data_id <- data.frame(X = X[id], L = L[id], K = K[id], Y = Y[id])
            estimates <- get_estimates(setting = setting, Y = data_id$Y, X = data_id$X,
                                       K = data_id$K, L = data_id$L)
        }
        if (setting == "AFT") {
            data_id <- data.frame(X = X[id], L = L[id], K = K[id], Y = Y[id],
                                  T = T[id], C = C[id])
            estimates <- get_estimates(setting = setting, Y = data_id$Y, X = data_id$X,
                                       K = data_id$K, L = data_id$L, C = data_id$C)
        }
        alpha_0_SE_BS_help[rep] <- estimates[names(estimates) == "alpha_0"]
        alpha_1_SE_BS_help[rep] <- estimates[names(estimates) == "alpha_1"]
        alpha_2_SE_BS_help[rep] <- estimates[names(estimates) == "alpha_2"]
        alpha_3_SE_BS_help[rep] <- estimates[names(estimates) == "alpha_3"]
        if (setting == "GLM") {
            sigma_1_sq_SE_BS_help[rep] <- estimates[names(estimates) == "sigma_1_sq"]
        }
        if (setting == "AFT") {
            sigma_1_SE_BS_help[rep] <- estimates[names(estimates) == "sigma_1"]
        }
        alpha_4_SE_BS_help[rep] <- estimates[names(estimates) == "alpha_4"]
        alpha_XY_SE_BS_help[rep] <- estimates[names(estimates) == "alpha_XY"]
        sigma_2_sq_SE_BS_help[rep] <- estimates[names(estimates) == "sigma_2_sq"]
    }
    if (setting == "GLM") {
        theta_bootstrap_se <- c(sd(alpha_0_SE_BS_help,na.rm=T),
                                sd(alpha_1_SE_BS_help,na.rm=T),
                                sd(alpha_2_SE_BS_help,na.rm=T),
                                sd(alpha_3_SE_BS_help,na.rm=T),
                                sd(sigma_1_sq_SE_BS_help,na.rm=T),
                                sd(alpha_4_SE_BS_help,na.rm=T),
                                sd(alpha_XY_SE_BS_help,na.rm=T),
                                sd(sigma_2_sq_SE_BS_help,na.rm=T))
        names(theta_bootstrap_se) <- c("alpha_0", "alpha_1", "alpha_2", "alpha_3",
                                       "sigma_1_sq", "alpha_4", "alpha_XY", "sigma_2_sq")
    }
    if (setting == "AFT") {
        theta_bootstrap_se <- c(sd(alpha_0_SE_BS_help,na.rm=T),
                                sd(alpha_1_SE_BS_help,na.rm=T),
                                sd(alpha_2_SE_BS_help,na.rm=T),
                                sd(alpha_3_SE_BS_help,na.rm=T),
                                sd(sigma_1_SE_BS_help,na.rm=T),
                                sd(alpha_4_SE_BS_help,na.rm=T),
                                sd(alpha_XY_SE_BS_help,na.rm=T),
                                sd(sigma_2_sq_SE_BS_help,na.rm=T))
        names(theta_bootstrap_se) <- c("alpha_0", "alpha_1", "alpha_2", "alpha_3",
                                       "sigma_1", "alpha_4", "alpha_XY", "sigma_2_sq")
    }
    return(theta_bootstrap_se)
}

## ---- echo=FALSE---------------------------------------------------------
naive_se <- function(setting = "GLM", Y = NULL, X = NULL,
                     K = NULL, L = NULL, C = NULL) {
    if (!requireNamespace("survival", quietly = TRUE)) {
        stop("Pkg needed for this function to work. Please install it.", call. = FALSE)
    }
    if (is.null(setting)) {
        stop("setting has to be supplied.")
    }
    if (is.null(Y) | is.null(X) | is.null(K) | is.null(L)) {
        stop("Data of one or more variables are not supplied.")
    }
    if (setting == "AFT" & is.null(C)) {
        stop("C has to be supplied for the AFT setting.")
    }
    n <- length(Y)

    if (setting == "GLM") {
        data_help <- data.frame(Y = Y, X = X, K = K, L = L)
        data_help <- data_help[complete.cases(data_help), ]
        ######### Stage 1 #########
        fit_stage_1 <- lm(data_help$Y ~ data_help$K + data_help$X + data_help$L)

        alpha_1_out <- summary(fit_stage_1)$coefficients[2, 1]
        alpha_0_SE_out <- summary(fit_stage_1)$coefficients[1, 2]
        alpha_1_SE_out <- summary(fit_stage_1)$coefficients[2, 2]
        alpha_2_SE_out <- summary(fit_stage_1)$coefficients[3, 2]
        alpha_3_SE_out <- summary(fit_stage_1)$coefficients[4, 2]

        ######### Stage 2 #########
        Y_tilde <- data_help$Y - mean(data_help$Y) -
                   alpha_1_out * (data_help$K - mean(data_help$K))
        fit_stage_2 <- lm(Y_tilde ~ data_help$X)

        alpha_4_SE_out <- summary(fit_stage_2)$coefficients[1, 2]
        alpha_XY_SE_out <- summary(fit_stage_2)$coefficients[2, 2]
    }
    if (setting == "AFT") {
        data_help <- data.frame(Y = Y, X = X, K = K, L = L, C = C)
        data_help <- data_help[complete.cases(data_help), ]
        ######### Stage 1 #########
        fit_stage_1 <- survival::survreg(survival::Surv(data_help$Y, data_help$C) ~
                                         data_help$K + data_help$X + data_help$L,
                                         dist = "gaussian")

        alpha_1_out <- summary(fit_stage_1)$table[2, 1]
        sigma_1_out <- fit_stage_1$scale
        alpha_0_SE_out <- summary(fit_stage_1)$table[1, 2]
        alpha_1_SE_out <- summary(fit_stage_1)$table[2, 2]
        alpha_2_SE_out <- summary(fit_stage_1)$table[3, 2]
        alpha_3_SE_out <- summary(fit_stage_1)$table[4, 2]

        ######### Stage 2 #########
        mu <- fit_stage_1$linear.predictors
        Y_adj <- data_help$C * data_help$Y + (1 - data_help$C) * (mu + (sigma_1_out *
                 dnorm((data_help$Y - mu)/sigma_1_out, mean = 0, sd = 1)/
                (1 - pnorm((data_help$Y - mu)/sigma_1_out, mean = 0, sd = 1))))
        Y_tilde <- Y_adj - mean(Y_adj) - alpha_1_out * (data_help$K - mean(data_help$K))
        fit_stage_2 <- lm(Y_tilde ~ data_help$X)

        alpha_4_SE_out <- summary(fit_stage_2)$coefficients[1, 2]
        alpha_XY_SE_out <- summary(fit_stage_2)$coefficients[2, 2]
    }
    SE_estimates <- c(alpha_0_SE_out, alpha_1_SE_out, alpha_2_SE_out, alpha_3_SE_out,
                      NA, alpha_4_SE_out, alpha_XY_SE_out, NA)
    names(SE_estimates) <- c("alpha_0", "alpha_1", "alpha_2", "alpha_3", "sigma_1_sq",
                             "alpha_4", "alpha_XY", "sigma_2_sq")
    return(SE_estimates)
}

## ------------------------------------------------------------------------
bootstrap_se(setting = "GLM", BS_rep = 1000, Y = dat$Y, X = dat$X, K = dat$K, L = dat$L)
naive_se(setting = "GLM", Y = dat$Y, X = dat$X, K = dat$K, L = dat$L)

## ---- echo=FALSE---------------------------------------------------------
ciee <- function(setting = "GLM", estimates = c("ee", "mult_reg", "res_reg", "sem"),
                 ee_se = c("sandwich"), BS_rep = NULL, Y = NULL, X = NULL, K = NULL,
                 L = NULL, C = NULL) {
    if (is.null(setting)) {
        stop("setting has to be supplied.")
    }
    if (is.null(estimates)) {
        stop("At least one method has to be computed.")
    }
    if ((("ee" %in% estimates) & is.null(ee_se)) | (("ee" %in% estimates) &
          length(ee_se) > 1)) {
        stop("If the estimating equations approach is chosen, one approach has to be chosen for the computation of standard errors.")
    }
    if (("bootstrap" %in% estimates) & is.null(BS_rep)) {
        stop("For the computation of bootstrap standard errors, the number of bootstrap samples has to be chosen.")
    }
    if (is.null(Y) | is.null(X) | is.null(K) | is.null(L)) {
        stop("Data of one or more variables is not supplied.")
    }
    if (setting == "AFT" & is.null(C)) {
        stop("C has to be supplied for the AFT setting.")
    }
    if (setting == "AFT" & ("sem" %in% estimates)) {
        stop("The structural equations modeling approach is only implemented for the GLM setting.")
    }
    if (setting == "AFT" & ("res_reg" %in% estimates)) {
        stop("The regression of residuals approach is only implemented for the GLM setting.")
    }
    if (setting == "GLM") {
        data_help <- data.frame(Y = Y, X = X, K = K, L = L)
        data_help <- data_help[complete.cases(data_help), ]
        if ("sem" %in% estimates) {
            results_sem <- sem_appl(Y = data_help$Y, X = data_help$X,
                                    K = data_help$K, L = data_help$L)
        }
        if ("mult_reg" %in% estimates) {
            results_mult_reg <- mult_reg(setting = setting, Y = data_help$Y,
                                         X = data_help$X, K = data_help$K,
                                         L = data_help$L)
        }
        if ("res_reg" %in% estimates) {
            results_res_reg <- res_reg(Y = data_help$Y, X = data_help$X,
                                       K = data_help$K, L = data_help$L)
        }
        if ("ee" %in% estimates) {
            point_estimates_ee <- get_estimates(setting = setting, Y = data_help$Y,
                                                X = data_help$X, K = data_help$K,
                                                L = data_help$L)
            if (ee_se == "sandwich") {
                # Obtain estimating functions expressions
                estfunct <- est_funct_expr(setting = "GLM")
                # Obtain matrices with all first and second derivatives
                derivobj <- deriv_obj(setting = setting, logL1 = estfunct$logL1,
                                      logL2 = estfunct$logL2, Y = data_help$Y,
                                      X = data_help$X, K = data_help$K,
                                      L = data_help$L,
                                      estimates = point_estimates_ee)
                # Obtain score and hessian matrices
                results_scores <- scores(derivobj)
                results_hessian <- hessian(derivobj)
                # Obtain sandwich standard error estimates of the parameters
                se_estimates_ee <- sandwich_se(setting = setting,
                                               scores = results_scores,
                                               hessian = results_hessian)
            }
            if (ee_se == "bootstrap") {
                se_estimates_ee <- bootstrap_se(setting = setting, BS_rep = BS_rep,
                                                Y = data_help$Y, X = data_help$X,
                                                K = data_help$K, L = data_help$L)
            }
            if (ee_se == "naive") {
                se_estimates_ee <- naive_se(setting = setting, Y = data_help$Y,
                                            X = data_help$X, K = data_help$K,
                                            L = data_help$L)
            }
            wald_test_stat_ee <- point_estimates_ee[1:8]/se_estimates_ee
            pvalues_ee <- 2 * pnorm(-abs(wald_test_stat_ee))
            results_ee <- list(point_estimates = point_estimates_ee[1:8],
                               SE_estimates = se_estimates_ee,
                               wald_test_stat = wald_test_stat_ee,
                               pvalues = pvalues_ee)
        }
    }
    if (setting == "AFT") {
        data_help <- data.frame(Y = Y, X = X, K = K, L = L, C = C)
        data_help <- data_help[complete.cases(data_help), ]
        if ("mult_reg" %in% estimates) {
            results_mult_reg <- mult_reg(setting = setting, Y = data_help$Y,
                                         X = data_help$X, K = data_help$K,
                                         L = data_help$L, C = data_help$C)
        }
        if ("ee" %in% estimates) {
            point_estimates_ee <- get_estimates(setting = setting, Y = data_help$Y,
                                                X = data_help$X, K = data_help$K,
                                                L = data_help$L, C = data_help$C)
            if (ee_se == "sandwich") {
                # Obtain estimating functions expressions
                estfunct <- est_funct_expr(setting = setting)
                # Obtain matrices with all first and second derivatives
                derivobj <- deriv_obj(setting = setting, logL1 = estfunct$logL1,
                                      logL2 = estfunct$logL2, Y = data_help$Y,
                                      X = data_help$X, K = data_help$K,
                                      L = data_help$L, C = data_help$C,
                                      estimates = point_estimates_ee)
                # Obtain score and hessian matrices
                results_scores <- scores(derivobj)
                results_hessian <- hessian(derivobj)
                # Obtain sandwich standard error estimates of the parameters
                se_estimates_ee <- sandwich_se(setting = setting,
                                               scores = results_scores,
                                               hessian = results_hessian)
            }
            if (ee_se == "bootstrap") {
                se_estimates_ee <- bootstrap_se(setting = setting, BS_rep = BS_rep,
                                                Y = data_help$Y, X = data_help$X,
                                                K = data_help$K, L = data_help$L,
                                                C = data_help$C)
            }
            if (ee_se == "naive") {
                se_estimates_ee <- naive_se(setting = setting, Y = data_help$Y,
                                            X = data_help$X, K = data_help$K,
                                            L = data_help$L, C = data_help$C)
            }
            wald_test_stat_ee <- point_estimates_ee[1:8]/se_estimates_ee
            pvalues_ee <- 2 * pnorm(-abs(wald_test_stat_ee))
            results_ee <- list(point_estimates = point_estimates_ee[1:8],
                               SE_estimates = se_estimates_ee,
                               wald_test_stat = wald_test_stat_ee,
                               pvalues = pvalues_ee)
        }
    }
    output <- list()
    if ("ee" %in% estimates) {
        output$results_ee <- results_ee
    }
    if ("mult_reg" %in% estimates) {
        output$results_mult_reg <- results_mult_reg
    }
    if ("res_reg" %in% estimates) {
        output$results_res_reg <- results_res_reg
    }
    if ("sem" %in% estimates) {
        output$results_sem <- results_sem
    }
    class(output) <- "ciee"
    return(output)
}

## ---- echo=FALSE---------------------------------------------------------
ciee_loop <- function(setting = "GLM", estimates = c("ee", "mult_reg", "res_reg",
                      "sem"), ee_se = c("sandwich"), BS_rep = NULL, Y = NULL,
                      X = NULL, K = NULL, L = NULL, C = NULL) {
    if (is.null(setting)) {
        stop("setting has to be supplied.")
    }
    if (is.null(estimates)) {
        stop("At least one method has to be computed.")
    }
    if ((("ee" %in% estimates) & is.null(ee_se)) | (("ee" %in% estimates)
         & length(ee_se) > 1)) {
        stop("If the estimating equations approach is chosen, one approach has to be chosen for the computation of standard errors.")
    }
    if (("bootstrap" %in% estimates) & is.null(BS_rep)) {
        stop("For the computation of bootstrap standard errors, the number of bootstrap samples has to be chosen.")
    }
    if (is.null(Y) | is.null(X) | is.null(K) | is.null(L)) {
        stop("Data of one or more variables is not supplied.")
    }
    if (setting == "AFT" & is.null(C)) {
        stop("C has to be supplied for the AFT setting.")
    }
    if (setting == "AFT" & ("sem" %in% estimates)) {
        stop("The structural equations modeling approach is only implemented for the GLM setting.")
    }
    if (setting == "AFT" & ("res_reg" %in% estimates)) {
        stop("The regression of residuals approach is only implemented for the GLM setting.")
    }
    if ("sem" %in% estimates) {
        results_sem <- list(point_estimates = NULL, SE_estimates = NULL,
                            pvalues = NULL)
    }
    if ("mult_reg" %in% estimates) {
        results_mult_reg <- list(point_estimates = NULL, SE_estimates = NULL,
                                 pvalues = NULL)
    }
    if ("res_reg" %in% estimates) {
        results_res_reg <- list(point_estimates = NULL, SE_estimates = NULL,
                            pvalues = NULL)
    }
    if ("ee" %in% estimates) {
        results_ee <- list(point_estimates = NULL, SE_estimates = NULL,
                           wald_test_stat = NULL, pvalues = NULL)
    }
    k <- dim(X)[2]
    for (i in 1:k) {
        Xi <- X[, i]
        if (setting == "GLM") {
            data_help <- data.frame(Y = Y, X = Xi, K = K, L = L)
            data_help <- data_help[complete.cases(data_help), ]

            if ("sem" %in% estimates) {
                sem_help <- sem_appl(Y = data_help$Y, X = data_help$X,
                                     K = data_help$K, L = data_help$L)
                results_sem$point_estimates[i] <- sem_help$point_estimates[5]
                results_sem$SE_estimates[i] <- sem_help$SE_estimates[5]
                results_sem$pvalues[i] <- sem_help$pvalues[5]
                names(results_sem$point_estimates)[i] <-
                  names(results_sem$SE_estimates)[i] <-
                  names(results_sem$pvalues)[i] <- names(X)[i]
            }
            if ("mult_reg" %in% estimates) {
                mult_reg_help <- mult_reg(setting = setting, Y = data_help$Y,
                                          X = data_help$X, K = data_help$K,
                                          L = data_help$L)
                results_mult_reg$point_estimates[i] <- mult_reg_help$point_estimates[3]
                results_mult_reg$SE_estimates[i] <- mult_reg_help$SE_estimates[3]
                results_mult_reg$pvalues[i] <- mult_reg_help$pvalues[3]
                names(results_mult_reg$point_estimates)[i] <-
                  names(results_mult_reg$SE_estimates)[i] <-
                  names(results_mult_reg$pvalues)[i] <- names(X)[i]
            }
            if ("res_reg" %in% estimates) {
                res_reg_help <- res_reg(Y = data_help$Y, X = data_help$X,
                                        K = data_help$K, L = data_help$L)
                results_res_reg$point_estimates[i] <- res_reg_help$point_estimates[5]
                results_res_reg$SE_estimates[i] <- res_reg_help$SE_estimates[5]
                results_res_reg$pvalues[i] <- res_reg_help$pvalues[5]
                names(results_res_reg$point_estimates)[i] <-
                  names(results_res_reg$SE_estimates)[i] <-
                  names(results_res_reg$pvalues)[i] <- names(X)[i]
            }
            if ("ee" %in% estimates) {
                point_estimates_ee <- get_estimates(setting = setting,
                                                    Y = data_help$Y,
                                                    X = data_help$X,
                                                    K = data_help$K,
                                                    L = data_help$L)
                if (ee_se == "sandwich") {
                    # Obtain estimating functions expressions
                    estfunct <- est_funct_expr(setting = "GLM")
                    # Obtain matrices with all first and second derivatives
                    derivobj <- deriv_obj(setting = setting,
                                          logL1 = estfunct$logL1,
                                          logL2 = estfunct$logL2,
                                          Y = data_help$Y,
                                          X = data_help$X,
                                          K = data_help$K,
                                          L = data_help$L,
                                          estimates = point_estimates_ee)
                    # Obtain score and hessian matrices
                    results_scores <- scores(derivobj)
                    results_hessian <- hessian(derivobj)
                    # Obtain sandwich standard error estimates of the parameters
                    se_estimates_ee <- sandwich_se(setting = setting,
                                                   scores = results_scores,
                                                   hessian = results_hessian)
                }
                if (ee_se == "bootstrap") {
                    se_estimates_ee <- bootstrap_se(setting = setting,
                                                    BS_rep = BS_rep,
                                                    Y = data_help$Y,
                                                    X = data_help$X,
                                                    K = data_help$K,
                                                    L = data_help$L)
                }
                if (ee_se == "naive") {
                    se_estimates_ee <- naive_se(setting = setting,
                                                Y = data_help$Y,
                                                X = data_help$X,
                                                K = data_help$K,
                                                L = data_help$L)
                }
                results_ee$point_estimates[i] <- point_estimates_ee[7]
                results_ee$SE_estimates[i] <- se_estimates_ee[7]
                results_ee$wald_test_stat[i] <- point_estimates_ee[7]/
                                                  se_estimates_ee[7]
                results_ee$pvalues[i] <- 2 * pnorm(-abs(point_estimates_ee[7]/
                                                        se_estimates_ee[7]))
                names(results_ee$point_estimates)[i] <-
                  names(results_ee$SE_estimates)[i] <-
                  names(results_ee$wald_test_stat)[i] <-
                  names(results_ee$pvalues)[i] <- names(X)[i]
            }
        }
        if (setting == "AFT") {
            data_help <- data.frame(Y = Y, X = Xi, K = K, L = L, C = C)
            data_help <- data_help[complete.cases(data_help), ]
            if ("mult_reg" %in% estimates) {
                mult_reg_help <- mult_reg(setting = setting, Y = data_help$Y,
                                          X = data_help$X, K = data_help$K,
                                          L = data_help$L, C = data_help$C)
                results_mult_reg$point_estimates[i] <- mult_reg_help$point_estimates[3]
                results_mult_reg$SE_estimates[i] <- mult_reg_help$SE_estimates[3]
                results_mult_reg$pvalues[i] <- mult_reg_help$pvalues[3]
                names(results_mult_reg$point_estimates)[i] <-
                  names(results_mult_reg$SE_estimates)[i] <-
                  names(results_mult_reg$pvalues)[i] <- names(X)[i]
            }
            if ("ee" %in% estimates) {
                point_estimates_ee <- get_estimates(setting = setting,
                                                    Y = data_help$Y,
                                                    X = data_help$X,
                                                    K = data_help$K,
                                                    L = data_help$L,
                                                    C = data_help$C)
                if (ee_se == "sandwich") {
                    # Obtain estimating functions expressions
                    estfunct <- est_funct_expr(setting = setting)
                    # Obtain matrices with all first and second derivatives
                    derivobj <- deriv_obj(setting = setting,
                                          logL1 = estfunct$logL1,
                                          logL2 = estfunct$logL2,
                                          Y = data_help$Y,
                                          X = data_help$X,
                                          K = data_help$K,
                                          L = data_help$L,
                                          C = data_help$C,
                                          estimates = point_estimates_ee)
                    # Obtain score and hessian matrices
                    results_scores <- scores(derivobj)
                    results_hessian <- hessian(derivobj)
                    # Obtain sandwich standard error estimates of the parameters
                    se_estimates_ee <- sandwich_se(setting = setting,
                                                   scores = results_scores,
                                                   hessian = results_hessian)
                }
                if (ee_se == "bootstrap") {
                    se_estimates_ee <- bootstrap_se(setting = setting,
                                                    BS_rep = BS_rep,
                                                    Y = data_help$Y,
                                                    X = data_help$X,
                                                    K = data_help$K,
                                                    L = data_help$L,
                                                    C = data_help$C)
                }
                if (ee_se == "naive") {
                    se_estimates_ee <- naive_se(setting = setting,
                                                Y = data_help$Y,
                                                X = data_help$X,
                                                K = data_help$K,
                                                L = data_help$L,
                                                C = data_help$C)
                }
                results_ee$point_estimates[i] <- point_estimates_ee[7]
                results_ee$SE_estimates[i] <- se_estimates_ee[7]
                results_ee$wald_test_stat[i] <- point_estimates_ee[7]/
                                                  se_estimates_ee[7]
                results_ee$pvalues[i] <- 2 * pnorm(-abs(point_estimates_ee[7]/
                                                        se_estimates_ee[7]))
                names(results_ee$point_estimates)[i] <-
                  names(results_ee$SE_estimates)[i] <-
                  names(results_ee$wald_test_stat)[i] <-
                  names(results_ee$pvalues)[i] <- names(X)[i]
            }
        }
    }
    output <- list()
    if ("ee" %in% estimates) {
        output$results_ee <- results_ee
    }
    if ("mult_reg" %in% estimates) {
        output$results_mult_reg <- results_mult_reg
    }
    if ("res_reg" %in% estimates) {
        output$results_res_reg <- results_res_reg
    }
    if ("sem" %in% estimates) {
        output$results_sem <- results_sem
    }
    class(output) <- "ciee"
    return(output)
}

## ------------------------------------------------------------------------
results_ciee <- ciee(setting = "GLM", Y = dat$Y, X = dat$X, K = dat$K, L = dat$L,
                     estimates = c("ee", "mult_reg", "res_reg", "sem"),
                     ee_se = "sandwich")
results_ciee

maf <- 0.2
n <- 1000
dat <- generate_data(n = n, maf = maf)
datX <- data.frame(X = dat$X)
names(datX)[1] <- "X1"
for(i in 2:10){
 X <- rbinom(n, size = 2, prob = maf)
 datX$X <- X
 names(datX)[i] <- paste("X", i, sep="")
}
results_ciee_loop <- ciee_loop(setting = "GLM", Y = dat$Y, X = datX, K = dat$K, L = dat$L)
results_ciee_loop

## ---- echo=FALSE---------------------------------------------------------
summary.ciee <- function(results = NULL) {
    if (is.null(results)) {
        stop("ciee output has to be supplied.")
    }
    res_out <- NULL
    if ("results_ee" %in% names(results)) {
        res_ee_out <- data.frame(point_estimates = results$results_ee$point_estimates,
                                 SE_estimates = results$results_ee$SE_estimates,
                                 wald_test_stat = results$results_ee$wald_test_stat,
                                 pvalues = results$results_ee$pvalues)
        rownames(res_ee_out) <- paste("CIEE", rownames(res_ee_out), sep = "_")
        print(paste("Results based on estimating equations."))
        print(res_ee_out)
        res_out <- res_ee_out[,c(1,2,4)]
    }
    if ("results_mult_reg" %in% names(results)) {
        res_mr_out <- data.frame(point_estimates = results$results_mult_reg$point_estimates,
                                 SE_estimates = results$results_mult_reg$SE_estimates,
                                 pvalues = results$results_mult_reg$pvalues)
        rownames(res_mr_out) <- paste("MR", rownames(res_mr_out), sep = "_")
        print(paste("Results based on traditional multiple regression."))
        print(res_mr_out)
        res_out <- rbind(res_out, res_mr_out)
    }
    if ("results_res_reg" %in% names(results)) {
        res_rr_out <- data.frame(point_estimates = results$results_res_reg$point_estimates,
                                 SE_estimates = results$results_res_reg$SE_estimates,
                                 pvalues = results$results_res_reg$pvalues)
        rownames(res_rr_out) <- paste("RR", rownames(res_rr_out), sep = "_")
        print(paste("Results based on traditional regression of residuals."))
        print(res_rr_out)
        res_out <- rbind(res_out, res_rr_out)
    }
    if ("results_sem" %in% names(results)) {
        res_sem_out <- data.frame(point_estimates = results$results_sem$point_estimates,
                                  SE_estimates = results$results_sem$SE_estimates,
                                  pvalues = results$results_sem$pvalues)
        rownames(res_sem_out) <- paste("SEM", rownames(res_sem_out), sep = "_")
        print(paste("Results based on structural equation modeling."))
        print(res_sem_out)
        res_out <- rbind(res_out, res_sem_out)
    }
    invisible(res_out)
}

## ------------------------------------------------------------------------
summary(results_ciee)
summary(results_ciee_loop)

Try the CIEE package in your browser

Any scripts or data that you put into this service are public.

CIEE documentation built on May 2, 2019, 6:39 a.m.