#' Conducting Score Tests for Interaction
#'
#' Conduct score tests comparing a fitted model and a more general alternative
#' model.
#'
#' There are two tests available here:
#'
#' \bold{Asymptotic Test}
#'
#' This is based on the classical variance component test to construct a
#' testing procedure for the hypothesis about Gaussian process function.
#'
#' \bold{Bootstrap Test}
#'
#' When it comes to small sample size, we can use bootstrap test instead, which
#' can give valid tests with moderate sample sizes and requires similar
#' computational effort to a permutation test.
#'
#' @param formula_int (formula) A symbolic description of the model with
#' interaction.
#' @param label_names (list) A character string indicating all the interior
#' variables included in each predictor.
#' @param Y (vector of length n) Reponses of the dataframe.
#' @param X1 (dataframe, n*p1) The first type of factor in the dataframe (could
#' contains several subfactors).
#' @param X2 (dataframe, n*p2) The second type of factor in the dataframe (could
#' contains several subfactors).
#' @param kern_list (list of length K) A list of kernel functions given by user.
#' @param mode (character) A character string indicating which tuning
#' parameter criteria is to be used.
#' @param strategy (character) A character string indicating which ensemble
#' strategy is to be used.
#' @param beta (numeric/character) A numeric value specifying the parameter
#' when strategy = "exp" \code{\link{ensemble_exp}}.
#' @param test (character) A character string indicating which test is
#' to be used.
#' @param lambda (numeric) A numeric string specifying the range of
#' noise to be chosen. The lower limit of lambda must be above 0.
#' @param B (integer) A numeric value indicating times of resampling w
#' hen test = "boot".
#' @return \item{pvalue}{(numeric) p-value of the test.}
#' \item{u_weight}{(numeric) p-value of the test.}
#' \item{lam}{(numeric) p-value of the test.}
#' \item{train_RMSE}{(numeric) p-value of the test.}
#' \item{test_RMSE}{(numeric) p-value of the test.}
#' \item{K_tr}{(numeric) p-value of the test.}
#' \item{A_tr}{(numeric) p-value of the test.}
#' \item{V0_inv_tr}{(numeric) p-value of the test.}
#' \item{K_eig}{(numeric) p-value of the test.}
#' @author Wenying Deng
#' @seealso method: \code{\link{generate_kernel}}
#'
#' mode: \code{\link{tuning}}
#'
#' strategy: \code{\link{ensemble}}
#' @references Xihong Lin. Variance component testing in generalised linear
#' models with random effects. June 1997.
#'
#' Arnab Maity and Xihong Lin. Powerful tests for detecting a gene effect in
#' the presence of possible gene-gene interactions using garrote kernel
#' machines. December 2011.
#'
#' Petra Bu ̊zˇkova ́, Thomas Lumley, and Kenneth Rice. Permutation and
#' parametric bootstrap tests for gene-gene and gene-environment interactions.
#' January 2011.
#' @examples
#'
#'
#' testing(formula_int = Y ~ X1 * X2,
#' label_names = list(X1 = c("x1", "x2"), X2 = c("x3", "x4")),
#' Y, X1, X2, kern_list, mode = "loocv", strategy = "erm",
#' beta = 1, test = "boot", lambda = exp(seq(-5, 5)), B = 100)
#'
#'
#' @export testing
testing <- function(formula_int, label_names, Y, X1, X2, kern_list,
mode = "loocv", strategy = "erm", beta = 1,
test = "boot", lambda_list = exp(seq(-5, 5)),
B = 100, data_test = NULL, fit_test = NULL) {
re <- generate_formula(formula_int, label_names)
generic_formula0 <- re$generic_formula
len <- re$length_main
data <- as.data.frame(cbind(Y, X1, X2))
colnames(data) <- c("Y", label_names[[1]], label_names[[2]])
X <- model.matrix(generic_formula0, data)[, -1]
X12 <- X[, c((len + 1):dim(X)[2])]
n <- length(Y)
result <- estimation(Y, X1, X2, kern_list, mode, strategy, beta, lambda_list)
lambda <- result$lambda
beta0 <- result$beta[1, 1]
alpha0 <- result$alpha
K_gpr <- result$K
u_weight <- result$u_hat
base_est <- result$base_est
noise_hat <- estimate_noise(Y, lambda, beta0, alpha0, K_gpr)
sigma2_hat <- noise_hat$sigma2_hat
tau_hat <- sigma2_hat / lambda
test <- match.arg(test, c("asym", "boot"))
func_name <- paste0("test_", test)
pvalue <- do.call(func_name, list(n = n, Y = Y, X12 = X12,
beta0 = beta0, alpha0 = alpha0,
K_gpr = K_gpr, sigma2_hat = sigma2_hat,
tau_hat = tau_hat, B = B))
if (!is.null(data_test) & !is.null(fit_test)) {
kern_size <- length(kern_list)
Yhat_test <- 0
for (d in seq(kern_size)) {
kern <- kern_list[[d]]
K1_m <- kern(fit_test$X1, X1)
K2_m <- kern(fit_test$X2, X2)
K_test <- K1_m + K2_m
Yhat_base <- base_est$beta_list[[d]][1, 1] +
K_test %*% base_est$alpha_list[[d]]
Yhat_test <- Yhat_test + u_weight[d] * Yhat_base
}
SSE_test <- sum((fit_test$Y - Yhat_test) ^ 2)
test_RMSE <- sqrt(SSE_test / n)
} else {
test_RMSE <- -1
}
SSE_train <- sum((Y - beta0 - K_gpr %*% alpha0) ^ 2)
train_RMSE <- sqrt(SSE_train / n)
V0_inv <-
compute_stat(n, Y, X12, beta0, K_gpr, sigma2_hat, tau_hat)$V0_inv
K_eig <- svd(K_gpr)$d
list(pvalue = pvalue, u_weight = u_weight,
lambda = lambda, train_RMSE = train_RMSE,
test_RMSE = test_RMSE, trtst_ratio = train_RMSE / test_RMSE,
K_tr = tr(K_gpr), A_tr = tr(noise_hat$A),
V0_inv_tr = tr(V0_inv), K_eig = K_eig)
}
#' Conducting Score Tests for Interaction Using Asymptotic Test
#'
#' Conduct score tests comparing a fitted model and a more general alternative
#' model using asymptotic test.
#'
#' \bold{Asymptotic Test}
#'
#' This is based on the classical variance component test to construct a
#' testing procedure for the hypothesis about Gaussian process function.
#'
#' @param n (integer) A numeric number specifying the number of observations.
#' @param Y (vector of length n) Reponses of the dataframe.
#' @param X12 (dataframe, n*(p1\*p2)) The interaction items of first and second
#' types of factors in the dataframe.
#' @param beta0 (numeric) Estimated bias of the model.
#' @param alpha0 (vector of length n) Estimated coefficients of the estimated
#' ensemble kernel matrix.
#' @param K_gpr (matrix, n*n) Estimated ensemble kernel matrix.
#' @param sigma2_hat (numeric) The estimated noise of the fixed effects.
#' @param tau_hat (numeric) The estimated noise of the random effects.
#' @param B (integer) A numeric value indicating times of resampling
#' when test = "boot".
#' @return \item{pvalue}{(numeric) p-value of the test.}
#' @author Wenying Deng
#' @seealso method: \code{\link{generate_kernel}}
#'
#' mode: \code{\link{tuning}}
#'
#' strategy: \code{\link{ensemble}}
#' @references Xihong Lin. Variance component testing in generalised linear
#' models with random effects. June 1997.
#'
#' Arnab Maity and Xihong Lin. Powerful tests for detecting a gene effect in
#' the presence of possible gene-gene interactions using garrote kernel
#' machines. December 2011.
#'
#' Petra Bu ̊zˇkova ́, Thomas Lumley, and Kenneth Rice. Permutation and
#' parametric bootstrap tests for gene-gene and gene-environment interactions.
#' January 2011.
#'
#' @export test_asym
test_asym <- function(n, Y, X12, beta0, alpha0,
K_gpr, sigma2_hat, tau_hat, B) {
score_chi <-
compute_stat(n, Y, X12, beta0, K_gpr, sigma2_hat, tau_hat)$test_stat
K0 <- K_gpr
K12 <- X12 %*% t(X12)
V0_inv <- ginv(tau_hat * K0 + sigma2_hat * diag(n))
one <- rep(1, n)
P0_mat <- V0_inv - V0_inv %*%
one %*% ginv(t(one) %*% V0_inv %*% one) %*% t(one) %*% V0_inv
drV0_tau <- K0
drV0_sigma2 <- diag(n)
drV0_del <- tau_hat * K12
I0 <- compute_info(P0_mat,
mat_del = drV0_del, mat_sigma2 = drV0_sigma2,
mat_tau = drV0_tau)
#Effective Info for delta
tot_dim <- ncol(I0)
I_deldel <-
I0[1, 1] -
I0[1, 2:tot_dim] %*% ginv(I0[2:tot_dim, 2:tot_dim]) %*% I0[2:tot_dim, 1]
md <- tau_hat * tr(K12 %*% P0_mat) / 2
m_chi <- I_deldel / (2 * md)
d_chi <- md / m_chi
pvalue <- 1 - pchisq(score_chi / m_chi, d_chi)
pvalue
}
#' Conducting Score Tests for Interaction Using Bootstrap Test
#'
#' Conduct score tests comparing a fitted model and a more general alternative
#' model using bootstrap test.
#'
#' \bold{Bootstrap Test}
#'
#' When it comes to small sample size, we can use bootstrap test instead, which
#' can give valid tests with moderate sample sizes and requires similar
#' computational effort to a permutation test.
#'
#' @param n (integer) A numeric number specifying the number of observations.
#' @param Y (vector of length n) Reponses of the dataframe.
#' @param X12 (dataframe, n*(p1\*p2)) The interaction items of first and second
#' types of factors in the dataframe.
#' @param beta0 (numeric) Estimated bias of the model.
#' @param alpha0 (vector of length n) Estimated coefficients of the estimated
#' ensemble kernel matrix.
#' @param K_gpr (matrix, n*n) Estimated ensemble kernel matrix.
#' @param sigma2_hat (numeric) The estimated noise of the fixed effects.
#' @param tau_hat (numeric) The estimated noise of the random effects.
#' @param B (integer) A numeric value indicating times of resampling
#' when test = "boot".
#' @return \item{pvalue}{(numeric) p-value of the test.}
#' @author Wenying Deng
#' @seealso method: \code{\link{generate_kernel}}
#'
#' mode: \code{\link{tuning}}
#'
#' strategy: \code{\link{ensemble}}
#' @references Xihong Lin. Variance component testing in generalised linear
#' models with random effects. June 1997.
#'
#' Arnab Maity and Xihong Lin. Powerful tests for detecting a gene effect in
#' the presence of possible gene-gene interactions using garrote kernel
#' machines. December 2011.
#'
#' Petra Bu ̊zˇkova ́, Thomas Lumley, and Kenneth Rice. Permutation and
#' parametric bootstrap tests for gene-gene and gene-environment interactions.
#' January 2011.
#'
#' @export test_boot
test_boot <- function(n, Y, X12, beta0, alpha0,
K_gpr, sigma2_hat, tau_hat, B) {
meanY <- K_gpr %*% alpha0 + beta0
lam_star <- sigma2_hat / tau_hat
bs_test <- sapply(1:B, function(k) {
Ystar <- meanY + rnorm(n, sd = sqrt(sigma2_hat))
sigma2_star <- estimate_noise(Ystar, lam_star, beta0, alpha0, K_gpr)$sigma2_hat
tau_star <- sigma2_star / lam_star
compute_stat(n, Ystar, X12, beta0, K_gpr, sigma2_star, tau_star)$test_stat
})
# assemble test statistic
original_test <-
compute_stat(n, Y, X12, beta0, K_gpr, sigma2_hat, tau_hat)$test_stat
pvalue <- sum(as.numeric(original_test) <= bs_test) / B
pvalue
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.