Load Required Packages

library(knitr)
library(data.table)
library(magrittr)
library(ggplot2)
library(latex2exp)
library(dplyr)
library(plyr)
library(glmnet)
library(stringr)
library(DT)
library(progress)

Simulate Data

set.seed(123456)

# number of predictors
p = 10 

# number of test subjects
n = 200

# correlation between X's
rho = 0.50

# signal to noise ratio
signal_to_noise_ratio = 4

# names of the main effects, this will be used in many of the functions
main_effect_names <- paste0("x",1:p) 

# names of the active set
true_var_names <- c("x1","x2","x3","x4","x1:x2", "x1:x3", "x1:x4", "x2:x3", "x2:x4", "x3:x4")

# different true coefficient vectors as in Table 1 of Choi et al. 
beta1 <- c(7,2,1,1,0,0,0,0,0,0) %>% magrittr::set_names(true_var_names)
beta2 <- c(7,2,1,1,1,0,0,0.5,0.4,0.1) %>% magrittr::set_names(true_var_names)
beta3 <- c(7,2,1,1,7,7,7,2,2,1) %>% magrittr::set_names(true_var_names)
beta4 <- c(7,2,1,1,14,14,14,4,4,2) %>% magrittr::set_names(true_var_names)
beta5 <- c(0,0,0,0,7,7,7,2,2,1) %>% magrittr::set_names(true_var_names)

# simulate Toeplitz like correlation structure between X's
H <- abs(outer(1:p, 1:p, "-"))
cor <- rho^H

# generate X's from multivariate normal and label the matrix
DT <- MASS::mvrnorm(n = n, mu = rep(0,p), Sigma = cor) %>% 
    magrittr::set_colnames(paste0("x",1:p)) %>% 
    set_rownames(paste0("Subject",1:n))

# create X matrix which contains all main effects and interactions
# but not the intercept
X <- model.matrix(
    as.formula(paste0("~(",paste0(main_effect_names, collapse = "+"),")^2-1")), 
    data = DT %>% as.data.frame()) 

# generate response with user defined signal to noise ratio 
y.star <- X[,names(beta1)] %*% beta1
error <- rnorm(n)
k <- sqrt(var(y.star)/(signal_to_noise_ratio*var(error))) 
Y <- y.star + k*error 
colnames(Y) <- "Y"

# names of interaction variables assuming interaction terms contain a ":"
# this will be used in many of the functions
# names must appear in the same order as X matrix
interaction_names <- grep(":", colnames(X), value = T)
main_effect_names <- setdiff(colnames(X), interaction_names)
DT::datatable(X)

Analysis

Running the Strong heredity interaction model once using the shim function

NOTE: if lambda.beta = NULL and lambda.gamma = NULL then this function will use a grid of tuning parameters based on nlambda.beta only and the nlambda.gamma parameter is ignored. Therefore, if for example nlambda.beta = 10 then you must specify nlambda = 10x10 = 100 otherwise the function will break.

# load eclust library
library(eclust)

res <- shim(x = X, y = Y,
            main.effect.names = main_effect_names,
            interaction.names = interaction_names,
            verbose = FALSE)

names(res)
res
plot(res)

Main effect ($\beta$) parameter estimates

DT::datatable(round(as.matrix(res$beta),2), options = list(pageLength = 100))

Interaction effect ($\alpha$) parameter estimates

DT::datatable(round(as.matrix(res$alpha),2), options = list(pageLength = 100))

Sequence of Tuning Parameters

DT::datatable(res$tuning.parameters, options = list(pageLength = 100))

Cross Validation using the cv.shim function

library(doMC)
registerDoMC(cores = 4)
cv.res <- cv.shim(x = X, y = Y,
            main.effect.names = main_effect_names,
            interaction.names = interaction_names,
            parallel = TRUE, verbose = FALSE,
            type.measure = c("mse"), 
            nfolds = 5)

names(cv.res)

Cross Validation Plot

plot(cv.res)

Coefficient Estimates

coef(cv.res, s = "lambda.1se")
coef(cv.res, s = "lambda.min")


sahirbhatnagar/eclustProject documentation built on May 29, 2019, 12:58 p.m.