# In sahirbhatnagar/shim: High Dimensional Interactions with an Environment Variable

library(knitr)
library(data.table)
library(magrittr)
library(ggplot2)
library(latex2exp)
library(dplyr)
library(plyr)
library(glmnet)
library(stringr)
library(DT)
library(progress)


# Simulate Data

set.seed(123456)

# number of predictors
p = 10

# number of test subjects
n = 200

# correlation between X's
rho = 0.50

# signal to noise ratio
signal_to_noise_ratio = 4

# names of the main effects, this will be used in many of the functions
main_effect_names <- paste0("x",1:p)

# names of the active set
true_var_names <- c("x1","x2","x3","x4","x1:x2", "x1:x3", "x1:x4", "x2:x3", "x2:x4", "x3:x4")

# different true coefficient vectors as in Table 1 of Choi et al.
beta1 <- c(7,2,1,1,0,0,0,0,0,0) %>% magrittr::set_names(true_var_names)
beta2 <- c(7,2,1,1,1,0,0,0.5,0.4,0.1) %>% magrittr::set_names(true_var_names)
beta3 <- c(7,2,1,1,7,7,7,2,2,1) %>% magrittr::set_names(true_var_names)
beta4 <- c(7,2,1,1,14,14,14,4,4,2) %>% magrittr::set_names(true_var_names)
beta5 <- c(0,0,0,0,7,7,7,2,2,1) %>% magrittr::set_names(true_var_names)

# simulate Toeplitz like correlation structure between X's
H <- abs(outer(1:p, 1:p, "-"))
cor <- rho^H

# generate X's from multivariate normal and label the matrix
DT <- MASS::mvrnorm(n = n, mu = rep(0,p), Sigma = cor) %>%
magrittr::set_colnames(paste0("x",1:p)) %>%
set_rownames(paste0("Subject",1:n))

# create X matrix which contains all main effects and interactions
# but not the intercept
X <- model.matrix(
as.formula(paste0("~(",paste0(main_effect_names, collapse = "+"),")^2-1")),
data = DT %>% as.data.frame())

# generate response with user defined signal to noise ratio
y.star <- X[,names(beta1)] %*% beta1
error <- rnorm(n)
k <- sqrt(var(y.star)/(signal_to_noise_ratio*var(error)))
Y <- y.star + k*error
colnames(Y) <- "Y"

# names of interaction variables assuming interaction terms contain a ":"
# this will be used in many of the functions
# names must appear in the same order as X matrix
interaction_names <- grep(":", colnames(X), value = T)
main_effect_names <- setdiff(colnames(X), interaction_names)

DT::datatable(X)


# Analysis

## Running the Strong heredity interaction model once using the shim function

NOTE: if lambda.beta = NULL and lambda.gamma = NULL then this function will use a grid of tuning parameters based on nlambda.beta only and the nlambda.gamma parameter is ignored. Therefore, if for example nlambda.beta = 10 then you must specify nlambda = 10x10 = 100 otherwise the function will break.

# load eclust library
library(eclust)

res <- shim(x = X, y = Y,
main.effect.names = main_effect_names,
interaction.names = interaction_names,
verbose = FALSE)

names(res)
res
plot(res)




### Sequence of Tuning Parameters

DT::datatable(res\$tuning.parameters, options = list(pageLength = 100))


## Cross Validation using the cv.shim function

library(doMC)
registerDoMC(cores = 4)
cv.res <- cv.shim(x = X, y = Y,
main.effect.names = main_effect_names,
interaction.names = interaction_names,
parallel = TRUE, verbose = FALSE,
type.measure = c("mse"),
nfolds = 5)

names(cv.res)


### Cross Validation Plot

plot(cv.res)


### Coefficient Estimates

coef(cv.res, s = "lambda.1se")
coef(cv.res, s = "lambda.min")


sahirbhatnagar/shim documentation built on May 25, 2017, 11:36 p.m.