inst/doc/SM_User_Models.R

## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width=9.5, fig.height=8.5 
)

## ----sim_ctns0----------------------------------------------------------------
library(StratifiedMedicine)
dat_ctns = generate_subgrp_data(family="gaussian")
Y = dat_ctns$Y
X = dat_ctns$X # 50 covariates, 46 are noise variables, X1 and X2 are truly predictive
A = dat_ctns$A # binary treatment, 1:1 randomized 

## ----table_steps, echo=FALSE--------------------------------------------------
library(knitr)
summ.table = data.frame( `Model` = c("filter", "ple", "submod", "param"),
                        `Required Outputs` = c("filter.vars", "list(mod, pred.fun)",
                                               "list(mod, pred.fun)",
                                               "param.dat"),
                        `Description` = c("Variables that pass filter",
                                          "Model fit(s) and prediction function",
                                          "Model fit(s) and prediction function",
                                          "Treatment effect estimates"),
                        check.names = FALSE )
kable( summ.table, caption = "Key Outputs by Model" )

## ----user_filter_template-----------------------------------------------------
filter_template = function(Y, A, X, ...){
  # Step 1: Fit Filter Model #
  mod <- NULL         # replace with the filter model call
  # Step 2: Extract variables that pass the filter #
  filter.vars <- NULL # replace with the variable names that pass, based on mod
  # Return model fit and filtered variables #
  res = list(mod=mod, filter.vars=filter.vars)
  return( res )
}

## ----user_filter--------------------------------------------------------------
filter_lasso = function(Y, A, X, lambda="lambda.min", family="gaussian", ...){
  require(glmnet)
  ## Model matrix for X (no intercept column) ##
  X = model.matrix(~. -1, data = X )

  ##### Lasso (alpha = 1) #####
  set.seed(6134)
  if (family=="survival") { family = "cox"  }
  mod <- cv.glmnet(x = X, y = Y, nlambda = 100, alpha=1, family=family)

  ### Extract filtered variables based on lambda ###
  VI <- coef(mod, s = lambda)[,1]
  VI = VI[-1] # drop intercept
  filter.vars = names(VI[VI!=0])
  return( list(filter.vars=filter.vars) )
}
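
## A quick standalone check (a sketch, assuming glmnet is installed): since Y, A,
## and X are simulated above, the custom filter can be run directly to confirm it
## returns the required "filter.vars" output before passing it to filter_train/PRISM.
filter_lasso(Y, A, X)$filter.vars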

## ----user_ple_template--------------------------------------------------------
ple_template <- function(Y, A, X, Xtest, ...){
  # Step 1: Fit PLE Model #
  # for example: Estimate E(Y|A=1,X), E(Y|A=0,X), E(Y|A=1,X)-E(Y|A=0,X)
  mod <- NULL # replace with the ple model call
  # mod = list(mod0=mod0, mod1=mod1) # If multiple fitted models, combine into a list
  # Step 2: Predictions
  # Option 1: Create a Prediction Function #
  pred.fun <- function(mod, X, ...){
    mu_hat <- NULL # replace with a data frame of predictions
    return(mu_hat)
  }
  # Option 2: Directly Output Predictions (here, we still use pred.fun) #
  mu_train <- pred.fun(mod, X)
  mu_test <- pred.fun(mod, Xtest)
      
  # Return model fits and pred.fun (or just mu_train/mu_test) #
  res <- list(mod=mod, pred.fun=pred.fun, mu_train=mu_train, mu_test=mu_test)
  return( res )
}

## ----user_ple-----------------------------------------------------------------
ple_ranger_mtry = function(Y, X, mtry=5, ...){
  require(ranger)
  ## Fit a random forest for E(Y|X) with user-specified mtry ##
  train = data.frame(Y=Y, X)
  mod <- ranger(Y ~ ., data = train, seed=1, mtry = mtry)
  mod = list(mod=mod)
  ## Prediction function: estimated E(Y|X) for new data ##
  pred.fun <- function(mod, X, ...){
    mu_hat <- predict(mod$mod, X)$predictions
    return(mu_hat)
  }
  res = list(mod=mod, pred.fun=pred.fun)
  return(res)
}
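
## A quick standalone check (a sketch, assuming ranger is installed): fit the custom
## PLE model on the simulated data and confirm pred.fun returns one prediction per row
## of X. How the treatment arms are combined is left to the meta-learner (for example,
## meta="X-learner") inside ple_train/PRISM; here we only check the base fit.
ple_fit <- ple_ranger_mtry(Y, X, mtry = 5)
head(ple_fit$pred.fun(ple_fit$mod, X))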

## ----user_submod_template-----------------------------------------------------
submod_template <- function(Y, A, X, Xtest, mu_train, ...){
  # Step 1: Fit subgroup model #
  mod <- NULL # replace with the subgroup model call
  # Step 2: Predictions #
  # Option 1: Create Prediction Function #
  pred.fun <- function(mod, X=NULL, ...){
    Subgrps <- NULL # replace with predicted subgroup assignments
    return( list(Subgrps=Subgrps) )
  }
  # Option 2: Output Subgroups for train/test (here we use pred.fun)
  Subgrps.train = pred.fun(mod, X)
  Subgrps.test = pred.fun(mod, Xtest)
  # Return fit and pred.fun (or just Subgrps.train/Subgrps.test) #
  res <- list(mod=mod, pred.fun=pred.fun, Subgrps.train=Subgrps.train,
                  Subgrps.test=Subgrps.test)
  return(res)
}

## ----user_submod--------------------------------------------------------------
submod_lmtree_pred = function(Y, A, X, mu_train, ...){
  require(partykit)
  ## Fit Model ##
  mod <- lmtree(Y~A | ., data = X, parm=2) ## parm=2 focuses on the treatment interaction ##
  ## Prediction function: terminal-node (subgroup) assignments ##
  pred.fun <- function(mod, X=NULL, type="subgrp"){
     Subgrps <- as.numeric( predict(mod, type="node", newdata = X) )
     return( list(Subgrps=Subgrps) )
  }
  ## Return Results ##
  return(list(mod=mod, pred.fun=pred.fun))
}
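
## A quick standalone check (a sketch, assuming partykit is installed): fit the custom
## subgroup model directly and tabulate the terminal-node assignments returned by
## pred.fun. The mu_train argument is unused by this wrapper, so it can be omitted here.
submod_fit <- submod_lmtree_pred(Y, A, X)
table(submod_fit$pred.fun(submod_fit$mod, X)$Subgrps)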

## ----user_param_template------------------------------------------------------
param_template <- function(Y, A, X, mu_hat, alpha, ...){
  # Key Outputs: Subgroup-specific and overall parameter estimates
  mod <- NULL # replace with the parameter model call
  # Extract estimates/variability from mod and combine #
  n <- length(Y)
  est <- SE <- LCL <- UCL <- pval <- NA # replace with values extracted from mod
  param.dat <- data.frame(n=n, estimand="mu_1-mu_0",
                          est=est, SE=SE, LCL=LCL, UCL=UCL, pval=pval)
  return(param.dat)
}

## ----user_param---------------------------------------------------------------
### Robust Linear Regression: E(Y|A=1) - E(Y|A=0) ###
param_rlm = function(Y, A, alpha, ...){
  require(MASS)
  indata = data.frame(Y=Y,A=A)
  rlm.mod = tryCatch( rlm(Y ~ A , data=indata),
                       error = function(e) "param error" )
  n = dim(indata)[1]
  est = summary(rlm.mod)$coefficients[2,1]
  SE = summary(rlm.mod)$coefficients[2,2]
  LCL =  est-qt(1-alpha/2, n-1)*SE
  UCL =  est+qt(1-alpha/2, n-1)*SE
  pval = 2*pt(-abs(est/SE), df=n-1)
  param.dat <- data.frame(N= n, estimand = "mu_1-mu_0",
                     est=est, SE=SE, LCL=LCL, UCL=UCL, pval=pval)
  return(param.dat)
}
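
## A quick standalone check (a sketch, assuming MASS is installed): applied to the full
## sample, the custom parameter model returns the overall robust estimate of
## E(Y|A=1) - E(Y|A=0); within PRISM it is also applied within each identified subgroup.
param_rlm(Y = Y, A = A, alpha = 0.05)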


## ----user_SM_final, warning=FALSE, message=FALSE------------------------------
# Individual Steps #
step1 <- filter_train(Y, A, X, filter="filter_lasso")
X.star <- X[,colnames(X) %in% step1$filter.vars]
step2 <- ple_train(Y, A, X.star, ple = "ple_ranger_mtry", meta="X-learner")
plot_ple(step2)
step3 <- submod_train(Y, A, X.star, submod = "submod_lmtree_pred", param="param_rlm")
plot_tree(step3)

# Combine all through PRISM #
res_user1 = PRISM(Y=Y, A=A, X=X, family="gaussian", filter="filter_lasso", 
             ple = "ple_ranger_mtry", meta="X-learner", 
             submod = "submod_lmtree_pred",
             param="param_rlm")
res_user1$filter.vars
plot(res_user1, type="PLE:waterfall")
plot(res_user1)
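
## To inspect the subgroup-level treatment-effect estimates produced by the custom
## "param_rlm" step, the fitted PRISM object can be queried directly (this assumes,
## as in the package's standard output, that estimates are stored in $param.dat).
res_user1$param.dat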
