##' Generates a wrapper for SuperLearner using HDPS
##'
##' A HDPS candidate will generate covariates using \code{hdps_screen} from
##' codes, and estimate the propensity score with logistic regression on
##' generated covariates and predefined covariates.
##'
##' To use HDPS in SuperLearner to estimate a propensity score, you need to
##' include the outcome variable as a covariate where here outcome means the
##' outcome of interest in the causal problem as opposed to the \code{Y}
##' variable in SuperLearner. For non-HDPS candidates in SuperLearner, it's
##' important to exclude the outcome variable via \code{\link[=screen]{screen.named}} or
##' some other screening algorithm in order to avoid adjusting for something
##' downstream on the causal pathway.
##' @title SL.hdps.generator
##' @param out_name Name of the outcome variable.
##' @param dimension_names Dimension names of HDPS dimensions. See
##' \code{\link{hdps_screen}}.
##' @param predef_covar_names Names of predefined covariates to be included in
##' logistic regression model.
##' @param keep_k_total See \code{\link{hdps_screen}}.
##' @param ... Other arguments passed to \code{\link{hdps_screen}}.
##' @param cvglmnet Use \code{glmnet} or \code{cv.glmnet} for fitting. Defaults to FALSE.
##' @param glmnet_args list of arguments to be passed to glmnet or cv.glmnet. If \code{cvglmnet=FALSE}, \code{glmnet_args}
##' should be set such that calling \code{predict} on the \code{glmnet} object returns only one vector of predictions.
##' E.g. only one value of \code{lambda} should be set.
##' @return A SuperLearner wrapper function
##' @author Sam Lendle
##' @importFrom Matrix sparse.model.matrix
##' @importFrom glmnet glmnet
##' @importFrom glmnet cv.glmnet
##' @importFrom stats cor
##' @export
SL.hdps.generator <- function(out_name, dimension_names, predef_covar_names=c(), keep_k_total, ...,
cvglmnet=FALSE, glmnet_args=if (cvglmnet) list() else list(lambda=0)) {
function(Y, X, newX, family, obsWeights, id) {
if (missing(newX)) {
newX <- X
}
if(family$family == 'gaussian') {
stop("SL.hdps only for binomial")
}
hdps_fit <- hdps_screen(X[, out_name], Y, X, dimension_names, keep_k_total=keep_k_total, ...)
predef_covars <- X[, predef_covar_names]
if (keep_k_total > 0) {
hdps_covars <- predict(hdps_fit)
hdps_keep <- colnames(hdps_covars)[abs(cor(Y, hdps_covars)) <= 0.95]
hdps_covars <- hdps_covars[, hdps_keep]
df = as.data.frame(cbind(predef_covars, hdps_covars))
} else {
hdps_keep <- NULL
df = as.data.frame(predef_covars)
}
smm <- sparse.model.matrix(~.-1, df)
myglmnet <- function(...) if (cvglmnet)
cv.glmnet(smm, Y, family="binomial") else
glmnet(smm, Y, family="binomial", ...)
glmnet_fit <- do.call(myglmnet, glmnet_args)
if (identical(X, newX)) {
smmnew <- smm
} else {
new_predef_covars <- newX[, predef_covar_names]
if (keep_k_total > 0) {
new_hdps_covars <- predict(hdps_fit, newdata=newX)
new_hdps_covars <- new_hdps_covars[, hdps_keep]
new_df = as.data.frame(cbind(new_predef_covars, new_hdps_covars))
} else {
new_df = as.data.frame(new_predef_covars)
}
smmnew <- sparse.model.matrix(~.-1, new_df)
}
pred <- predict(glmnet_fit, smmnew, type="response")
if (ncol(pred) != 1) stop("Check cvglmnet and glmnet_args arguments to insure that predict returns only one column")
# fit returns all objects needed for predict.SL.template
fit <- list(glmnet_fit = glmnet_fit, hdps_fit = hdps_fit,
predef_covar_names=predef_covar_names, hdps_keep=hdps_keep, keep_k_total=keep_k_total)
# declare class of fit for predict.SL.template
class(fit) <- 'SL_hdps'
# return a list with pred and fit
out <- list(pred = pred, fit = fit)
return(out)
}
}
#' Get predictions from SL_hdps wrapper
#'
#' @title Get predictions from SL_hdps wrapper
#' @param object object of class \code{SL_hdps}
#' @param newdata a matrix of covariates to predict from
#' @param ... ignored
#' @return vector of predictions
#' @author Sam Lendle
#' @importFrom stats predict
#' @export
predict.SL_hdps <- function(object, newdata, ...){
new_predef_covars <- newdata[, object$predef_covar_names]
new_hdps_covars <- predict(object$hdps_fit, newdata=newdata, keep_k_total=object$keep_k_total)
new_hdps_covars <- new_hdps_covars[, object$hdps_keep]
new_df <- cbind(new_predef_covars, new_hdps_covars)
smmnew <- sparse.model.matrix(~.-1, new_df)
pred <- predict(object$glmnet_fit, smmnew, type = "response")
if (ncol(pred) != 1) stop("Check cvglmnet and glmnet_args arguments to insure that predict returns only one column")
pred
}
##' @name screen
##' @rdname screen
##'
##' @title SuperLearner screening wrappers
##'
##' @description Functions to set up screening wrappers for SuperLearner
##'
##' @param names Names to be included or excluded
##'
##' These functions generate simple screening wrappers for SuperLearner to
##' include or exclude variables based on \code{names}. This is is helpful
##' because in order to use HDPS as a candidate in SuperLearner, you need to
##' include the study outcome variable as a covariate. But to use a non-HDPS
##' algorithm, (say a random forest on some specified set of covariates,) as a
##' candidate as well, you want to make sure you're not adjusting for the
##' outcome which is downstream from treatment on the causal pathway.
##'
##' See documentation for the SuperLearner package for more about screening algorithms.
##'
##' @examples
##'
##' screen.predefined <- screen.names(c("names", "of", "predefined",
##' "covariates", "that", "definitely", "dont", "include", "the", "outcome"))
##'
##' screen.notoutcome <- screen.excludenames(c("outcome_variable_name",
##' "and", "other", "covariates", "to", "exclude"))
##'
NULL
##' @rdname screen
##' @export
screen.names <- function (names) {
function (Y, X, family, obsWeights, id, ...) {
colnames(X) %in% names
}
}
##' @rdname screen
##' @export
screen.excludenames <- function (names) {
function (Y, X, family, obsWeights, id, ...) {
!(colnames(X) %in% names)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.