Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----setup--------------------------------------------------------------------
library(pminternal)
library(Hmisc)
getHdata("gusto")
gusto <- gusto[, c("sex", "age", "hyp", "htn", "hrt", "pmi", "ste", "day30")]
gusto$y <- gusto$day30; gusto$day30 <- NULL
set.seed(234)
gusto <- gusto[sample(1:nrow(gusto), size = 4000),]
## -----------------------------------------------------------------------------
stepglm <- function(data, ...){
m <- glm(y~., data=data, family="binomial")
step(m, trace = 0)
}
steppred <- function(model, data, ...){
predict(model, newdata = data, type = "response")
}
validate(data = gusto, outcome = "y", model_fun = stepglm,
pred_fun = steppred, method = "cv_opt", B = 10)
## -----------------------------------------------------------------------------
#library(glmnet)
ridge_fun <- function(data, ...){
y <- data$y
x <- data[, c('sex', 'age', 'hyp', 'htn', 'hrt', 'pmi', 'ste')]
x$sex <- as.numeric(x$sex == "male")
x$pmi <- as.numeric(x$pmi == "yes")
x <- as.matrix(x)
cv <- glmnet::cv.glmnet(x=x, y=y, alpha=0, nfolds = 10, family="binomial")
lambda <- cv$lambda.min
glmnet::glmnet(x=x, y=y, alpha = 0, lambda = lambda, family="binomial")
}
ridge_predict <- function(model, data, ...){
# note this is identical to lasso_predict from "pminternal" vignette
x <- data[, c('sex', 'age', 'hyp', 'htn', 'hrt', 'pmi', 'ste')]
x$sex <- as.numeric(x$sex == "male")
x$pmi <- as.numeric(x$pmi == "yes")
x <- as.matrix(x)
plogis(glmnet::predict.glmnet(model, newx = x)[,1])
}
validate(method = "cv_optimism", data = gusto,
outcome = "y", model_fun = ridge_fun,
pred_fun = ridge_predict, B = 10)
# the use of package::function in user defined functions
# is especially important if you want to run
# boot_* or .632 in parallel via cores argument
# e.g.
# validate(method = ".632", data = gusto,
# outcome = "y", model_fun = ridge_fun,
# pred_fun = ridge_predict, B = 100, cores = 4)
## ----eval = F-----------------------------------------------------------------
# lognet_fun <- function(data, ...){
#
# dots <- list(...)
# if ("alpha" %in% names(dots)){
# alpha <- dots[["alpha"]]
# } else{
# alpha <- 0
# }
#
# y <- data$y
# x <- data[, c('sex', 'age', 'hyp', 'htn', 'hrt', 'pmi', 'ste')]
# x$sex <- as.numeric(x$sex == "male")
# x$pmi <- as.numeric(x$pmi == "yes")
# x <- as.matrix(x)
#
# cv <- glmnet::cv.glmnet(x=x, y=y, alpha = alpha, nfolds = 10, family="binomial")
# lambda <- cv$lambda.min
#
# glmnet::glmnet(x=x, y=y, alpha = alpha, lambda = lambda, family="binomial")
# }
#
# validate(method = "cv_optimism", data = gusto,
# outcome = "y", model_fun = lognet_fun,
# pred_fun = ridge_predict, B = 10, alpha = 0.5)
## ----eval = F-----------------------------------------------------------------
# enet_fun <- function(data, ...){
#
# dots <- list(...)
# if ("nalpha" %in% names(dots)){
# nalpha <- dots[["nalpha"]]
# } else{
# nalpha <- 21 # 0 to 1 in steps of 0.05
# }
#
# y <- data$y
# x <- data[, c('sex', 'age', 'hyp', 'htn', 'hrt', 'pmi', 'ste')]
# x$sex <- as.numeric(x$sex == "male")
# x$pmi <- as.numeric(x$pmi == "yes")
# x <- as.matrix(x)
#
# # run 10 fold CV for each alpha
# alphas <- seq(0, 1, length.out = nalpha)
# res <- lapply(alphas, function(a){
# cv <- glmnet::cv.glmnet(x=x, y=y, alpha = a, nfolds = 10, family="binomial")
# list(lambda = cv$lambda.min, bin.dev = min(cv$cvm))
# })
# # select result with min binomial deviance
# j <- which.min(sapply(res, function(x) x$bin.dev))
# # produce 'final' model with alpha and lambda
# glmnet::glmnet(x=x, y=y, alpha = alphas[j], lambda = res[[j]][["lambda"]], family="binomial")
# }
#
# validate(method = "cv_optimism", data = gusto,
# outcome = "y", model_fun = enet_fun,
# pred_fun = ridge_predict, B = 10)
#
## -----------------------------------------------------------------------------
rf_fun <- function(data, ...){
dots <- list(...)
num.trees <- if ("num.trees" %in% names(dots)) dots[["num.trees"]] else 500
max.depth <- if ("max.depth" %in% names(dots)) dots[["max.depth"]] else NULL
min.node.size <- if ("min.node.size" %in% names(dots)) dots[["min.node.size"]] else 1
# best to make sure y is a factor where '1' is level 2
data$y <- factor(data$y, levels = 0:1)
ranger::ranger(y~., data = data, probability = TRUE,
num.trees = num.trees,
max.depth = max.depth,
min.node.size = min.node.size)
}
rf_predict <- function(model, data, ...){
predict(model, data = data)$predictions[, 2]
}
validate(method = "cv_optimism", data = gusto,
outcome = "y", model_fun = rf_fun,
pred_fun = rf_predict, B = 10)
# instead of unlimited tree depth...
validate(method = "cv_optimism", data = gusto,
outcome = "y", model_fun = rf_fun,
pred_fun = rf_predict, B = 10, max.depth = 3)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.