Nothing
## ----message=FALSE, warning=FALSE---------------------------------------------
library("caret")
library("pre")
library("pROC")
library("ggplot2")
## -----------------------------------------------------------------------------
BigSummary <- function (data, lev = NULL, model = NULL) {
brscore <- try(mean((data[, lev[2]] - ifelse(data$obs == lev[2], 1, 0)) ^ 2),
silent = TRUE)
rocObject <- try(pROC::roc(ifelse(data$obs == lev[2], 1, 0), data[, lev[2]],
direction = "<", quiet = TRUE), silent = TRUE)
if (inherits(brscore, "try-error")) brscore <- NA
rocAUC <- if (inherits(rocObject, "try-error")) {
NA
} else {
rocObject$auc
}
return(c(AUCROC = rocAUC, Brier = brscore))
}
## -----------------------------------------------------------------------------
fitControl <- trainControl(method = "repeatedcv", number = 10, repeats = 1,
classProbs = TRUE, ## get probabilities, not class labels
summaryFunction = BigSummary, verboseIter = TRUE)
## -----------------------------------------------------------------------------
preGrid <- getModelInfo("pre")[[1]]$grid(
maxdepth = 3L:4L,
learnrate = c(.01, .05, .1),
penalty.par.val = c("lambda.1se", "lambda.min"),
sampfrac = c(0.5, 0.75, 1.0))
head(preGrid)
## -----------------------------------------------------------------------------
carrillo$sexo <- factor(paste0("g", as.character(carrillo$sexo)))
## -----------------------------------------------------------------------------
set.seed(42)
train_ids <- sample(1:nrow(carrillo), .75*nrow(carrillo))
## ----eval=FALSE---------------------------------------------------------------
# set.seed(42)
# pre_tune <- train(sexo ~ ., data = carrillo[train_ids, ], method = "pre",
# ntrees = 500, family = "binomial",
# trControl = fitControl, tuneGrid = preGrid,
# metric = "Brier", ## Specify "AUCROC" for optimizing AUC
# maximize = TRUE)
## ----eval=FALSE---------------------------------------------------------------
# set.seed(42)
# pre_tune2 <- train(sexo ~ ., data = carrillo[train_ids, ], method = "pre",
# ntrees = 1000, family = "binomial",
# trControl = fitControl, tuneGrid = preGrid,
# metric = "Brier", maximize = TRUE)
## ----echo=FALSE, eval=FALSE---------------------------------------------------
# save(pre_tune, pre_tune2, file = "Tuning_results.Rda")
## ----echo=FALSE---------------------------------------------------------------
load("Tuning_results.Rda")
## ----fig.width=5, fig.height=4------------------------------------------------
ids <- which(pre_tune$results$Brier == min(pre_tune$results$Brier))
pre_tune$results[ids, c(1:6, 10)]
plot(pre_tune,
xlab = list(cex = .7), ylab = list(cex = .7),
scales = list(cex=.7),
par.strip.text=list(cex=.7))
## ----fig.width=5, fig.height=4------------------------------------------------
ids2 <- which(pre_tune2$results$Brier == min(pre_tune2$results$Brier))
pre_tune2$results[ids2, c(1:6, 10)]
plot(pre_tune2,
xlab = list(cex = .7), ylab = list(cex = .7),
scales = list(cex=.7),
par.strip.text=list(cex=.7))
## -----------------------------------------------------------------------------
set.seed(42)
opt_pre_mod <- pre(formula = sexo ~ ., data = carrillo[train_ids, ],
sampfrac = 1, maxdepth = 4, ntrees = 1000, family = "binomial")
## -----------------------------------------------------------------------------
set.seed(42)
def_pre_mod <- pre(formula = sexo ~ ., data = carrillo[train_ids, ],
family = "binomial")
## ----fig.width=5, fig.height=3------------------------------------------------
print(opt_pre_mod, penalty.par.val = "lambda.1se")
importance(opt_pre_mod, penalty.par.val = "lambda.1se", cex = .7,
cex.main = .7, cex.lab = .7)
## ----warning=FALSE, message=FALSE---------------------------------------------
pre_preds_opt <- predict(opt_pre_mod, newdata = carrillo[-train_ids, ],
type = "response", penalty.par.val = "lambda.1se")
y_test <- as.numeric(carrillo[-train_ids, "sexo"])-1
mean((pre_preds_opt - y_test)^2) ## Brier score
sd((pre_preds_opt - y_test)^2)/sqrt(length(y_test)) ## standard error of SEL
auc(response = carrillo[-train_ids, "sexo"], predictor = pre_preds_opt)
## ----warning=FALSE, message=FALSE---------------------------------------------
summary(def_pre_mod)
pre_preds_def <- predict(def_pre_mod, newdata = carrillo[-train_ids, ],
type = "response")
mean((pre_preds_def - y_test)^2) ## Brier score
sd((pre_preds_def - y_test)^2)/sqrt(length(y_test)) ## standard error of SEL
auc(response = carrillo[-train_ids, "sexo"], predictor = pre_preds_def)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.