# Nothing  (web-page extraction residue; not part of the vignette code)
## ----setup, include=FALSE-----------------------------------------------------
# Set the knitr chunk defaults in one call: echo code, figure width 7.
knitr::opts_chunk$set(echo = TRUE, fig.width = 7)
## -----------------------------------------------------------------------------
# Example call of the stratified k-way splitter: 3 rows, 2 splits,
# with no data frame and no outcome supplied.
vtreat::kWayStratifiedY(nRows = 3, nSplits = 2, dframe = NULL, y = NULL)
## -----------------------------------------------------------------------------
# Deterministic "modular" split plan: row i goes to group (i mod nSplits).
#
# This is not a great way to split data, because the data could contain
# structure that strides in the same pattern as this split. That is
# technically possible for any split, but we typically rely on pseudo-random
# assignment (which differs from one split call to the next) to make it
# unlikely that such structure matches the split often.
#
# Arguments:
#   nRows    number of rows to assign.
#   nSplits  modulus used to form the groups.
#   dframe   not used by this function.
#   y        not used by this function.
#
# Returns a list with one entry per distinct group value, in order of first
# appearance. Each entry is a list with index vectors `train` (rows outside
# the group) and `app` (rows inside the group).
modularSplit <- function(nRows, nSplits, dframe, y) {
  foldOfRow <- seq_len(nRows) %% nSplits
  foldIds <- unique(foldOfRow)
  plan <- vector("list", length(foldIds))
  for (k in seq_along(foldIds)) {
    inFold <- foldOfRow == foldIds[[k]]
    plan[[k]] <- list(
      train = which(!inFold),
      app = which(inFold)
    )
  }
  plan
}
## -----------------------------------------------------------------------------
# Build evaluation sets for 25 rows and 3 splits, using the custom
# modularSplit function as the split function.
vtreat::buildEvalSets(
  nRows = 25,
  nSplits = 3,
  splitFunction = modularSplit
)
## -----------------------------------------------------------------------------
# Degenerate split plan (as the name says, a bad one): a single fold whose
# `train` and `app` index sets are both every row, so the two sets overlap
# completely. nSplits, dframe and y are not used.
#
# Returns a one-element list holding list(train = 1..nRows, app = 1..nRows).
badSplit <- function(nRows, nSplits, dframe, y) {
  allRows <- seq_len(nRows)
  onlyFold <- list(train = allRows, app = allRows)
  list(onlyFold)
}
# Hand the degenerate badSplit function to buildEvalSets (5 rows, 3 requested
# splits) to see how it responds to a plan whose train and app sets overlap.
vtreat::buildEvalSets(
  nRows = 5,
  nSplits = 3,
  splitFunction = badSplit
)
## ----warning=FALSE------------------------------------------------------------
library('vtreat')
## -----------------------------------------------------------------------------
# Fix the RNG seed so the results below are reproducible.
set.seed(23255)
# Toy data: one numeric outcome column y.
d <- data.frame(y = sin(1:100))
# Stratified 5-fold cross validation plan over the rows of d.
pStrat <- kWayStratifiedY(nrow(d), 5, d, d$y)
# Check whether the split is a good partition; a NULL result is treated
# as "no problem found".
check <- vtreat::problemAppPlan(nrow(d), 5, pStrat, TRUE)
print(
  if (is.null(check)) {
    "Plan is good"
  } else {
    paste0("Problem with plan: ", check)
  }
)
# Record the stratified plan's application labels as a column of d.
d[["stratGroup"]] <- vtreat::getSplitPlanAppLabels(nrow(d), pStrat)
# Plain (unstratified) 5-fold cross validation plan over the same rows.
pSimple <- kWayCrossValidation(nrow(d), 5, d, d$y)
# Check whether the split is a good partition; the checker returns NULL
# when it is.
check <- vtreat::problemAppPlan(nrow(d), 5, pSimple, TRUE)
print(
  if (is.null(check)) {
    "Plan is good"
  } else {
    paste0("Problem with plan: ", check)
  }
)
# Record the unstratified plan's application labels as a column of d.
d[["simpleGroup"]] <- vtreat::getSplitPlanAppLabels(nrow(d), pSimple)
# mean(y) for each fold, unstratified split.
# Computed once and reused for the spread below.
simpleMeans <- tapply(d$y, d$simpleGroup, mean)
simpleMeans
# Spread of mean(y) across the unstratified folds
# (standard deviation of the per-fold means).
sd(simpleMeans)
# mean(y) for each fold, stratified split.
stratMeans <- tapply(d$y, d$stratGroup, mean)
stratMeans
# Spread of mean(y) across the stratified folds, for comparison with the
# unstratified value above.
sd(stratMeans)
# (Hosting-site footer text captured with the code; not part of the script.)
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.