holdout | R Documentation |
Computes indexes for holdout data split into training and test sets.
holdout(y, ratio = 2/3, internalsplit = FALSE, mode = "stratified", iter = 1,
seed = NULL, window=10, increment=1)
y |
desired target: numeric vector; or factor – then a stratified holdout is applied (i.e. the proportions of the classes are the same for each set). |
ratio |
split ratio (in percentage – sets the training set size; or in total number of examples – sets the test set size). |
internalsplit |
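if TRUE, the training data is further split into internal training ($itr) and internal validation ($val) sets, using the same ratio for the internal split. |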
mode |
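sampling mode. Options are:
- "stratified" – randomized holdout that keeps the class proportions in each set when y is a factor (default);
- "random" – standard randomized holdout;
- "order" – preserves the original order: the first examples are used for training and the last ones for testing;
- "rolling" – rolling (sliding) window over the ordered data, controlled by window, iter and increment;
- "incremental" – incremental retraining (growing window) over the ordered data, controlled by window, iter and increment.
|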
iter |
iteration of the incremental retraining mode (only used when mode="rolling" or mode="incremental"; typically set inside a loop, see the examples below). |
seed |
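if NULL, no seed is set and each call may return a different random split; otherwise the given seed is used locally, so the same seed always returns the same split and random number generation outside the function is not affected (see the examples). |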
window |
training window size (if mode="rolling") or initial training window size (if mode="incremental"). |
increment |
number of samples added to the training window at each iteration (if mode="incremental" or mode="rolling"). |
Computes indexes for holdout data split into training and test sets.
A list with the components:
$tr – numeric vector with the training examples indexes;
$ts – numeric vector with the test examples indexes;
$itr – numeric vector with the internal training examples indexes;
$val – numeric vector with the internal validation examples indexes;
Paulo Cortez http://www3.dsi.uminho.pt/pcortez/
See fit.
fit, predict.fit, mining, mgraph, mmetric, savemining, Importance.
### simple examples:
# preserves order, last two elements go into test set;
# internalsplit = TRUE also returns the internal training ($itr) and
# internal validation ($val) indexes
H <- holdout(1:10, ratio = 2, internalsplit = TRUE, mode = "order")
print(H)
# no seed or NULL returns different splits:
H <- holdout(1:10, ratio = 2 / 3, mode = "random")
print(H)
H <- holdout(1:10, ratio = 2 / 3, mode = "random", seed = NULL)
print(H)
# same seed returns identical split:
H <- holdout(1:10, ratio = 2 / 3, mode = "random", seed = 12345)
print(H)
H <- holdout(1:10, ratio = 2 / 3, mode = "random", seed = 12345)
print(H)
### classification example
## Not run:
data(iris)
# stratified random holdout on the class label: ratio = 2/3 sets the
# training set size
H <- holdout(iris$Species, ratio = 2 / 3, mode = "stratified")
# class frequencies in the training and test partitions
print(table(iris[H$tr, ]$Species))
print(table(iris[H$ts, ]$Species))
# fit using the training rows only
M <- fit(Species ~ ., iris[H$tr, ], model = "rpart")
# predictions for the test rows
P <- predict(M, iris[H$ts, ])
print(mmetric(iris$Species[H$ts], P, "CONF"))
## End(Not run)
### regression example with incremental and rolling window holdout:
## Not run:
ts <- c(1, 4, 7, 2, 5, 8, 3, 6, 9, 4, 7, 10, 5, 8, 11, 6, 9)
d <- CasesSeries(ts, c(1, 2, 3))
print(d) # with 14 examples

# incremental holdout example (growing window)
for (b in seq_len(4)) { # iterations
  H <- holdout(
    d$y,
    ratio = 4, mode = "incremental",
    iter = b, window = 5, increment = 2
  )
  M <- fit(y ~ ., d[H$tr, ], model = "mlpe", search = 2)
  P <- predict(M, d[H$ts, ])
  # report the first/last index and size of each partition, plus the test MAE
  cat(
    "batch :", b,
    "TR from:", H$tr[1], "to:", H$tr[length(H$tr)], "size:", length(H$tr),
    "TS from:", H$ts[1], "to:", H$ts[length(H$ts)], "size:", length(H$ts),
    "mae:", mmetric(d$y[H$ts], P, "MAE"), "\n"
  )
}

# rolling holdout example (sliding window)
for (b in seq_len(4)) { # iterations
  H <- holdout(
    d$y,
    ratio = 4, mode = "rolling",
    iter = b, window = 5, increment = 2
  )
  M <- fit(y ~ ., d[H$tr, ], model = "mlpe", search = 2)
  P <- predict(M, d[H$ts, ])
  # report the first/last index and size of each partition, plus the test MAE
  cat(
    "batch :", b,
    "TR from:", H$tr[1], "to:", H$tr[length(H$tr)], "size:", length(H$tr),
    "TS from:", H$ts[1], "to:", H$ts[length(H$ts)], "size:", length(H$ts),
    "mae:", mmetric(d$y[H$ts], P, "MAE"), "\n"
  )
}
## End(Not run)
### local seed simple example
## Not run:
# a seed is given, so N1 and N2 receive the same split;
# the holdout calls do not disturb the global random sequence used for s2
set.seed(1)
s1 <- sample(1:10, 3)
set.seed(1)
N1 <- holdout(1:10, seed = 123) # local seed
N2 <- holdout(1:10, seed = 123) # local seed
print(N1$tr)
print(N2$tr)
s2 <- sample(1:10, 3)
cat("s1:", s1, "\n")
cat("s2:", s2, "\n") # s2 is equal to s1
## End(Not run)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.