tests/treble4.R
In rpart: Recursive Partitioning and Regression Trees

#
# Treble test for class trees with 2 outcomes
#
# fit1 and fit1b failed equality because .7 and .3 are not easily represented
# in binary.  Thus a complelxity param was 4e-17 (basically 0, but enough
# to cause a split where it shouldn't be). Eric Lunde 2005-08-03
library(rpart)
control <- rpart.control(maxsurrogate=0, cp=1e-15, xval=0)
set.seed(10)

fit1 <- rpart(Kyphosis ~ Age + Number + Start, data=kyphosis, 
              control=control,
              parms=list(prior=c(.7,.3), 
                loss=matrix(c(0,1,2,0),nrow=2,ncol=2)))
wts <- rep(3, nrow(kyphosis))
fit1b <- rpart(Kyphosis ~ Age + Number + Start, data=kyphosis, 
               control=control,
	       weights=wts,
               parms=list(prior=c(.7,.3), 
                 loss=matrix(c(0,1,2,0),nrow=2,ncol=2)))
fit1b$frame$wt   <- fit1b$frame$wt/3
fit1b$frame$dev  <- fit1b$frame$dev/3
fit1b$frame$yval2[,2:3] <- fit1b$frame$yval2[,2:3]/3
fit1b$splits[,3] <- fit1b$splits[,3]/3
fit1b$variable.importance <- fit1b$variable.importance/3
all.equal(fit1[-3], fit1b[-3])   #all but the "call"

# Now for a set of non-equal weights
nn <- nrow(kyphosis)
pseudo <- double(nn)
pseudo[1] <- pi/6
for (i in 2:nn) pseudo[i] <- 4*pseudo[i-1]*(1-pseudo[i-1])

wts <-  rep(1:7, length=nn)
temp <- rep(1:nn, wts)             #row replicates
xgrp <- rep(1:10, length=nn)[order(pseudo)]
xgrp2<- rep(xgrp, wts)

# The cp value stops one last split where the two predictors are
#  completely equal in importance (perfect surrogates), but the
#  weighted and unweighted pick a different one due to round off error
tempc <- rpart.control(minsplit=2, xval=xgrp2, maxsurrogate=0, cp=.039)
#  Direct: replicate rows in the data set, and use unweighted
fit2 <- rpart(Kyphosis ~ Age + Number + Start, data=kyphosis[temp,], 
               control=tempc, 
               parms=list(prior=c(.7,.3), 
                          loss=matrix(c(0,1,2,0),nrow=2,ncol=2)))
#  Weighted
tempc <- rpart.control(minsplit=2, xval=xgrp, maxsurrogate=0, cp=.039)
fit2b <- rpart(Kyphosis ~ Age + Number + Start, data=kyphosis, 
               control=tempc, weights=wts,
               parms=list(prior=c(.7,.3), 
                          loss=matrix(c(0,1,2,0),nrow=2,ncol=2)))

all.equal(fit2$frame[,-2],  fit2b$frame[,-2])  # the "n" component won't match
all.equal(fit2$cptable, fit2b$cptable)
all.equal(fit2$splits[,-1],fit2b$splits[,-1]) 
all.equal(fit2$csplit,    fit2b$csplit)