inst/doc/vtreatScaleMode.R

## ----exampledata--------------------------------------------------------------
library('vtreat')
dTrainC <- data.frame(x=c('a','a','a','b','b',NA),
                      y=c(FALSE,FALSE,TRUE,FALSE,TRUE,TRUE))
treatmentsC <- designTreatmentsC(dTrainC,colnames(dTrainC),'y',TRUE,
                                 catScaling=FALSE,
                                 verbose=FALSE)
dTrainCTreatedUnscaled <- prepare(treatmentsC,dTrainC,pruneSig=c(),scale=FALSE)
dTrainCTreatedScaled <- prepare(treatmentsC,dTrainC,pruneSig=c(),scale=TRUE)

## ----printorig----------------------------------------------------------------
print(dTrainC)

## ----printunscaled------------------------------------------------------------
print(dTrainCTreatedUnscaled)

## ----printscaled--------------------------------------------------------------
print(dTrainCTreatedScaled)

## ----check--------------------------------------------------------------------
slopeFrame <- data.frame(varName = treatmentsC$scoreFrame$varName,
                         stringsAsFactors = FALSE)
slopeFrame$mean <-
  vapply(dTrainCTreatedScaled[, slopeFrame$varName, drop = FALSE], mean,
         numeric(1))
slopeFrame$slope <- vapply(slopeFrame$varName,
                           function(c) {
                             lm(paste('y', c, sep = '~'),
                                data = dTrainCTreatedScaled)$coefficients[[2]]
                           },
                           numeric(1))
slopeFrame$sig <- vapply(slopeFrame$varName,
                         function(c) {
                           treatmentsC$scoreFrame[treatmentsC$scoreFrame$varName == c, 'sig']
                         },
                         numeric(1))
slopeFrame$badSlope <-
  ifelse(is.na(slopeFrame$slope), TRUE, abs(slopeFrame$slope - 1) > 1.e-8)
print(slopeFrame)

## ----catscale-----------------------------------------------------------------
treatmentsC2 <- designTreatmentsC(dTrainC,colnames(dTrainC),'y',TRUE,
                                  catScaling=TRUE,
                                  verbose=FALSE)
dTrainCTreatedScaled2 <- prepare(treatmentsC2,dTrainC,pruneSig=c(),scale=TRUE)
print(dTrainCTreatedScaled2)

## ----checks-------------------------------------------------------------------
colMeans(dTrainCTreatedScaled2)
lm(y~x_lev_NA,data=dTrainCTreatedScaled)
lm(y~x_lev_NA,data=dTrainCTreatedScaled2)

## -----------------------------------------------------------------------------
 vapply(slopeFrame$varName,
                           function(c) {
                             glm(paste('y', c, sep = '~'),family=binomial,
                                data = dTrainCTreatedScaled2)$coefficients[[2]]
                           },
                           numeric(1))

## -----------------------------------------------------------------------------
set.seed(235235)
dTrainN <- data.frame(x1=rnorm(100),
                      x2=rnorm(100),
                      x3=rnorm(100),
                      stringsAsFactors=FALSE)
dTrainN$y <- 1000*(dTrainN$x1 + dTrainN$x2)
cEraw <- vtreat::mkCrossFrameNExperiment(dTrainN,
                                         c('x1','x2','x3'),'y',
                                         scale=TRUE)

newvars <- cEraw$treatments$scoreFrame$varName
print(newvars)

dM1 <- as.matrix(cEraw$crossFrame[, newvars])
pCraw <- stats::prcomp(dM1,
                       scale.=FALSE,center=TRUE)
print(pCraw)

dTrainN$yScaled <- scale(dTrainN$y,center=TRUE,scale=TRUE)
cEscaled <- vtreat::mkCrossFrameNExperiment(dTrainN,
                                            c('x1','x2','x3'),'yScaled',
                                            scale=TRUE)
newvars_s <- cEscaled$treatments$scoreFrame$varName
print(newvars_s)


dM2 <- as.matrix(cEscaled$crossFrame[, newvars_s])
pCscaled <- stats::prcomp(dM2,
                          scale.=FALSE,center=TRUE)
print(pCscaled)

Try the vtreat package in your browser

Any scripts or data that you put into this service are public.

vtreat documentation built on Aug. 20, 2023, 1:08 a.m.