Nothing
## ----exampledata--------------------------------------------------------------
library('vtreat')
dTrainC <- data.frame(x=c('a','a','a','b','b',NA),
y=c(FALSE,FALSE,TRUE,FALSE,TRUE,TRUE))
treatmentsC <- designTreatmentsC(dTrainC,colnames(dTrainC),'y',TRUE,
catScaling=FALSE,
verbose=FALSE)
dTrainCTreatedUnscaled <- prepare(treatmentsC,dTrainC,pruneSig=c(),scale=FALSE)
dTrainCTreatedScaled <- prepare(treatmentsC,dTrainC,pruneSig=c(),scale=TRUE)
## ----printorig----------------------------------------------------------------
print(dTrainC)
## ----printunscaled------------------------------------------------------------
print(dTrainCTreatedUnscaled)
## ----printscaled--------------------------------------------------------------
print(dTrainCTreatedScaled)
## ----check--------------------------------------------------------------------
slopeFrame <- data.frame(varName = treatmentsC$scoreFrame$varName,
stringsAsFactors = FALSE)
slopeFrame$mean <-
vapply(dTrainCTreatedScaled[, slopeFrame$varName, drop = FALSE], mean,
numeric(1))
slopeFrame$slope <- vapply(slopeFrame$varName,
function(c) {
lm(paste('y', c, sep = '~'),
data = dTrainCTreatedScaled)$coefficients[[2]]
},
numeric(1))
slopeFrame$sig <- vapply(slopeFrame$varName,
function(c) {
treatmentsC$scoreFrame[treatmentsC$scoreFrame$varName == c, 'sig']
},
numeric(1))
slopeFrame$badSlope <-
ifelse(is.na(slopeFrame$slope), TRUE, abs(slopeFrame$slope - 1) > 1.e-8)
print(slopeFrame)
## ----catscale-----------------------------------------------------------------
treatmentsC2 <- designTreatmentsC(dTrainC,colnames(dTrainC),'y',TRUE,
catScaling=TRUE,
verbose=FALSE)
dTrainCTreatedScaled2 <- prepare(treatmentsC2,dTrainC,pruneSig=c(),scale=TRUE)
print(dTrainCTreatedScaled2)
## ----checks-------------------------------------------------------------------
colMeans(dTrainCTreatedScaled2)
lm(y~x_lev_NA,data=dTrainCTreatedScaled)
lm(y~x_lev_NA,data=dTrainCTreatedScaled2)
## -----------------------------------------------------------------------------
vapply(slopeFrame$varName,
function(c) {
glm(paste('y', c, sep = '~'),family=binomial,
data = dTrainCTreatedScaled2)$coefficients[[2]]
},
numeric(1))
## -----------------------------------------------------------------------------
set.seed(235235)
dTrainN <- data.frame(x1=rnorm(100),
x2=rnorm(100),
x3=rnorm(100),
stringsAsFactors=FALSE)
dTrainN$y <- 1000*(dTrainN$x1 + dTrainN$x2)
cEraw <- vtreat::mkCrossFrameNExperiment(dTrainN,
c('x1','x2','x3'),'y',
scale=TRUE)
newvars <- cEraw$treatments$scoreFrame$varName
print(newvars)
dM1 <- as.matrix(cEraw$crossFrame[, newvars])
pCraw <- stats::prcomp(dM1,
scale.=FALSE,center=TRUE)
print(pCraw)
dTrainN$yScaled <- scale(dTrainN$y,center=TRUE,scale=TRUE)
cEscaled <- vtreat::mkCrossFrameNExperiment(dTrainN,
c('x1','x2','x3'),'yScaled',
scale=TRUE)
newvars_s <- cEscaled$treatments$scoreFrame$varName
print(newvars_s)
dM2 <- as.matrix(cEscaled$crossFrame[, newvars_s])
pCscaled <- stats::prcomp(dM2,
scale.=FALSE,center=TRUE)
print(pCscaled)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.