# Nothing
## ----categoricalexample, tidy=FALSE-------------------------------------------
# Categorical-outcome example: a tiny training frame with a string column `x`,
# a numeric column `z` (each containing one NA) and a logical outcome `y`.
library(vtreat)
dTrainC <- data.frame(
  x = c(rep("a", 3), rep("b", 2), NA),
  z = c(1, 2, 3, 4, NA, 6),
  y = c(FALSE, FALSE, TRUE, FALSE, TRUE, TRUE),
  stringsAsFactors = FALSE
)
# Every column name is offered as a candidate variable; `y` is named as the
# outcome column and TRUE is the outcome value treated as the target.
treatmentsC <- designTreatmentsC(
  dTrainC, colnames(dTrainC), "y",
  outcometarget = TRUE
)
# Score-frame columns to display; reused by the later chunks in this script.
scoreColsToPrint <- c(
  "origName", "varName", "code",
  "rsq", "sig", "extraModelDegrees"
)
print(treatmentsC$scoreFrame[, scoreColsToPrint])
## ----map----------------------------------------------------------------------
# Lookup from each vtreat-derived variable name to the original column name
# recorded for it in the score frame.
vmap <- setNames(
  as.list(treatmentsC$scoreFrame$origName),
  treatmentsC$scoreFrame$varName
)
print(vmap["x_catB"])
# Roll significances up to the original variables: for each `origName`, keep
# the smallest `sig` among its derived variables.
aggregate(sig ~ origName, data = treatmentsC$scoreFrame, FUN = min)
## ----numericexample, tidy=FALSE-----------------------------------------------
# Numeric-outcome example: same `x` and `z` columns as the categorical
# example, with the outcome `y` converted from logical to numeric.
library(vtreat)
dTrainN <- data.frame(
  x = c("a", "a", "a", "b", "b", NA),
  z = c(1, 2, 3, 4, NA, 6),
  y = as.numeric(c(FALSE, FALSE, TRUE, FALSE, TRUE, TRUE)),
  stringsAsFactors = FALSE
)
treatmentsN <- designTreatmentsN(dTrainN, colnames(dTrainN), "y")
# `scoreColsToPrint` comes from the categorical example chunk earlier in
# this script.
print(treatmentsN$scoreFrame[, scoreColsToPrint])
## ----notargetexample, tidy=FALSE----------------------------------------------
# No-outcome example: the frame holds only the columns `x` and `z`, and no
# outcome column is passed to the design step.
library(vtreat)
dTrainZ <- data.frame(
  x = c("a", "a", "a", "b", "b", NA),
  z = c(1, 2, 3, 4, NA, 6),
  stringsAsFactors = FALSE
)
treatmentsZ <- designTreatmentsZ(dTrainZ, colnames(dTrainZ))
# Only the descriptive columns are printed here (no `rsq` / `sig`).
print(
  treatmentsZ$scoreFrame[
    ,
    c("origName", "varName", "code", "extraModelDegrees")
  ]
)
## ----restrict1----------------------------------------------------------------
# Rebuild the numeric-outcome training frame used by this chunk.
dTrainN <- data.frame(
  x = c("a", "a", "a", "b", "b", NA),
  z = c(1, 2, 3, 4, NA, 6),
  y = as.numeric(c(FALSE, FALSE, TRUE, FALSE, TRUE, TRUE)),
  stringsAsFactors = FALSE
)
# Restrict the design step to the listed variable codes, with verbose
# output switched off.
treatmentsN <- designTreatmentsN(
  dTrainN, colnames(dTrainN), "y",
  codeRestriction = c("lev", "catN", "clean", "isBAD"),
  verbose = FALSE
)
# no catP or catD variables
print(treatmentsN$scoreFrame[, scoreColsToPrint])
## ----restrict2----------------------------------------------------------------
# Apply the treatment plan from the previous chunk (`treatmentsN`) to the
# training frame, restricting the prepared output to the listed variable
# codes. Uses `<-` for assignment, consistent with the rest of this script.
dTreated <- prepare(
  treatmentsN, dTrainN,
  codeRestriction = c("lev", "clean", "isBAD")
)
# no catN variables
head(dTreated)
## ----selectvars---------------------------------------------------------------
# Variable selection example: design treatments, pick variables by their
# score-frame significance, then prepare a frame using only those variables.
dTrainN <- data.frame(
  x = c("a", "a", "a", "b", "b", NA),
  z = c(1, 2, 3, 4, NA, 6),
  y = as.numeric(c(FALSE, FALSE, TRUE, FALSE, TRUE, TRUE)),
  stringsAsFactors = FALSE
)
treatmentsN <- designTreatmentsN(
  dTrainN, colnames(dTrainN), "y",
  codeRestriction = c("lev", "catN", "clean", "isBAD"),
  verbose = FALSE
)
print(treatmentsN$scoreFrame[, scoreColsToPrint])

pruneSig <- 1.0 # don't filter on significance for this tiny example
vScoreFrame <- treatmentsN$scoreFrame
# Keep the derived variables whose significance is at or below the threshold.
varsToUse <- vScoreFrame$varName[vScoreFrame$sig <= pruneSig]
print(varsToUse)
# Original column names behind the selected derived variables.
origVarNames <- sort(unique(
  vScoreFrame$origName[vScoreFrame$varName %in% varsToUse]
))
print(origVarNames)

# prepare a treated data frame using only the "significant" variables
# (assignment uses `<-`, consistent with the rest of this script)
dTreated <- prepare(
  treatmentsN, dTrainN,
  varRestriction = varsToUse
)
head(dTreated)
## ----displayvars--------------------------------------------------------------
# Score-frame rows for the selected derived variables (`vScoreFrame` and
# `varsToUse` come from the selectvars chunk).
origVarSigs <- vScoreFrame[vScoreFrame$varName %in% varsToUse, ]
# Sorted, de-duplicated original column names behind those rows.
origVarNames <- sort(unique(origVarSigs$origName))
print(origVarNames)
# Smallest significance per original variable.
aggregate(sig ~ origName, data = origVarSigs, FUN = min)
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.