Nothing
## -----------------------------------------------------------------------------
signk <- function(n,k) {
sigTab <- data.frame(y=c(rep(TRUE,n/2),rep(FALSE,n/2)),v=FALSE)
sigTab[seq_len(k),'v'] <- TRUE
vtreat::designTreatmentsC(sigTab,'v','y',TRUE,verbose=FALSE)$scoreFrame[1,'sig']
}
sigTab <- data.frame(k=c(1,2,3,4,5,10,20,50,100))
# If you want to see a rare but perfect indicator of positive class
# that's only on k times out of 1000, this is the lower bound on pruneSig
sigTab$sigEst = vapply(sigTab$k,function(k) signk(1000,k),numeric(1))
sigTab$minusLogSig = -log(sigTab$sigEst) # we expect this to be approximately k
print(sigTab)
## -----------------------------------------------------------------------------
set.seed(3346)
n <- 1000
k <- 4
d <- data.frame(y=rbinom(n,size=1,prob=0.5)>0)
d$catVarNoise <- rep(paste0('lev',sprintf("%03d",1:floor(n/k))),(k+1))[1:n]
d$catVarPerfect <- paste0(d$catVar,substr(as.character(d$y),1,1))
d <- d[order(d$catVarPerfect),]
head(d)
treatmentsC <- vtreat::designTreatmentsC(d,c('catVarNoise','catVarPerfect'),'y',TRUE)
# Estimate effect significance (not coefficient significance).
estSigGLM <- function(xVar,yVar,numberOfHiddenDegrees=0) {
d <- data.frame(x=xVar,y=yVar,stringsAsFactors = FALSE)
model <- stats::glm(stats::as.formula('y~x'),
data=d,
family=stats::binomial(link='logit'))
delta_deviance <- model$null.deviance - model$deviance
delta_df <- model$df.null - model$df.residual + numberOfHiddenDegrees
pRsq <- 1.0 - model$deviance/model$null.deviance
sig <- stats::pchisq(delta_deviance, delta_df, lower.tail=FALSE)
sig
}
prepD <- vtreat::prepare(treatmentsC,d,pruneSig=c())
## ----scoreframe---------------------------------------------------------------
print(treatmentsC$scoreFrame[,c('varName','rsq','sig','extraModelDegrees')])
## ----scoresignal--------------------------------------------------------------
summary(glm(y~d$catVarPerfect=='lev001T',data=d,family=binomial))
estSigGLM(prepD$catVarPerfect_catB,prepD$y,0) # wrong est
estSigGLM(prepD$catVarPerfect_catB,prepD$y,
numberOfHiddenDegrees=length(unique(d$catVarPerfect))-1)
## ----scorenoise---------------------------------------------------------------
summary(glm(y~d$catVarNoise=='lev001',data=d,family=binomial))
estSigGLM(prepD$catVarNoise_catB,prepD$y,0) # wrong est
estSigGLM(prepD$catVarNoise_catB,prepD$y,
numberOfHiddenDegrees=length(unique(d$catVarNoise))-1)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.