## This function performs an individual bootstrap test of significance
## for a crs regression model. If the index `i' is provided it
## computes the test for predictor `i', otherwise it computes the test
## for all predictors, one at a time.
## (C) Jeffrey S. Racine (2011)
## Two methods for imposing the null are implemented, a) a residual
## bootstrap and b) a reordering of the predictor being tested (in
## place) that breaks any systematic relationship between the
## predictor and outcome.
crssigtest <- function(model = NULL,
index = NULL,
boot.num = 399,
boot.type = c("residual","reorder"),
random.seed = 42,
boot = TRUE) {
## Save any existing seed prior to setting the seed for this routine
## (we set the seed to ensure the same outcome when the test is run
## on the same data, otherwise resampling error would result in
## different P values each time the test is run which could be
## unsettling for some users - this can naturally be overridden by
## setting a unique seed each time the test is run)
if(exists(".Random.seed", .GlobalEnv)) {
save.seed <- get(".Random.seed", .GlobalEnv)
exists.seed <- TRUE
} else {
exists.seed <- FALSE
}
set.seed(random.seed)
## Some basic checking for invalid (unintentional) use of the
## function
if(is.null(model)) stop(" you must provide a crs model")
if(is.null(index)) index <- 1:NCOL(model$xz)
if(index < 1 || index > NCOL(model$xz)) stop(" you must provide a valid index")
boot.type <- match.arg(boot.type)
## Some storage vectors/matrices
df1.vec <- numeric(length(index))
df2.vec <- numeric(length(index))
uss.vec <- numeric(length(index))
rss.vec <- numeric(length(index))
F.vec <- numeric(length(index))
P.vec.boot <- numeric(length(index))
P.vec.asy <- numeric(length(index))
F.boot <- numeric(length=boot.num)
F.boot.mat <- matrix(NA,nrow=boot.num,ncol=length(index))
for(ii in 1:length(index)) {
## Get information from model each time the test is run (we modify
## these below hence the need to reset when multiple tests are
## requested)
model.degree <- model$degree
model.lambda <- model$lambda
model.segments <- model$segments
model.basis <- model$basis
model.knots <- model$knots
## Placeholders for null degree/lambda/segments
degree.restricted <- model.degree
lambda.restricted <- model.lambda
segments.restricted <- model.segments
## Determine whether variable(s) being considered is of type
## numeric or type factor
xz.numeric <- FALSE
if(is.numeric(model$xz[,index[ii]])) xz.numeric <- TRUE
## Test whether degree (or bandwidth) for the variable being
## tested is zero (or one). First, the predictors can be in any
## order (numeric/factor) so we need to be careful to set the
## degree/lambda for the predictor being considered
if(xz.numeric) {
## Predictor being tested is numeric, so we need to determine
## the `degree' index (i.e. which numeric predictor is being
## tested, and what is its index among all predictors in the
## matrix of all predictors model$xz)
degree.index <- 0
for(jj in 1:index[ii]) if(is.numeric(model$xz[,jj])) degree.index <- degree.index + 1
## If the degree is zero (i.e. cross-validation has determined a
## variable is `irrelevant'), allow the predictor to be included
## using a degree 1/segments 1 fit (latter not necessary but
## zero cost to so doing) to avoid degeneracy of the statistic
## (if we did not the restricted and unrestricted models would
## coincide)
model.degree[degree.index] <- ifelse(model$degree[degree.index]==0,1,model$degree[degree.index])
model.segments[degree.index] <- ifelse(model$degree[degree.index]==0,1,model$segments[degree.index])
## Set the null degree to zero and segments to one (latter not
## necessary but zero cost to so doing)
degree.restricted[degree.index] <- 0
segments.restricted[degree.index] <- 1
} else {
## Predictor being tested is categorical, so need to determine
## the `lambda' index
lambda.index <- 0
for(jj in 1:index[ii]) if(!is.numeric(model$xz[,jj])) lambda.index <- lambda.index + 1
## If the bandwidth lambda is one (i.e. cross-validation has
## determined a variable is `irrelevant'), allow the model to be
## included using the `frequency' fit (bandwidth of basically
## zero)
model.lambda[lambda.index] <- ifelse(isTRUE(all.equal(model$lambda[lambda.index],1)),.Machine$double.eps,model$lambda[lambda.index])
## Set the null bandwidth to one
lambda.restricted[lambda.index] <- 1
}
## Now compute the unrestricted and restricted models with the
## caveats outlined above for the unrestricted model
model.unrestricted <- crs(xz=model$xz,
y=model$y,
cv="none",
degree=model.degree,
lambda=model.lambda,
segments=model.segments,
basis=model.basis,
knots=model.knots)
model.restricted <- crs(xz=model$xz,
y=model$y,
cv="none",
degree=degree.restricted,
lambda=lambda.restricted,
segments=segments.restricted,
basis=model.basis,
knots=model.knots)
## Compute the pseudo F-value under the null (here we could use
## the unrestricted residuals from the model fed to this function
## i.e. residuals(model)) while we compute df for the model
## augmented (i.e. if degree==0 set degree==1) to avoid degeneracy
## when a variable is automatically removed.
## Note - if we test a categorical variable df1 will be zero (the
## categorical predictor does not enter as a variable, rather it
## enters as a weight and will not be recorded in model$k, the
## number of parameters). In this case we approximate the number
## of restrictions imposed using the value 1.
uss.vec[ii] <- sum(residuals(model.unrestricted)^2)
rss.vec[ii] <- sum(residuals(model.restricted)^2)
df1.vec[ii] <- max(1,round(sum(model.unrestricted$hatvalues))-round(sum(model.restricted$hatvalues)))
df2.vec[ii] <- model$nobs-round(sum(model.unrestricted$hatvalues))
## Compute the statistic and save each in F.vec (we allow multiple
## tests to be computed with one call to this function).
F.df <- df2.vec[ii]/df1.vec[ii]
F.pseudo <- F.df*(rss.vec[ii]-uss.vec[ii])/uss.vec[ii]
F.vec[ii] <- F.pseudo
## If boot == TRUE conduct the bootstrap, otherwise only conduct
## the asymptotic P-value
if(boot) {
## Bootstrap the P-values for the test statistic (we also record
## the asymptotic P-values)
if(boot.type=="reorder") xz.boot <- model$xz
for(bb in 1:boot.num) {
## Bootstrap sample under the null and recompute the
## `restricted' and `unrestricted' models for the null sample
if(boot.type=="reorder") {
xz.boot[,index[ii]] <- sample(model$xz[,index[ii]],replace=T)
model.unrestricted.boot <- crs(xz=xz.boot,
y=model$y,
cv="none",
degree=model.degree,
lambda=model.lambda,
segments=model.segments,
basis=model.basis,
knots=model.knots)
model.restricted.boot <- crs(xz=xz.boot,
y=model$y,
cv="none",
degree=degree.restricted,
lambda=lambda.restricted,
segments=segments.restricted,
basis=model.basis,
knots=model.knots)
} else {
y.boot <- fitted(model.restricted) + as.numeric(scale(sample(residuals(model),replace=T),center=TRUE,scale=FALSE))
model.unrestricted.boot <- crs(xz=model$xz,
y=y.boot,
cv="none",
degree=model.degree,
lambda=model.lambda,
segments=model.segments,
basis=model.basis,
knots=model.knots)
model.restricted.boot <- crs(xz=model$xz,
y=y.boot,
cv="none",
degree=degree.restricted,
lambda=lambda.restricted,
segments=segments.restricted,
basis=model.basis,
knots=model.knots)
}
## Recompute the pseudo F-value under the null
uss.boot <- sum(residuals(model.unrestricted.boot)^2)
rss.boot <- sum(residuals(model.restricted.boot)^2)
F.boot[bb] <- F.df*(rss.boot-uss.boot)/uss.boot
}
F.boot.mat[,ii] <- F.boot
P.vec.boot[ii] <- mean(ifelse(F.boot > F.pseudo, 1, 0))
}
## Compute the asymptotic P value
P.vec.asy[ii] <- pf(F.pseudo,df1=df1.vec[ii],df2=df2.vec[ii],lower.tail=FALSE)
}
## Restore seed
if(exists.seed) assign(".Random.seed", save.seed, .GlobalEnv)
## Return a list containing test results
return(sigtest.crs(index=index,
P=P.vec.boot,
P.asy=P.vec.asy,
F=F.vec,
F.boot=F.boot.mat,
df1=df1.vec,
df2=df2.vec,
rss=rss.vec,
uss=uss.vec,
boot.num=boot.num,
boot.type=boot.type,
xnames=names(model$xz)))
}
sigtest.crs <- function(index,
P,
P.asy,
F,
F.boot,
df1,
df2,
rss,
uss,
boot.num,
boot.type,
xnames){
tsig <- list(index=index,
P=P,
P.asy=P.asy,
F=F,
F.boot=F.boot,
df1=df1,
df2=df2,
rss=rss,
uss=uss,
boot.num=boot.num,
boot.type=switch(boot.type,
"residual" = "Residual",
"reorder" = "Reorder"),
xnames=xnames)
tsig$reject <- rep('', length(F))
tsig$rejectNum <- rep(NA, length(F))
tsig$reject[a <- (P < 0.1)] <- '.'
tsig$rejectNum[a] <- 10
tsig$reject[a <- (P < 0.05)] <- '*'
tsig$rejectNum[a] <- 5
tsig$reject[a <- (P < 0.01)] <- '**'
tsig$rejectNum[a] <- 1
tsig$reject[a <- (P < 0.001)] <- '***'
tsig$rejectNum[a] <- 0.1
class(tsig) = "sigtest.crs"
return(tsig)
}
print.sigtest.crs <- function(x, ...){
cat("\nRegression Spline Significance Test",
"\nTest Type: ", x$boot.type," (",x$boot.num,
" replications)",
"\nPredictors tested for significance:\n",
paste(paste(x$xnames[x$index]," (",x$index,")", sep=""), collapse=", "),"\n\n",
sep="")
maxNameLen <- max(nc <- nchar(nm <- x$xnames[x$index]))
cat("\nSignificance Test Summary\n")
cat("P Value:", paste("\n", nm, ' ', blank(maxNameLen-nc), format.pval(x$P),
" ", formatC(x$reject,width=-4,format="s"),
"(F = ", formatC(x$F,digits=4,format="fg"),
", df1 = ", x$df1,
", df2 = ", x$df2,
", rss = ", formatC(x$rss,digits=6,format="fg"),
", uss = ", formatC(x$uss,digits=6,format="fg"),")",sep=''))
cat("\n---\nSignif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n\n")
}
summary.sigtest.crs <- function(object, ...) {
print(object)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.