Nothing
#' Fence model selection (Nonparametric Model)
#'
#' Fence model selection (Noparametric Model)
#' @param full formula of full model
#' @param data data
#' @param spline variable needed for spline terms
#' @param ps order of power
#' @param qs number of knots
#' @param B number of bootstrap sample, parametric for lmer
#' @param grid grid for c
#' @param bandwidth bandwidth for kernel smooth function
#' @param lambda A grid of lambda values
#' @return
#' \item{models}{list all model candidates with p polynomial degrees and q knots in the model space}
#' \item{Qd_matrix}{list a matrix of QM - QM.tilde for all model candidates. Each row is for each bootrap sample}
#' \item{bandwidth}{list the value of bandwidth}
#' \item{model_mat}{list a matrix of selected models at each c values in grid (in columns). Each row is for each bootstrap sample}
#' \item{freq_mat}{list a matrix of coverage probabilities (frequency/smooth_frequency) of each selected models for a given c value (index)}
#' \item{c}{list the adaptive choice of c value from which the parsimonious model is selected}
#' \item{lambda}{penalty (or smoothing) parameter estimate given selected p and q}
#' \item{sel_model}{list the selected (parsimonious) model given the adaptive c value}
#' \item{beta.est.u}{A list of coefficient estimates given a lambda value}
#' \item{f.x.hat}{A vector of fitted values obtained from a given lambda value and beta.est.u}
#' @note The current Fence method in Nonparametric model focuses on one spline variable.
#' This method can be extended to a general case with more than one spline variables, and includes non-spline variables.
#' @author Jiming Jiang Jianyang Zhao J. Sunil Rao Bao-Qui Tran Thuan Nguyen
#' @references
#' \itemize{
#' \item{Jiang J., Rao J.S., Gu Z., Nguyen T. (2008), Fence Methods for Mixed Model Selection. The Annals of Statistics, 36(4): 1669-1692}
#' \item{Jiang J., Nguyen T., Rao J.S. (2009), A Simplified Adaptive Fence Procedure. Statistics and Probability Letters, 79, 625-629}
#' \item{Jiang J., Nguyen T., Rao J.S. (2010), Fence Method for Nonparametric Small Area Estimation. Survey Methodology, 36, 1, 3-11}
#' }
#'
#' @examples
#' \dontrun{
#' require(fence)
#' n = 100
#' set.seed(1234)
#' x=runif(n,0,3)
#' y = 1-x+x^2- 2*(x-1)^2*(x>1) + 2*(x-2)^2*(x>2) + rnorm(n,sd=.2)
#' lambda=exp((c(1:60)-30)/3)
#' data=data.frame(cbind(x,y))
#' test_NF = fence.NF(full=y~x, data=data, spline='x', ps=c(1:3), qs=c(2,5), B=1000, lambda=lambda)
#' plot(test_NF)
#' summary <- summary(test_NF)
#' model_sel <- summary[[1]]
#' model_sel
#' lambda_sel <- summary[[2]]
#' lambda_sel
#' }
#' @export
fence.NF = function(full, data, spline, ps = 1:3, qs = NA, B = 100, grid = 101, bandwidth = NA, lambda) {
n = nrow(data)
if (all(is.na(qs))) {
lower = ceiling(n/5)
upper = floor(n/4)
if (n < 50) {
lower = 2
}
if (n > 500) {
lower = 100
upper = 125
}
qs = lower:upper
}
y = matrix(data[,as.character(full)[2]], ncol = 1)
baseX = model.matrix(full, data)
nX= ncol(baseX)-1
#### STOP IF MORE THAN 1 COVARIATES ###
if (nX >= 2) {
stop(paste0("Model input has ", ncol(baseX)-1, ' predictors. Current fence package can only considers 1 predictor at a time.'))
}
####
splineX=which(colnames(baseX)[-1] %in% spline)
expand.grid.X = function(nX,ps, qs){
psX=replicate(nX, c(0,ps), simplify=FALSE)
qsX=replicate(nX, c(0,qs), simplify=FALSE)
qpX = c(psX, qsX)
model.cand <- expand.grid(qpX)
colnames(model.cand) <- paste(c(rep('p',nX),rep('q',nX)), rep(1:nX, times=2), sep='')
for (i in c(1:nX)){
#remove candidate models where p=0 and q>0
model.cand <- model.cand[!(model.cand[,i]==0 & model.cand[,nX+i]>0),]
#remove candidate models where q>0 for X's not in spline list
if(!i %in% splineX) model.cand <- model.cand[model.cand[,nX+i]==0,]
}
rownames(model.cand) <- NULL
return(model.cand)
}
model.cand <- expand.grid.X(nX, ps, qs)
#Get X^p and knots for all p,q,x
addtionXlist =apply(as.matrix(baseX[,-1],nrow=n),2, genaddX, ps=ps, qs=qs)
# Get matrix of X's and Z's for each model with known p and q
make.eachX = function(addtionXlist, xindex, p, q){
psm = addtionXlist[[xindex]]$pmatrix #matrix where each column is X^p for different p
qsm = addtionXlist[[xindex]]$qmatrix #list contaning knots column for different number of knots
if(p==0 & q==0) Xall=as.matrix(baseX[,1], ncol=1); Zall=list(NULL)
if(p==1) Xall = baseX[,c(1,xindex+1)]
if(p>1) Xall=cbind(baseX[,c(1,xindex+1)], psm[,2:p])
if(q==0) Zall = list(NULL)
if(q>0) {
whichq=which(qs==q)
Zall = qsm[[p]][[whichq]]
}
return(list('X'=Xall,'Z'= Zall))
}
#X and Z list for all candidate models
Xall=Zall=list()
for (i in 1:nrow(model.cand)){
m=model.cand[i,]
#get X's columns and Z's column of each x covariate
eachmod=lapply(1:nX, FUN=function(xi) {make.eachX(addtionXlist, xi, p=as.numeric(m[xi]), q=as.numeric(m[nX+xi]))})
# extract X's columns for each x and combine into a matrix
Xmat=matrix(unlist(lapply(eachmod, function(eachXls){eachXls$X})), nrow=n, byrow = FALSE)
if(nX>1) Xmat=Xmat[,-(which(colSums(Xmat==1)==n)[-1])] #remove repeated intercept-column of 1's
# extract Z's columns for each x and combine into a matrix
Zmat=unlist(lapply(eachmod, function(eachXls){eachXls$Z}))
if(length(Zmat)==0) {Zmat=list()} else{Zmat = matrix(Zmat, nrow=n, byrow = FALSE)}
Xall[[i]]=Xmat
Zall[[i]]=Zmat
}
Wall=mapply(cbind,Xall, Zall)
##Calculate lack-of-fit
Q.comb=sapply(Wall, function(W){
W=matrix(unlist(W),nrow=n)
temp.b=solve(t(W)%*%W)%*%t(W)%*%y
y.hat = W%*%temp.b
list(t(y - y.hat)%*%(y - y.hat), temp.b)
})
Q.hat=unlist(Q.comb[1,])
beta.hat=Q.comb[2,]
Qdiff = Q.hat - min(Q.hat)
# Chose model with minimum lack-of-fit
index.min = which.min(Q.hat)
model.min = model.cand[index.min,]
# Bootstrap based on chosen model (not full model)
bs = genNFbs(B, y, Wall[[index.min]], beta.hat[[index.min]])
# Recalculate lack-of-fit for all models & bootstraps
Q.star=sapply(Wall, function(W){
W=matrix(unlist(W),nrow=n)
temp.b=solve(t(W)%*%W)%*%t(W)%*%bs
y.hat = W%*%temp.b
colSums((bs-y.hat)^2)
})
Qdiff.star = sweep(Q.star, 1, apply(Q.star, 1, min), '-') #B x number of modelcandidate
# Find peak Cn
cs = seq(0, max(Qdiff.star), length.out = grid)
if (is.na(bandwidth)) {
bandwidth = (cs[2] - cs[1]) * 3
}
# Optimal model < Cn
model.search=function(model.cand, infence){
m.infence=model.cand[infence,]
index.infence=which(infence)
sump=apply(m.infence, 1, function(r){sum(r[1:nX])})
sumq=apply(m.infence, 1, function(r){sum(r[(nX+1):(nX+nX)])})
return(min(index.infence[order(sumq, sump)]))
}
model_mat = matrix(NA, nrow = B, ncol = grid)
for (i in 1:length(cs)) {
infence_matrix = Qdiff.star <= cs[i]
model_mat[, i] = apply(infence_matrix, 1, function(x){model.search(model.cand,x)})
}
freq_mat = apply(model_mat, 2, function(l) {
tab = sort(table(l), decreasing = TRUE)
c(as.numeric(names(tab)[1]), tab[1])
})
freq_mat[2,] = freq_mat[2,] / B
freq_mat = rbind(freq_mat, ksmooth(cs, freq_mat[2,], kernel = "normal", bandwidth = bandwidth, x.points = cs)$y)
colnames(freq_mat) = cs
rownames(freq_mat) = c("index", "frequency", "smooth_frequency")
cindex = peakw(cs, freq_mat[3,], 2)
Cn.peak = cs[cindex]
# Find p and q that give Qdiff < Cn.peak
index.infence=which(Qdiff < Cn.peak)
correct.m=model.cand[index.infence,]
sump.m=apply(correct.m, 1, function(r){sum(r[1:nX])})
sumq.m=apply(correct.m, 1, function(r){sum(r[(nX+1):(nX+nX)])})
index.m=min(index.infence[order(sumq.m, sump.m)])
# Get y fit
get.y.fit.out = get.y.fit(index.ind=index.m,beta.hat,X=Xall,Z=Zall,y,Q.hat,index=index.min, Cn.ind=Cn.peak, lambda, model.cand)
lambda.fit = get.y.fit.out[[1]]
beta.est.u = get.y.fit.out[[2]]
f.x.hat = get.y.fit.out[[3]]
ans = list(full = full, models = model.cand,
model_mat =model_mat, freq_mat =freq_mat,
Qd_matrix = Qdiff, bandwidth =bandwidth,beta.est.u=beta.est.u,
f.x.hat =f.x.hat, sel_model=model.cand[index.m,], c=Cn.peak, lambda = lambda.fit)
class(ans) = "NF"
return(ans)
}
############# HELPER FUNCTIONS #######################
genaddX = function(svalue, ps, qs) {
psm = outer(svalue, ps, '^')
qsm = lapply(ps, function(p) {
res = lapply(qs, function(q) {
knots = cover.design(R=as.matrix(svalue, ncol = 1), nd=q)
drop(outer(svalue, knots$design, function(x, y) ifelse(x < y, 0, x - y)) ^ p)
})
names(res) = paste0("q", qs)
res
})
names(qsm) = paste0("p", ps)
list(pmatrix = psm, qmatrix = qsm)
}
genNFbs = function(B, y, W0, beta0) {
sigma = sqrt(sum((W0 %*% beta0 - y)^2) / (nrow(W0) - ncol(W0)))
ybase = W0 %*% beta0
matrix(as.vector(ybase) + rnorm(B * nrow(W0), 0, sigma), nrow = nrow(W0))
}
Q.cal.fit = function(lambda.ind,temp.x,temp.z, y,len.q){
para.est = para.fit(lambda.ind,temp.x,temp.z,y,len.q)
x.beta.fit = para.est[[1]]
u.est.fit = para.est[[2]]
temp.e = y - temp.x%*%x.beta.fit - temp.z%*%u.est.fit
t(temp.e)%*%temp.e
}
lambda.cal = function(k.index){
exp((k.index-30)/3)
}
para.fit = function(lambda.est,temp.x,temp.z,y,len.q){
n <- NULL
rm(n)
v.est = diag(n) + lambda.est^(-1)*(temp.z%*%t(temp.z))
v.est.inv = solve(v.est)
x.v.inv = t(temp.x)%*%v.est.inv
beta.est.fit = solve(x.v.inv%*%temp.x)%*%x.v.inv%*%y
x.beta.fit = temp.x%*%beta.est.fit
u.est.fit = lambda.est^(-1)*solve(diag(len.q)+ lambda.est^(-1)*(t(temp.z)%*%temp.z))%*%t(temp.z)%*%(y - x.beta.fit)
list(beta.est.fit,u.est.fit)
}
get.y.fit = function(index.ind,beta.hat,X,Z,y,Q.hat,index,Cn.ind,lambda,model.cand){
model.m=model.cand[index.ind,]
beta.m = beta.hat[[index.ind]]
nX=ncol(model.cand)/2
len.q = sum(model.m[(nX+1):(nX+nX)])
if(len.q==0){
f.x.hat = X[[index.ind]]%*%beta.m
beta.est.u = list(beta.m, NULL)
lambda.fit = 'no knots'
}else{
temp.x = X[[index.ind]]
temp.z = Z[[index.ind]]
Q.hat.est = rep(NA,length(lambda))
for(k in 1:length(lambda)){
Q.hat.est[k] = Q.cal.fit(lambda[k],temp.x,temp.z,y,len.q)
}
Q.diff.est = Q.hat.est - Q.hat[index]
k.index = which(Q.diff.est <= Cn.ind)
k.index = k.index[length(k.index)]
lambda.u = lambda.cal(k.index+1)
lambda.l = lambda.cal(k.index)
lambda.diff = lambda.u - lambda.l
error = lambda.diff - 10^(-6)
while(error > 0){
mid.lambda = lambda.diff/2 + lambda.l
Q.est = Q.cal.fit(mid.lambda,temp.x,temp.z,y,len.q)
Q.tilte = Q.hat[index]
Q.diff.fit = Q.est - Q.tilte
if(Q.diff.fit <= Cn.ind){
lambda.l = mid.lambda
lambda.diff = lambda.u - lambda.l
}else{
lambda.u = mid.lambda
lambda.diff = lambda.u - lambda.l
}
error = lambda.diff - 10^(-6)
}
lambda.fit = lambda.l
beta.est.u = para.fit(lambda.fit,temp.x,temp.z,y,len.q)
f.x.hat = temp.x%*%beta.est.u[[1]]+temp.z%*%beta.est.u[[2]]
}
list(lambda.fit,beta.est.u,f.x.hat)
}
#' Plot Nonparametric Fence model selection
#'
#' @param x Object to be plotted
#' @param ... Additional arguments. CNot currently used.
#' @export
#' @importFrom utils globalVariables
plot.NF = function(x = res, ...) {
res <- sp <- m <- NULL
rm(res,sp,m)
tmp = data.frame(c = as.numeric(colnames(x$freq_mat)),
p = x$freq_mat[2, ],
sp = x$freq_mat[3, ],
m = as.factor(x$freq_mat[1, ]))
p = ggplot(tmp) +
geom_point(aes(x = c, y = p, colour = m)) +
geom_line(aes(x = c, y = sp), linetype="dashed") +
geom_line(aes(x = c, y = sp, colour = m)) +
ylim(0, 1)
p
}
#' Summary Nonparametric Fence model selection
#'
#' @param object Object to be summarized
#' @param ... addition arguments. Not currently used
#' @export
summary.NF = function(object = res, ...) {
res <- NULL
rm(res)
print(list(object$sel_model, object$lambda))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.