Nothing
#' Adaptive Fence model selection (Restricted Fence)
#'
#' Adaptive Fence model selection (Restricted Fence)
#'
#' @param full formula of full model
#' @param data data
#' @param groups A list of formulas of (full) model in each bins (groups) of variables
#' @param B number of bootstrap sample, parametric for lmer
#' @param grid grid for c
#' @param bandwidth bandwidth for kernel smooth function
#' @param method either marginal (GEE) or conditional approach is selected
#' @param id Subject or cluster id variable
#' @param plot Plot object
#' @param cpus Number of parallel computers
#' @details In Jiang et. al (2008), the adaptive c value is chosen from the highest peak in the p* vs. c plot.
#' In Jiang et. al (2009), 95\% CI is taken into account while choosing such an adaptive choice of c.
#' In Thuan Nguyen et. al (2014), the adaptive c value is chosen from the first peak. This approach works better in the
#' moderate sample size or weak signal situations. Empirically, the first peak becomes highest peak when sample size
#' increases or signals become stronger
#' @return
#' \item{models}{list all model candidates in the model space}
#' \item{B}{list the number of bootstrap samples that have been used}
#' \item{lack_of_fit_matrix}{list a matrix of Qs for all model candidates (in columns). Each row is for each bootstrap sample}
#' \item{Qd_matrix}{list a matrix of QM - QM.tilde for all model candidates. Each row is for each bootrap sample}
#' \item{bandwidth}{list the value of bandwidth}
#' \item{model_mat}{list a matrix of selected models at each c values in grid (in columns). Each row is for each bootstrap sample}
#' \item{freq_mat}{list a matrix of coverage probabilities (frequency/smooth_frequency) of each selected models for a given c value (index)}
#' \item{c}{list the adaptive choice of c value from which the parsimonious model is selected}
#' \item{sel_model}{list the selected (parsimonious) model given the adaptive c value}
#' @note bandwidth = (cs[2] - cs[1]) * 3. So it's chosen as 3 times grid between two c values.
#' @references
#' \itemize{
#' \item{Jiang J., Rao J.S., Gu Z., Nguyen T. (2008), Fence Methods for Mixed Model Selection. The Annals of Statistics, 36(4): 1669-1692}
#' \item{Jiang J., Nguyen T., Rao J.S. (2009), A Simplified Adaptive Fence Procedure. Statistics and Probability Letters, 79, 625-629}
#' \item{Thuan Nguyen, Jiming Jiang (2012), Restricted fence method for covariate selection in longitudinal data analysis. Biostatistics, 13(2), 303-314}
#' \item{Thuan Nguyen, Jie Peng, Jiming Jiang (2014), Fence Methods for Backcross Experiments. Statistical Computation and Simulation, 84(3), 644-662}
#' }
#'
#' @examples
#' \dontrun{
#' r =1234; set.seed(r)
#' n = 100; p=15; rho = 0.6
#' beta = c(1,1,1,0,1,1,0,1,0,0,1,0,0,0,0) # non-zero beta 1,2,3,V6,V7,V9,V12
#' id = rep(1:n,each=3)
#' V.1 = rep(1,n*3)
#' I.1 = rep(c(1,-1),each=150)
#' I.2a = rep(c(0,1,-1),n)
#' I.2b = rep(c(0,-1,1),n)
#' x = matrix(rnorm(n*3*11), nrow=n*3, ncol=11)
#' x = cbind(id,V.1,I.1,I.2a,I.2b,x)
#' R = diag(3)
#' for(i in 1:3){
#' for(j in 1:3){
#' R[i,j] = rho^(abs(i-j))
#' }
#' }
#' e=as.vector(t(mvrnorm(n, rep(0, 3), R)))
#' y = as.vector(x[,-1]%*%beta) + e
#' data = data.frame(x,y)
#' raw = "y ~ V.1 + I.1 + I.2a +I.2b"
#' for (i in 6:16) { raw = paste0(raw, "+V", i)}; full = as.formula(raw)
#' bin1="y ~ V.1 + I.1 + I.2a +I.2b"
#' for (i in 6:8) { bin1 = paste0(bin1, "+V", i)}; bin1 = as.formula(bin1)
#' bin2="y ~ V9"
#' for (i in 10:16){ bin2 = paste0(bin2, "+V", i)}; bin2 = as.formula(bin2)
#' # May take longer than 30 min since there are two stages in this RF procedure
#' obj1.RF = RF(full = full, data = data, groups = list(bin1,bin2), method="conditional")
#' obj1.RF$sel_model
#' obj2.RF = RF(full = full, data = data, groups = list(bin1,bin2), B=100, method="marginal")
#' obj2.RF$sel_model
#' }
#' @export
RF = function(full, data, groups, B = 100, grid = 101, bandwidth = NA, plot = FALSE, method = c("marginal", "conditional"), id = "id", cpus = parallel::detectCores()) {
#if (is.na(bandwidth)) {
# stop("Must assign a bandwidth.")
#}
method = match.arg(method)
# 1st step
cands = character(0)
resp = as.character(full)[2]
y = as.matrix(data[,resp])
for (group in groups) {
bs = bootstrap.RF(B, full, group, data)
ms = findsubmodel.RF(full, group, data)
mf = function(model, bootsmp) {
if (is.data.frame(bootsmp)) {return(model$model)}
list(Q = t(bootsmp) %*% model$meat %*% bootsmp,
size = length(model$model))
}
lf = function(x) {x$Q}
pf = function(x) {x$size}
sfInit(parallel = TRUE, cpus = cpus)
sfExportAll()
groupaf = adaptivefence(mf = mf, f = group, ms = ms, d = data, lf = lf, pf = pf, bs = bs, grid = grid, bandwidth = bandwidth)
if (plot) {
print(plot(groupaf))
dev.off()
}
groupc = groupaf$c
Qs = sapply(ms, function(m) t(y) %*% m$meat %*% y)
Qs = Qs - min(Qs)
infence = Qs < groupc
sizes = sapply(ms, function(x) length(x$model))
sizes[!infence] = Inf
Qs[sizes != min(sizes)] = Inf
#cands = c(cands, ms[[which.min(Qs)]]$model)
cands = c(cands, ms[[which.min(Qs)]]$model[-length(ms[[which.min(Qs)]]$model)])
}
cands = unique(cands)
# 2nd step
if (method == "marginal") {
resp = as.character(full)[2]
full2nd = as.formula(paste0(resp, "~", gsub(" ", "+", do.call(paste, as.list(cands)))))
bs = bootstrap.RF2(B, full2nd, data)
ms = findsubmodel.RF2(resp, cands)
sfInit(parallel = TRUE, cpus = cpus)
sfExportAll()
res = adaptivefence(mf = lm, f = full2nd, ms = ms, d = data, lf = function(x) -logLik(x),
pf = function(x) length(attributes(terms(x))$term.labels), bs = bs, bandwidth = NA)
}
if (method == "conditional") {
resp = as.character(full)[2]
full2nd = as.formula(paste0(resp, "~", gsub(" ", "+", do.call(paste, as.list(cands))), "+(1|", id, ")"))
res = fence.lmer(full2nd, data, B = B, grid = grid)
}
class(res) = "RF"
res
}
bootstrap.RF = function(B, full, group, data) {
resp = as.character(full)[2]
fulltms = attributes(terms(full))$term.labels
grouptms = attributes(terms(group))$term.labels
groupxtms = fulltms[!(fulltms %in% grouptms)]
y = data[,resp]
grouptms = as.list(grouptms)
grouptms$sep = "+"
groupxtms = as.list(groupxtms)
groupxtms$sep = "+"
X1 = model.matrix(as.formula(paste0(resp, "~0+", do.call(paste, grouptms))), data)
X2 = model.matrix(as.formula(paste0(resp, "~0+", do.call(paste, groupxtms))), data)
PX2O = diag(nrow(X2)) - X2 %*% ginv(t(X2) %*% X2) %*% t(X2)
PX2OMX1 = PX2O - PX2O %*% X1 %*% ginv(t(X1) %*% PX2O %*% X1) %*% t(X1) %*% PX2O
df = nrow(data) - length(fulltms)
beta1 = ginv(t(X1) %*% PX2O %*% X1) %*% t(X1) %*% PX2O %*% matrix(y, ncol = 1)
sigma = matrix(y, nrow = 1) %*% PX2OMX1 %*% matrix(y, ncol = 1) / df
base = X1 %*% beta1
lapply(1:B, function(i) matrix(base + rnorm(nrow(data), 0, sqrt(sigma)), ncol = 1))
}
findsubmodel.RF = function(full, group, data) {
resp = as.character(full)[2]
fulltms = attributes(terms(full))$term.labels
grouptms = attributes(terms(group))$term.labels
groupxtms = fulltms[!(fulltms %in% grouptms)]
y = data[,resp]
grouptms = as.list(grouptms)
grouptms$sep = "+"
groupxtms = as.list(groupxtms)
groupxtms$sep = "+"
X1 = model.matrix(as.formula(paste0(resp, "~0+", do.call(paste, grouptms))), data)
X2 = model.matrix(as.formula(paste0(resp, "~0+", do.call(paste, groupxtms))), data)
PX2O = diag(nrow(X2)) - X2 %*% ginv(t(X2) %*% X2) %*% t(X2)
res = ""
for (i in 1:length(grouptms)) {
res = append(lapply(res, function(x) c(x, grouptms[i])), lapply(res, function(x) x))
}
res = lapply(res, function(x) x[-1])[-length(res)]
for (i in 1:length(res)) {
X1 = as.matrix(data[,unlist(res[[i]][-length(res[[i]])])])
if (ncol(X1) == 0) {
next
}
res[[i]] = list(model = res[[i]], meat = PX2O - PX2O %*% X1 %*% ginv(t(X1) %*% PX2O %*% X1) %*% t(X1) %*% PX2O)
}
#res = res[!(sapply(res,is.null))]
res = res[!(sapply(res, function(l){is.null(l$meat)}))]
res
}
bootstrap.RF2 = function(B, full2nd, data) {
resp = as.character(full2nd)[2]
m = lm(full2nd, data)
base = fitted(m)
sigma = summary(m)$sigma
newresp = lapply(1:B, function(i) base + rnorm(length(base), 0, sigma))
ans = list()
for (i in 1:B) {
data[,resp] = newresp[[i]]
ans[[i]] = data
}
ans
}
findsubmodel.RF2 = function(resp, tms) {
res = paste(resp, "~1", sep = "")
for (fix in tms) {
res = as.vector(sapply(res, function(x) paste(x, c("", paste("+", fix, sep = "")), sep = "")))
}
lapply(res, as.formula)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.