cdm <- function(candi_models,x)
{
p <- ncol(x);
s=as.matrix(candi_models)
if ((nrow(s)==1) && (ncol(s)==1)){
if (candi_models == 1){
s <- matrix(1,nrow=p,ncol=p)
s[upper.tri(s)] <- 0
s <- rbind(0,s)
}
if (candi_models == 2){
s <- matrix(0,nrow=2^p,ncol=p)
s0 <- matrix(c(1,rep(0,p-1)),1,p)
s1 <- matrix(c(rep(0,p)),1,p)
for (i in 2:2^p){
s1 <- s0 + s1
for (j in 1:p){
if (s1[1,j] == 2){
s1[1,j+1] <- s1[1,j+1]+1
s1[1,j] <- 0
}
}
s[i,] <- s1
}
}
}
candi_models=s
return(s)
}
wFIC.normal <- function(Y, X, Z, XZeval=cbind(X,Z), weightsvec=rep(1,nrow(XZeval)),
subsetsmatrix, partial.delta.theta, partial.delta.gamma)
{
weightsvec <- as.vector(weightsvec)
qq = ncol(as.matrix(Z))
pp = ncol(as.matrix(X))
nn = nrow(as.matrix(X))
XZeval = matrix(XZeval,nrow = nn)
X.eval = XZeval[,1:pp]; X.eval<-matrix(X.eval,nrow = nn)
Z.eval = XZeval[,-(1:pp)]; Z.eval<-matrix(Z.eval,nrow = nn)
full.fit = lm(Y~X+Z-1) # exclude intercept since already in the X-part
sigma.hat.sq<-as.vector(t(full.fit$residuals)%*%full.fit$residuals/(nn-pp-qq))
J <- matrix(0,nrow<-(pp+qq),ncol<-(pp+qq))
#J[1,1]<-2
J[(1:(pp)),(1:(pp))] <- t(X)%*%X
J[(pp+1):(pp+qq),(1:(pp))] <- t(Z)%*%X
J[(1:(pp)),(pp+1):(pp+qq)] <- t(X)%*%Z
J[(pp+1):(pp+qq),(pp+1):(pp+qq)] <- t(Z)%*%Z
J <- J/(nn*sigma.hat.sq)
J11 <- J[(pp+1):(pp+qq),(pp+1):(pp+qq)]
J00 <- J[1:(pp),1:(pp)]
J10 <- J[(pp+1):(pp+qq),1:(pp)]
invJ <- solve(J)
K <- invJ[-(1:(pp)),-(1:(pp))]
## estimate (beta,gamma) in biggest model
betagamma.hat <- full.fit$coefficients
deltahat <- nn^{1/2}*betagamma.hat[-(1:pp)]
n.eval <- nrow(XZeval)
wFIC.model<-rep(NA,nrow(subsetsmatrix))
XZeval <- as.matrix(XZeval)
X.eval <- XZeval[,1:pp]; X.eval<-matrix(X.eval,ncol=pp)
Z.eval <- XZeval[,-(1:pp)]; Z.eval<-matrix(Z.eval,ncol=qq)
#CAR create the A matrix, as defined on p. 507 of Claeskens and Hjort (5008a)
omega.omega.t.sum<-matrix(0,nrow<-qq,ncol<-qq)
weights.sum<-0
for (k in 1:n.eval) {
if (weightsvec[k] > 0) {
this.x <- X.eval[k,]
this.z <- Z.eval[k,]
omega <- J10%*%solve(J00)%*%partial.delta.theta(this.x,this.z) -
partial.delta.gamma(this.x,this.z)
omega.omega.t.sum <- omega.omega.t.sum + omega%*%t(omega) * weightsvec[k]
weights.sum <- weights.sum+weightsvec[k]
}
}
A <- omega.omega.t.sum/weights.sum
Iq <- diag(rep(1,qq))
for (j in 1:nrow(subsetsmatrix)) {
indicS <- subsetsmatrix[j,]*(1:qq)
variables <- subsetsmatrix[j,]
if (sum(variables) > 0)
{
qq <- length(variables)
indic <- variables*(1:qq)
Id <- diag (rep(1,qq))
pi.S <- matrix(Id[indic,],ncol=qq)
K.S <- solve(pi.S %*% solve(K) %*% t(pi.S))
G.S <- t(pi.S)%*%K.S%*%pi.S%*%solve(K) ###p.506 of Claeskens and Hjort (5008a)
sq.bias.estimate <- sum(
diag((Iq-G.S)%*%(deltahat%*%t(deltahat)-K)%*%t((Iq-G.S))%*%A))
wFIC.model[j] <- max(sq.bias.estimate,0) + sum(diag(A%*%t(pi.S)%*%K.S%*%pi.S))
} else {
wFIC.model[j] <- max(sum(diag((deltahat%*%t(deltahat)-K)%*%A)), 0)
}
wFIC= exp(-0.5*wFIC.model/c(t(omega)%*%K%*%omega))/sum(exp(-0.5*wFIC.model/c(t(omega)%*%K%*%omega)))
}
# return(wFIC.model)
return(wFIC)
}
ARMS <-function(x, y,method = "L1-ARM",candi_models=candi_models, n_train=ceiling(n/2), no_rep=50)
{
x = as.data.frame(x) ; p = ncol(x) ; n = length(y)
candi_models = cdm(candi_models,x); n_mo = nrow(candi_models) ; sk = rowSums(candi_models)
wt_calc <- function(rep_id) {
lwnmutr = lwnmute = rep(NA, n_mo);
tridx = sample(n, n_train, replace = FALSE)
for (j in seq(n_mo)) {
varindex = (candi_models[j, ] == 1)
trdat = data.frame(y[tridx] , x[tridx, varindex] ) ; names(trdat) = c( "y",names(x)[varindex] )
tedat = data.frame(y[-tridx] , x[-tridx, varindex]) ; names(tedat) = c( "y",names(x)[varindex] )
glmtr = glm(y ~., data = trdat) ; #glmte = glm(y ~., data = tedat)
if( any( is.na(glmtr$coef) ) ) {
lwnmutr[j] = lwnumte[j] = -Inf
}
else {
if(method=="L1-ARM") {
sigtr_k = mean(abs(glmtr$res))
#sigte_k = mean(abs(glmte$res))
trdk = sum( abs(tedat$y - predict(glmtr,newdata = tedat) ) )
#tedk = sum( abs(trdat$y - predict(glmte,newdata = trdat) ) )
lwnmutr[j] = - n_train * log(sigtr_k) - ((sigtr_k)^(-1)) * trdk
#lwnmute[j] = -(n-n_train) * log(sigte_k) - ((sigte_k)^(-1)) * tedk
}
if(method=="ARM") {
sigtr_k = sqrt(sum((glmtr$res)^2)/(n_train-sk[j]-1))
#sigte_k = sqrt(sum((glmte$res)^2)/(n-n_train-sk[j]-1))
trdk = sum( (tedat$y - predict(glmtr, newdata = tedat) )^2 )
#tedk = sum( (trdat$y - predict(glmte, newdata = trdat) )^2 )
lwnmutr[j] = - n_train * log(sigtr_k) - ((sigtr_k)^(-2)) * trdk/2
#lwnmute[j] = -(n-n_train) * log(sigte_k) - ((sigte_k)^(-2)) * tedk/2
}
}
}
lw_num = lwnmutr#(lwnmutr+lwnmute)/2
return(lw_num)
}
lw_num = matrix(unlist(mclapply(seq(no_rep), wt_calc)), nrow = no_rep, ncol = n_mo, byrow = TRUE)
lw_num = sweep(lw_num, MARGIN = 1, apply(lw_num, 1, max), "-")
w_num = exp(lw_num)
weight = colMeans(w_num/rowSums(w_num))
weight_se = (apply(w_num, 2, sd))/sqrt(no_rep)
list(weight = weight,weight_se = weight_se )
}
gma<-function (x, y,factorID=NULL, method = "L1-ARM", candi_models = 2, n_train=ceiling(n/2), no_rep=50)
{
x = as.data.frame(x)
if(is.null(factorID)==FALSE)
{for (i in 1:length(factorID))
{
x[,(factorID[i])] = factor(x[,factorID[i]])
}
}
p = ncol(x); n = length(y) ; candi_models = cdm(candi_models,x)
n_mo = nrow(candi_models) ; ik = sk = rowSums(candi_models)
ee = eej = matrix(0, nrow = nrow(x), ncol = n_mo)
bname=names(glm( y ~., data=x)$coefficients)
betahat=matrix(0,length(bname),n_mo) ; rownames(betahat)=bname
for (i in 1:n_mo) {
dati = data.frame(y,x[, candi_models[i, ] == 1])
names(dati) = c("y",names(x)[candi_models[i, ] == 1])
LSL = glm(y~., data=dati)
sk[i] = length((LSL)$coefficients)
betahat[names((LSL)$coefficients),i] = (LSL)$coefficients
ee[, i] = residuals(LSL)
hi = hatvalues(LSL)
eej[, i] = ee[, i] * ( 1 / (1 - hi) )
if(method == "SBIC") ik[i] <- BIC(LSL) #n * log(rss/n) + sk[i] * log(n)
if(method == "SAIC") ik[i] <- AIC(LSL) #n * log(rss/n) + sk[i] * 2
}
if (method == "SAIC" | method == "SBIC") {
weight = exp(-0.5 * (ik - min(ik)))/sum(exp(-0.5 * (ik - min(ik))))
weight = matrix(weight,n_mo,1)
wbetahat = betahat %*% weight
return(list(weight = weight, wbetahat=wbetahat, betahat=betahat, candi_models=candi_models))
}
if (method == "MMA" | method == "JMA") {
if (method == "MMA") {
a1 = t(ee) %*% ee
mK = which.max((sk))
sighat = (t(ee[, mK]) %*% ee[, mK])/(n - candi_models[mK])
a2 = matrix(c(-sighat) * sk)
}
if (method == "JMA") {
a1 = t(eej) %*% eej
a2 = matrix(0, nrow = n_mo, ncol = 1)
}
if (qr(a1)$rank < ncol(ee))
a1 = a1 + diag(n_mo) * 1e-10
if (qr(a1)$rank < ncol(ee))
a1 = a1 + diag(n_mo) * 1e-08
if (qr(a1)$rank < ncol(ee))
a1 = a1 + diag(n_mo) * 1e-06
a3 = t(rbind(matrix(1, nrow = 1, ncol = n_mo), diag(n_mo),-diag(n_mo)))
a4 = rbind(1, matrix(0, nrow = n_mo, ncol = 1), matrix(-1, nrow = n_mo, ncol = 1))
w0 = matrix(1, nrow = n_mo, ncol = 1)/n_mo
QP = solve.QP(a1, a2, a3, a4, 1)
w = QP$solution
w = as.matrix(w)
w = w * (w > 0)
weight = w/sum(w0)
weight = matrix(weight,n_mo,1)
wbetahat = betahat %*% weight
return(list(weight = weight, wbetahat=wbetahat, betahat=betahat, candi_models=candi_models))
}
if (method == "L1-ARM" | method == "ARM") {
if(is.null(factorID)==FALSE){
for(i in 1:length(factorID)){
h=summary(x[, factorID[i]])
if (min(h)<=(n/length(h)/2))
{
stop("The level of categorical data appears to be extremely unbalanced, and the number of observations at some levels is too small. It is recommended to try again after deal with categorical data reasonablely.")
}
}
}
if (method == "L1-ARM") {
ar = ARMS(x, y,method = "L1-ARM",candi_models=candi_models, n_train=ceiling(n/2), no_rep=50)
}
if (method == "ARM") {
ar = ARMS(x, y,method = "ARM",candi_models=candi_models, n_train=ceiling(n/2), no_rep=50)
}
weight = ar$weight
weight = matrix(weight,n_mo,1)
wbetahat = betahat %*% weight
weight_se = ar$weight_se
return(list(weight = weight, weight_se = weight_se, wbetahat=wbetahat, betahat=betahat, candi_models=candi_models))
}
if (method == "SFIC") {
wf = wFIC.normal(Y = y, X = matrix(1, nrow = length(y)),
Z = as.matrix(x), subsetsmatrix = candi_models, partial.delta.theta = function(x, z)
{c(1)}, partial.delta.gamma = function(x, z) { z})
weight = wf
weight = matrix(weight,n_mo,1)
wbetahat = betahat %*% weight
return(list(weight = weight, wbetahat=wbetahat, betahat=betahat, candi_models=candi_models))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.