#' Variable selection in Partial Linear Bivariate penalized Spline fit in generalized spatial model
#'
#' This is an internal function of package \code{GgAM}. Bivariate penalized least squares problem
#' is solved with penalty parameter chosen by GCV or CV. Variable selection by using
#' adaptive LASSO or group SCAD is applied in parametric coefficients.
#'
#' @importFrom grpreg grpreg
#' @importFrom stats gaussian model.frame glm coef runif
#' @param G An object of the type returned by \code{plbpsm} when \code{fit=FALSE}.
#' @param criterion The criterion to choose the penalty parameter lambda. \code{"GCV"} to use
#' generalized cross validation method and \code{"CV"} for cross validation
#' @param family The family object, specifying the distribution and link to use.
#' @param method 'ALASSO' or 'SCAD' to penalize the coefficients for parametric part.
#' @param ind_c The given index of covariates that are selected.
#' @param VS '\code{TRUE}' for using ALASSO/SCAD to select linear variables.
#' @param control A list of fit control parameters to replace defaults returned by \code{\link{plbpsm.control}}.
#' Any control parameters not supplied stay at their default values.
#' @param MI whether model identification is conducted or not.
#' @param ... other arguments.
#' @details
#' This is an internal function of package \code{GgAM}. We propose Iteratively Reweighted Least square based
#' algorithm to get the poilot estimation and then use it to get a a spline-backfitted local polynomial estimation.
#' The smoothing penalty parameter could
#' be chosen by \code{GCV} or \code{CV} using the routines: \code{\link{gplsfitGCV}}.
#' @return A list of fit information.
#' @export
ggrplsfit <- function(G, criterion, method, family, ind_c, VS, control = plbpsm.control(), MI,...)
{
# Deal with NB
if (substr(family$family[1], 1, 17) == "Negative Binomial") {
theta <- family$getTheta()
if (length(theta) == 1) {
G$sig2 <- 1
} else {
if (length(theta) > 2)
find.theta <- TRUE
}
nb.link <- family$link
gambpsname <- 'gplsfitGCV_nb'
} else {
gambpsname <- 'gplsfitGCV'
theta <- 0
}
Terms <- delete.response(G$pterms)
Xpp <- model.matrix(Terms,G$mf,contrasts=G$contrasts)
if (length(G$basis_info)>0){
tem <- "bivariate.smooth"%in%sapply(G$basis_info, class)
}else {
Gb <-NULL
tem <- FALSE}
if (tem){
Gb <- G$basis_info[[length(G$basis_info)]]
V <- Gb$V
Tr <- Gb$Tr
J <- ncol(Gb$Q2)
### standardize before doing ALASSO/SCAD
B <- Gb$B
Q2 <- Gb$Q2
K <- Gb$K
P <- t(Q2)%*%K%*%Q2
lambda <- G$lambda
Xpp <- Xpp[,-1,drop=FALSE]
Xp <- Xpp[Gb$ind,,drop=FALSE]
}else {
BQ2 <- NULL
B <- NULL
Q2 <- NULL
P <- NULL
Gb <-NULL
Xp <- Xpp
lambda <- NULL}
nvars <- NCOL(G$X)
y <- G$y # original data
X <- G$X # original design matrix
if (MI){
X <- cbind(G$Xp,X)
}
if (nvars == 0)
stop("Model seems to contain no terms")
if (NCOL(y) > 1)
stop("y must be univariate unless binomial")
if ((G$intercept & G$m_b!=0)){
X <- X[,-1]
if(is.null(dim(X))){
X <- as.matrix(X)
}
} else {X <- X}
if (length(X)==0){VS=FALSE}
# univariate basis info
UB <- c()
if (MI){
if (length(G$ind.nl)>0){
UB <- G$UB
} else {UB <- NULL}
} else {
if (length(G$basis_info)>0){
for (j in 1:length(G$basis_info)){
if (class(G$basis_info[[j]])[1]=='univariate.smooth'){
UB <- cbind(UB,G$basis_info[[j]]$B)
}}
} else {UB <- NULL}
}
offset <- G$offset
weights <- G$w
n.score <- sum(weights != 0)
### Define some functions to use in 'family'
variance <- family$variance
dev.resids <- family$dev.resids
aic <- family$aic
linkinv <- family$linkinv
linkfun <- family$linkfun
mu.eta <- family$mu.eta
if (NCOL(y) > 1)
stop("y must be univariate unless binomial")
if (!is.function(variance) || !is.function(linkinv))
stop("illegal `family' argument")
gplsfit.control<-list(trace=control$trace,maxstep=control$maxstep,epsilon=control$epsilon)
### main algorithm for grplsfit starts here
Y <- y
n <- nobs <- length(Y)
good <- weights > 0
if (all(!good)) {
warning(gettextf("No observations informative."))
}
if (!is.null(dim(X))){
X <- X[good,,drop = FALSE]
np <- ncol(X)
#X <- apply(X,2,scale)
} else {
X <- X[good,drop = FALSE]
np <- ncol(as.matrix(X))
#X <- apply(as.matrix(X),2,scale)
}
Xp <- cbind(Xp,G$linear_cov)
#print(head(Xp))
colnames(Xp)<-c(colnames(Xpp),G$linear.names)
#print(dim(Xp))
# start of GCV
if (criterion=='GCV'){
# No variable selection
if(VS==FALSE){
### no selection of variables so no ind.c
if (length(Xp)>0){
ind.c <- 1:(dim(Xp)[2])
} else {
ind.c <- 0}
lam2 <- 0
mfit <- do.call(gambpsname,list(y,B,Q2,P,UB,lambda,family,offset,theta,fx=G$fx,control=gplsfit.control,X=X,ind_c=ind.c,...))
print(ind.c)
print(mfit$se_beta)
lam1 <- mfit$lam0
tt <- (y-mfit$Yhat)^2/variance(mfit$Yhat)
sigma_2 <- 1/(length(y)-mfit$df)*sum(tt)
alpha_hat <- mfit$alpha_hat
beta_hat <- mfit$beta_hat
gamma_hat <- mfit$gamma_hat
theta_hat <- mfit$theta_hat
sse <- mfit$sse
res <- mfit$res
gcv <- mfit$gcv
cv <- NULL
Yhat <- mfit$Yhat
edf <- mfit$df
if (length(Xp[,ind.c])>0 && length(mfit$alpha_hat)>0){
se_beta <- mfit$se_beta[1:length(ind.c)]
Ve <- mfit$Ve[1:length(ind.c),1:length(ind.c)]
} else {
se_beta <- NULL
Ve <- NULL
}
}
### variable selection
if(VS==TRUE){
# add more in future
}
}# end of GCV
# get the final estimation here
eta <- mfit$eta_hat
# sbl estimation starts here
if (G$backfitting){
term.smooth <- sapply(G$basis_info, class)[1,]
if (MI){
#term.smooth <- sapply(object$basis_info2, class)[1,]
smooth.univariate <- G$ind.nl
} else {
smooth.univariate <- which(term.smooth=="univariate.smooth")
}
smooth.bivariate <- which(term.smooth=="bivariate.smooth")
X3 <- X2 <- Xp
first.para <- dim(as.matrix(X2))[2]
mhat <- as.matrix(X2)*alpha_hat[1:(dim(as.matrix(X2))[2])]
### univariate part
if (length(smooth.univariate)>0){
for (k in smooth.univariate){
bsinf <- G$basis_info[[k]]
last.para <- first.para+bsinf$n.para
tem <- bsinf$B%*%alpha_hat[(first.para+1):last.para]
temm <- G$mf[,bsinf$term]
first.para <- last.para
if (!is.null(Gb)){
mhat <- cbind(mhat,tem)
# G$Xorig
X2 <- cbind(X2,temm[Gb$ind])} else {
mhat <- cbind(mhat,tem)
# G$Xorig
X2 <- cbind(X2,temm)
}
}
}
### bivariate part
if(length(smooth.bivariate)>0){
bsinf <- G$basis_info[[smooth.bivariate]]
ind <- bsinf$ind
temm <- bsinf$B%*%gamma_hat
mhat <- cbind(mhat,temm)
}
first.para <- alpha0 <- dim(as.matrix(X3))[2]
mhat.sbl <- c()
h_opt_all <- c()
if (length(smooth.univariate)>0){
for (k in 1:length(smooth.univariate)){
alpha0 <- first.para+k
XX <- if (is.null(dim(X2))){X2} else {X2[,alpha0]}
fun1 <- key_functionsSBLL(y,X2,alpha0,mhat,sigma_2,XX,family=family
,basisinfo=G$basis_info[[smooth.univariate[k]]])
h_opt <-
length(y)^(-0.2)*(5*7*sum(fun1$esigma/fun1$df_fixed)/sum(fun1$beta_dev2^2))^(0.2)
x0 <- XX
initial <- runif(length(x0))-0.5
if (length(initial)>10000){
Ind <- seq(1,length(initial),by=30)} else {Ind <- seq(1,length(initial),by=1)}
m_sbk <- SBK_locp(y,X2,initial[Ind],alpha0,x0[Ind],mhat,h_opt,family=family)
mhat.sbl <- cbind(mhat.sbl,m_sbk)
h_opt_all <- c(h_opt_all,h_opt)
}
}
if (!is.null(dim(Xp))) {
Xp2 <- Xp
#print(dim(Xp2))
para <- if (!is.null(dim(Xp2)) && dim(Xp2)[2]>0) {as.matrix(Xp2)*alpha_hat[1:(dim(as.matrix(Xp2))[2])]} else {NULL}
} else {
para <- NULL}
eta_sbl <- rowSums(as.matrix(cbind(para[Ind],as.matrix(mhat.sbl),beta_hat[Ind])))
# print(para)
# print(mhat.sbl)
# print(as.matrix(cbind(para[Ind],mhat.sbl,beta_hat[Ind])))
y_sbl <- linkinv(eta_sbl)
# eta_sbl <- rep(0,length(beta_hat))
# y_sbl <- linkinv(eta_sbl)
} else {
y_sbl <- rep(NA,length(y))
Ind <- 1:length(y)
mhat.sbl <- NULL
h_opt_all <- NULL
mhat <- NULL
X3 <- X2 <- NULL
}
if (any(!is.finite(alpha_hat))) {
warning(gettextf("Non-finite coefficients"))}
eta <- as.matrix(eta)
mu <- linkinv(eta <- eta + offset)
eta <- linkfun(mu)
dev <- sum(dev.resids(Y, mu, weights))
if (mfit$boundary)
warning("Algorithm stopped at boundary value")
# check with specific family
eps <- 10 * .Machine$double.eps
if (family$family[1] == "binomial") {
if (any(mu > 1 - eps) || any(mu < eps))
warning("fitted probabilities numerically 0 or 1 occurred")
}
if (family$family[1] == "poisson") {
if (any(mu < eps))
warning("fitted rates numerically 0 occurred")
}
### rewrite family with theta
if (substr(family$family[1], 1, 17) == "Negative Binomial"){
family<-do.call("negbin",list(theta=mfit$est_theta,link=nb.link))
}
residuals <- rep(NA, nobs)
### not sure
residuals[good] <- mfit$z - (eta-offset)[good]
### For those points not in the triangles, return "NA"
prior_weights_all <- G$y <- residuals_all <- weights_all <- eta_all <- fitted.values_all <- fitted.values_all_sbl <- rep(NA,dim(G$mf)[1])
if (!is.null(Gb$ind)){
G$y[Gb$ind]<-y
fitted.values_all[Gb$ind]<-mu
fitted.values_all_sbl3 <- fitted.values_all_sbl[Gb$ind]
fitted.values_all_sbl3[Ind]<-y_sbl
fitted.values_all_sbl[Gb$ind]<-fitted.values_all_sbl3
residuals_all[Gb$ind] <- residuals
#residuals_all <- NULL
eta_all[Gb$ind] <- eta
weights_all[Gb$ind] <- if (is.null(mfit$w)){rep(1,length(y))} else {mfit$w}
prior_weights_all[Gb$ind] <- weights
} else {
G$y<-y
fitted.values_all<-mu
fitted.values_all_sbl3 <- fitted.values_all_sbl
fitted.values_all_sbl3[Ind]<-y_sbl
fitted.values_all_sbl<-fitted.values_all_sbl3
residuals_all <- residuals
eta_all <- eta
weights_all <- if (is.null(mfit$w)){rep(1,length(y))} else {mfit$w}
prior_weights_all<- weights
}
dev_sbl <- sum(dev.resids(Y, y_sbl, weights))
wtdmu <- if (G$intercept)
sum(weights * Y)/sum(weights) else linkinv(offset)
nulldev <- sum(dev.resids(Y, wtdmu, weights))
n.ok <- nobs - sum(weights == 0)
nulldf <- n.ok - as.integer(G$intercept)
rank2<-ifelse(is.null(Gb),0,dim(Gb$Q2)[1])
aic.model <- aic(Y, n, mu, weights, dev) + 2 * sum(G$edf)
object <- list(coefficients = as.vector(alpha_hat), coefficients_bivariate = as.vector(gamma_hat),
residuals = residuals_all,fitted.values.sbl=fitted.values_all_sbl,Xp=X3,dev_sbl=dev_sbl,#fitted.values_all_sbl2=fitted.values_all_sbl2,
fitted.values = fitted.values_all, family = family, linear.predictors = eta_all,
deviance = dev, null.deviance = nulldev, edf=edf, prior.weights=prior_weights_all, weights = weights_all,#edfs=edfs,
df.null = nulldf,converged = mfit$conv,backfitting=G$backfitting,
y = G$y, est_theta=mfit$est_theta,middle=mfit$middle,#scale=G$scale,
r = G$r, hat=mfit$hat, nsdf = G$nsdf, Ve = Ve, iter=mfit$iter,
gcv_opt= gcv, h_opt_all=h_opt_all,cv_opt=cv, aic = aic.model,
boundary = mfit$boundary,mhat.sbl=mhat.sbl,mhat=mhat,X2=X2,
theta_hat=as.vector(theta_hat),ind_c=ind.c,sse=sum(residuals_all^2),
se_beta=se_beta,lam1=lam1,lam2=lam2, Ve=Ve,intercept=G$intercept,sigma_2=sigma_2,VS=VS)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.