###################################
# These functions are not user-level functions.
# They require a very specific input format and
# rely on the calling functions for input checking.
# These functions are not exported in the NAMESPACE file.
###################################
oldRidge <- function(beta, eta, Lambda, X, fit, trace = FALSE, epsilon = 1e-8, maxiter = 25) {
if (missing(eta)) eta <- drop(X %*% beta)
localfit <- fit(eta)
iter <- 0
oldLL <- -Inf
penalty <- NA # ensure `penalty` exists even if the loop exits before computing it
finished <- FALSE
while (!finished)
{
iter <- iter + 1
if (trace) {
cat(iter)
flush.console()
}
if (is.matrix(Lambda)) {
grad <- crossprod(X, localfit$residuals) - Lambda %*% beta # Lambda is a full penalty matrix in the reduced-basis (p > n) case
} else {
grad <- crossprod(X, localfit$residuals) - Lambda * beta
}
if ( length(localfit$W$diagW) > 0 & length(localfit$W$P) > 1 ) {
XdW <- X * matrix(sqrt(localfit$W$diagW), nrow(X), ncol(X))
Hess <- -crossprod(XdW) + crossprod(crossprod(localfit$W$P, X))
} else if (length(localfit$W$diagW) > 0) {
XW <- X * matrix(sqrt(localfit$W$diagW), nrow(X), ncol(X))
Hess <- -crossprod(XW)
} else {
Hess <- -crossprod(X)
}
if (is.matrix(Lambda)) {
Hess <- Hess - Lambda
} else {
diag(Hess) <- diag(Hess) - Lambda
}
shg <- drop(.solve(Hess, grad))
beta <- beta - shg
eta <- drop(X %*% beta)
if (trace) cat(rep("\b", trunc(log10(iter))+1), sep ="")
localfit <- fit(eta)
if (is.na(localfit$loglik) || iter == maxiter) {
if (trace) cat(rep("\b", trunc(log10(iter))+1), sep ="")
warning("Model does not converge: please increase lambda.", call.=FALSE)
break
}
if (is.matrix(Lambda)) {
penalty <- as.numeric(0.5 * sum(beta * (Lambda %*% beta)))
} else {
penalty <- as.numeric(0.5 * sum(Lambda * beta * beta))
}
LL <- localfit$loglik - penalty
# Check convergence
finished <- ( 2 * abs(LL - oldLL) / (2 * abs(LL) + 0.1) < epsilon )
half.step <- (LL < oldLL)
if (half.step) {
beta <- beta + 0.5 * shg
eta <- drop(X %*% beta)
localfit <- fit(eta)
if (is.matrix(Lambda)) {
penalty <- as.numeric(0.5 * sum(beta * (Lambda %*% beta)))
} else {
penalty <- as.numeric(0.5 * sum(Lambda * beta * beta))
}
LL <- localfit$loglik - penalty
}
oldLL <- LL
}
return(list(beta = beta, penalty = c(L1 = 0, L2 = penalty), fit = localfit, iterations = iter, converged = finished))
}
###################################
# The core lasso and elastic net algorithm
###################################
.lasso <- function(beta, lambda, lambda2=0, positive, X, fit, trace = FALSE,
epsilon, maxiter) {
# A general function for fitting L1-penalized models,
# possibly with an additional L2-penalty.
# Input:
# beta: a vector of length m (say): starting values
# lambda: a vector of length m
# fit must be function(eta), taking the linear predictor eta.
# It should return a list with at least:
# W: the weights matrix, or its diagonal
# loglik: the unpenalized loglikelihood (numeric)
# residuals: the residuals
betaNames = names(beta)
result = .Call('penalized_Lasso', PACKAGE = 'penalized', beta, lambda, lambda2, positive, X, fit, trace, epsilon, maxiter)
result$beta = as.numeric(result$beta)
names(result$beta) = betaNames
return(result)
}
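###################################
# Illustration only, not part of the package: a minimal sketch of the
# fit-callback contract documented above, assuming a Gaussian model with
# identity weights. The helper name .exampleGaussianFit is hypothetical;
# oldRidge()/.ridge() consume closures of exactly this shape.
###################################
.exampleGaussianFit <- function(y) {
  function(eta) {
    res <- y - eta
    list(
      residuals = res,                 # working residuals
      loglik = -0.5 * sum(res^2),      # unpenalized loglikelihood (up to a constant)
      W = list(diagW = numeric(0))     # empty weights: the Hessian reduces to -crossprod(X)
    )
  }
}
# Example use (kept in comments so nothing runs when the package is loaded):
#   X <- matrix(rnorm(50), 10, 5); y <- rnorm(10)
#   oldRidge(beta = numeric(5), Lambda = 1, X = X, fit = .exampleGaussianFit(y))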
###################################
# Adjusted lasso algorithm
# Tries to prevent large models by first fitting at higher values of lambda
# Often faster for "pure" lasso
# Not recommended for elastic net
###################################
.steplasso <- function(beta, lambda, lambda2=0, positive, X, fit, trace = FALSE,
epsilon, maxiter = Inf) {
betaNames = names(beta)
out = .Call('penalized_StepLasso', PACKAGE = 'penalized', beta, lambda,
lambda2, positive, X, fit, trace, epsilon, maxiter)
out$beta = as.numeric(out$beta)
names(out$beta) = betaNames
return(out)
}
###################################
# Park & Hastie's suggestion for the next lambda where the active set changes
###################################
.park <- function(beta, lambda, lambda2=0, positive, X, fit) {
gradient <- drop(crossprod(X, fit$residuals))
active1 <- ifelse(positive, (gradient - lambda) / lambda > -1e-4, (abs(gradient) - lambda) / lambda > -1e-4)
active2 <- ifelse(positive, beta > 0, beta != 0)
active <- active2 | active1
if (sum(active) > 0) {
activeX <- X[,active,drop=FALSE]
if (length(fit$W$diagW) > 0 & length(fit$W$P) > 1) {
XadW <- activeX * matrix(sqrt(fit$W$diagW), nrow(activeX), ncol(activeX))
XaWXa <- crossprod(XadW) - crossprod(crossprod(fit$W$P, activeX))
XdW <- X * matrix(sqrt(fit$W$diagW), nrow(X), ncol(X))
XWXa <- crossprod(XdW, XadW) - crossprod(crossprod(fit$W$P, X), crossprod(fit$W$P, activeX))
} else if (length(fit$W$diagW) > 0) {
XaW <- activeX * matrix(sqrt(fit$W$diagW), nrow(activeX), ncol(activeX))
XaWXa <- crossprod(XaW)
XW <- X * matrix(sqrt(fit$W$diagW), nrow(X), ncol(X))
XWXa <- crossprod(XW, XaW)
} else {
XaWXa <- crossprod(activeX)
XWXa <- crossprod(X, activeX)
}
signbetaactive <- ifelse(sign(beta[active]) == 0, sign(gradient[active]), sign(beta[active]))
temp <- solve(XaWXa, signbetaactive * lambda[active])
aa <- XWXa %*% temp / lambda
hh1 <- (1 - gradient / lambda) / (1-aa)
hh1[hh1 < 0] <- Inf
hh2 <- (1 + gradient / lambda) / (1+aa)
hh2[hh2 < 0] <- Inf
hh3 <- - beta[active] / temp
hh <- numeric(length(beta))
hh[!active] <- pmin(hh1[!active], hh2[!active])
hh[active] <- hh3
if (all(hh <= 0)) hh <- Inf else hh <- min(hh[hh>=0])
if (hh < 1e-4) {hh <- 1e-4 }
} else {
hh <- 1e-4
}
newbeta <- beta
if (sum(active) > 0) newbeta[active] <- newbeta[active] + hh * temp
list(hh = hh, beta = newbeta)
}
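# Usage sketch (an assumption inferred from the return value, not stated in the
# original source): hh is the relative step along the lambda path, so a caller
# would continue profiling at the next candidate penalty lambda * (1 - hh),
# using the returned beta as a warm start there.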
###################################
# The core ridge algorithm
###################################
.ridge <- function(beta, eta, Lambda, X, fit, trace = FALSE, epsilon = 1e-8, maxiter = 25, fitIN = list()) {
ROnly <- TRUE # the C++ ridge implementation is slower than the R one...
if (ROnly)
{
return(oldRidge(beta, eta, Lambda, X, fit, trace, epsilon, maxiter))
}
else
{
if (missing(eta)) eta <- drop(X %*% beta)
betaNames = names(beta)
out = .Call('penalized_Ridge', PACKAGE = 'penalized', beta, eta, as.matrix(Lambda), X, fit, trace, epsilon, maxiter, fitIN)
out$beta = as.numeric(out$beta)
names(out$beta) = betaNames
return(out)
}
}
###################################
# Workhorse function for cross-validated likelihood
###################################
.cvl <- function(X, lambda1, lambda2, positive, beta, fit, chr, fusedl, groups, trace = FALSE,
betas = NULL, quit.if.failed = TRUE, save.predictions = TRUE, ...) { # TODO: remove testing args
n <- nrow(X)
m <- ncol(X)
# find the right fitting procedure
useP <- FALSE
if(!fusedl){
if (all(lambda1 == 0) && !any(positive)) {
if (m <= n) {
cvfit <- function(leftout, beta) {
subfit <- function(lp) fit$fit(lp, leftout)
.ridge(beta = beta, Lambda = lambda2, X = X[!leftout,,drop = FALSE], fit = subfit, ...)
}
} else {
useP <- TRUE
P <- .makeP(X, lambda2)
PX <- P %*% t(X)
Pl <- P * matrix(sqrt(lambda2), nrow(P), ncol(P), byrow = TRUE)
cvfit <- function(leftout, beta) {
subfit <- function(lp) fit$fit(lp, leftout)
gams <- .solve(tcrossprod(P), P %*% beta)
PlP <- tcrossprod(Pl)
out <- .ridge(beta = gams, Lambda = PlP, X = t(PX[,!leftout,drop = FALSE]),
fit = subfit, ...)
out$beta <- drop(crossprod(P, out$beta))
names(out$beta) <- names(beta)
out
}
}
} else if (all(lambda2 == 0)) {
cvfit <- function(leftout, beta) {
subfit <- function(lp) fit$fit(lp, leftout)
.steplasso(beta = beta, lambda = lambda1, positive = positive,
X = X[!leftout,,drop = FALSE], fit = subfit, ...)
}
} else {
cvfit <- function(leftout, beta) {
subfit <- function(lp) fit$fit(lp, leftout)
.lasso(beta = beta, lambda = lambda1, lambda2 = lambda2, positive = positive,
X = X[!leftout,,drop = FALSE], fit = subfit, ...)
}
}
} else if (fusedl){
cvfit <- function(leftout, beta) {
subfit <- function(lp) fit$fit(lp, leftout)
.flasso(beta = beta, lambda1 = lambda1, chr = chr, lambda2 = lambda2, positive = positive,
X = X[!leftout,,drop = FALSE], fit = subfit,...)
}
}
# "groups" input lists fold allocation for each subject %in% 1:fold
fold <- max(groups)
# fit the full model and make an m x fold matrix of beta if necessary
fullfit <- cvfit(logical(n), beta)
if (is.null(betas)) {
if (fullfit$converged)
betas <- matrix(fullfit$beta, m, fold)
else
betas <- matrix(beta, m, fold)
}
# True cross-validation starts here
if (fold > 1) {
failed <- FALSE
predictions <- vector("list", fold)
cvls <- sapply(1:fold, function(i) {
if (!failed) {
if (trace) {
cat(i)
flush.console()
}
leaveout <- (groups == i)
foldfit <- cvfit(leaveout, betas[,i])
lin.pred <- numeric(n)
lin.pred[leaveout] <- X[leaveout, foldfit$beta != 0, drop=FALSE] %*%
foldfit$beta[foldfit$beta != 0]
lin.pred[!leaveout] <- foldfit$fit$lp0
if (save.predictions)
predictions[[i]] <<- fit$prediction(lin.pred[leaveout], foldfit$fit$nuisance, leaveout)
betas[,i] <<- foldfit$beta
if (trace) cat(rep("\b", trunc(log10(i))+1), sep ="")
out <- fit$cvl(lin.pred, leaveout)
if (quit.if.failed && (is.na(out) || abs(out) == Inf || foldfit$converged == FALSE)) failed <<- TRUE
} else {
out <- NA
}
out
})
if (failed || any(is.na(cvls))) cvls <- -Inf
} else {
cvls <- NA
predictions <- NA
betas <- NA
}
list(cvl = sum(cvls), cvls=cvls, fit = fullfit, betas = betas, predictions = predictions)
}
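# Contract of the `fit` argument of .cvl, inferred from the calls above:
#   fit$fit(lp, leftout)  - evaluates the likelihood at linear predictor lp on
#                           the subjects with !leftout (same return value as
#                           the fit callback documented in .lasso)
#   fit$cvl(lp, leftout)  - loglikelihood of the left-out subjects at lp
#   fit$prediction(lp, nuisance, leftout) - predictions for the left-out subjects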
#######################################
# makes a reduced basis for L2-penalized Newton-Raphson in the p > n case
#######################################
.makeP <- function(X, lambda2, lambda1 = 0, signbeta) {
n <- nrow(X)
p <- ncol(X)
free2 <- (lambda2 == 0)
free1 <- all(lambda1 == 0)
m <- sum(free2)
if (free1) {
P <- matrix(0, n+m, p)
} else {
P <- matrix(0, n+m+1, p)
}
# First m rows: free variables in case of no L2-penalization
for (i in seq_len(m)) P[i, which(free2)[i]] <- 1
# Next n rows: column span of X
P[m + 1:n, which(!free2)] <- X[,!free2,drop=FALSE] * matrix(1/lambda2[!free2], n, p-m, byrow=TRUE)
# Additional row due to L1-penalization
if (!free1) {
P[n+m+1,which(!free2)] <- (lambda1*signbeta/lambda2)[!free2]
}
# check for singularity
rownames(P) <- paste("V", 1:nrow(P))
qrP <- qr(t(P)) # not efficient?
if (qrP$rank < nrow(P)) {
qR <- qr.R(qrP)
keep <- colnames(qR)[sort.list(abs(diag(qR)), decreasing=TRUE)[1:qrP$rank]]
P <- P[keep,]
}
colnames(P) <- NULL
return(P)
}
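# Sketch of the reparameterization (assuming all lambda2 > 0 for simplicity):
# writing gamma for the coefficients in the reduced basis, beta = t(P) %*% gamma,
#   X %*% beta = t(P %*% t(X)) %*% gamma
# and, with Pl = P %*% diag(sqrt(lambda2)), the L2 penalty transforms as
#   t(beta) %*% diag(lambda2) %*% beta = t(gamma) %*% tcrossprod(Pl) %*% gamma
# so the p-dimensional ridge problem becomes an (n+m)-dimensional one; this is
# how .cvl and .cvlapprox use P, PX and PlP in the p > n case.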
#######################################
# a solve() function that does not complain about near-singularity
# often dangerous, but very useful here
#######################################
.solve <- function(a,b) {
out <- try(qr.coef(qr(a, LAPACK=TRUE), b))
if (is(out, "try-error")) stop("Matrix inversion failed. Please increase lambda1 and/or lambda2", call. = FALSE)
return(out)
}
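# (qr(a, LAPACK=TRUE) uses a column-pivoted QR and, unlike base solve(), does
# not reject computationally singular systems; a hard failure is still caught
# by try() above and turned into an informative error.)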
#######################################
# calculate cross-validation folds
#######################################
.getFolds <- function(fold, n) {
if(length(fold) == 1) {
if (fold == n) {
groups <- 1:n
} else {
groups <- sample(n) %% fold + 1
}
} else {
if (length(fold) == n) {
groups <- fold
fold <- max(groups)
} else {
stop("incorrect input of \"fold\"", call.=FALSE)
}
}
if (!all(1:fold %in% groups)) stop("incorrect input of \"fold\"", call.=FALSE)
return(groups)
}
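# Example (illustrative): .getFolds(3, 9) permutes 1:9 and maps the permutation
# to folds via %% 3 + 1, giving each of the folds 1:3 exactly three subjects;
# .getFolds(9, 9) returns 1:9, i.e. leave-one-out folds.
#######################################
# approximated cross-validated likelihood: instead of refitting each fold, the
# per-fold coefficients are obtained from closed-form one-step updates of the
# full ridge fit (see .getBeta2 and .getBetaLinear2 below)
#######################################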
.cvlapprox <- function(X, lambda1, lambda2, positive, beta, fit, groups,trace=FALSE,
betas=NULL, quit.if.failed=TRUE, save.predictions=TRUE, ...)
{
n <- nrow(X)
m <- ncol(X)
fold <- max(groups)
if(fold==n) #if loocv, make sure vector "groups" is ordered
{
groups <- c(1:n)
}
useP <- FALSE
if(m<=n) #use beta
{
fullfit = .ridge(beta = beta, Lambda = lambda2, X = X, fit = fit$fit)
if(!fullfit$converged)
{
cvls <- -Inf
predictions <- NA
betas <- NA
return(list(cvl = sum(cvls), cvls=cvls, fit = fullfit, betas = betas, predictions = predictions))
}
beta <- fullfit$beta
linpreds <- fullfit$fit$lp
delta <- fullfit$fit$residuals
W <- fullfit$fit$W
#check corresponding model: logistic/poisson, linear or Cox by inspection of W
if ( length(W$diagW) > 0 & length(W$P) > 1 ) #cox regression
{
diagW = W$diagW
#W = diag(diagW)
beta_int <- c(0,beta)
Xint <- cbind(1,X)
# XWX = crossprod(W^{1/2}X)
# W^{1/2}X = matrix(sqrt(dW), nrow(X), ncol(X)) * X
# inv <- solve(t(Xint)%*%W%*%Xint+diag(c(0,lambda)))
# inv <- solve(crossprod(matrix(sqrt(diagW),nrow(X),ncol(X))*X)+diag(c(0,lambda)))
XWXlambda <- crossprod(matrix(sqrt(diagW),nrow(Xint),ncol(Xint))*Xint)
diag(XWXlambda) <- diag(XWXlambda) + c(0,lambda2)
inv_tXint <- solve(XWXlambda,t(Xint))
WXint <- matrix(diagW,nrow(Xint),ncol(Xint))*Xint
if(fold < n) #kfold
{
betas <- matrix(0,(m+1),fold)
for(i in 1:fold)
{
#beta_k <- .getBeta(Xint,beta_int,groups,i,W,delta,inv)
beta_k <- .getBeta2(WXint,beta_int,groups,i,delta,inv_tXint)
betas[,i] <- beta_k
}
}
else #loocv
{
#V <- W^(1/2)%*%Xint%*%inv%*%t(Xint)%*%W^(1/2)
V <- WXint%*%inv_tXint
betas <- matrix(beta_int,length(beta_int),fold)
#betas_int <- betas_int - inv%*%t(Xint)%*%diag(1/(1-diag(V))*delta)
betas <- betas - inv_tXint*matrix((1/(1-diag(V))*delta),nrow(inv_tXint),ncol(inv_tXint),byrow=TRUE)
}
X <- Xint # needed for lin.pred
}
else if (length(W$diagW) > 0) #poisson/logistic regression
{
diagW <- W$diagW
XWXlambda <- crossprod(matrix(sqrt(diagW),nrow(X),ncol(X))*X)
diag(XWXlambda) <- diag(XWXlambda) + lambda2
inv_tX <- solve(XWXlambda,t(X))
WX <- matrix(diagW,nrow(X),ncol(X))*X
#inv <- solve(t(X)%*%W%*%X+diag(lambda))
if(fold < n) #kfold
{
betas <- matrix(0,m,fold)
for(i in 1:fold)
{
beta_k <- .getBeta2(WX,beta,groups,i,delta,inv_tX)
betas[,i] <- beta_k
}
}
else #loocv
{
V <- WX%*%inv_tX #forceSymmetric could be useful
betas <- matrix(beta,m,fold)
betas <- betas - inv_tX*matrix((1/(1-diag(V))*delta),nrow(inv_tX),ncol(inv_tX),byrow=TRUE)
}
}
else #linear regression
{
XXlambda <- crossprod(X)
diag(XXlambda) <- diag(XXlambda) + lambda2
inv_tX <- solve(XXlambda,t(X))
#inv <- solve(t(X)%*%X+diag(lambda))
if(fold < n) #kfold
{
betas <- matrix(0,m,fold)
for(i in 1:fold)
{
beta_k <- .getBetaLinear2(X,beta,groups,i,delta,inv_tX)
betas[,i] <- beta_k
}
}
else #loocv
{
V <- X%*%inv_tX
betas <- matrix(beta,m,fold)
betas <- betas - inv_tX*matrix((1/(1-diag(V))*delta),nrow(inv_tX),ncol(inv_tX),byrow=TRUE)
}
}
}
else #use gamma
{
useP <- TRUE
P <- .makeP(X, lambda2)
PX <- P %*% t(X)
Pl <- P * matrix(sqrt(lambda2), nrow(P), ncol(P), byrow = TRUE)
gams <- .solve(tcrossprod(P), P %*% beta) #initial gamma
PlP <- tcrossprod(Pl)
fullfit <- .ridge(beta = gams, Lambda = PlP, X = t(PX), fit = fit$fit)
gams <- fullfit$beta #new gamma
fullfit$beta <- drop(crossprod(P, gams)) #prepare fullfit for return statement
names(fullfit$beta) <- names(beta)
if(!fullfit$converged)
{
cvls <- -Inf
predictions <- NA
betas <- NA
return(list(cvl = sum(cvls), cvls=cvls, fit = fullfit, betas = betas, predictions = predictions))
}
linpreds <- fullfit$fit$lp
delta <- fullfit$fit$residuals
W <- fullfit$fit$W
#check which model you're in
if (length(W$diagW) > 0 & length(W$P) > 1) #cox regression
{
diagW <- W$diagW #vector
G <- P
k <- nrow(G)
l <- ncol(G)
G <- cbind(c(1,rep(0,k)),rbind(rep(0,l),G))
tG <- t(G)
gamma_int <- c(0,gams)
Xint <- cbind(m,X) # NB: m instead of 1! m = median(X)
#B <- Xint%*%tG
B <- tcrossprod(Xint,G)
#Atilde <- G%*%diag(c(0,lambda2))%*%tG
Gl <- G * matrix(sqrt(c(0,lambda2)), nrow(G), ncol(G), byrow = TRUE)
Atilde <- tcrossprod(Gl)
BWB <- crossprod(matrix(sqrt(diagW),nrow(B),ncol(B))*B)
BWBlambda <- BWB + Atilde
inv_tB <- solve(BWBlambda,t(B))
WB <- matrix(diagW,nrow(B),ncol(B))*B
#inv <- solve(t(B)%*%W%*%B+Atilde)
if(fold < n) #kfold
{
betas <- matrix(0,(m+1),fold)
for(i in 1:fold)
{
gamma_k <- .getBeta2(WB,gamma_int,groups,i,delta,inv_tB)
beta_k <- tG %*% gamma_k
betas[,i] <- beta_k
}
}
else #loocv
{
V <- WB %*% inv_tB #forceSymmetric?
gammas_int <- matrix(gamma_int,length(gamma_int),fold)
gammas_int <- gammas_int - inv_tB*matrix((1/(1-diag(V))*delta),nrow(inv_tB),ncol(inv_tB),byrow=TRUE)
betas <- tG %*% gammas_int
}
X <- Xint
}
else if (length(W$diagW) > 0) #poisson/logistic regression
{
diagW <- W$diagW
G <- P
tG <- t(G)
B <- t(PX)
Atilde <- PlP
BWB <- crossprod(matrix(sqrt(diagW),nrow(B),ncol(B))*B)
BWBlambda <- BWB + Atilde
inv_tB <- solve(BWBlambda,t(B))
WB <- matrix(diagW,nrow(B),ncol(B))*B
#inv <- solve(t(B)%*%W%*%B+Atilde)
if(fold < n) #kfold
{
betas <- matrix(0,m,fold)
for(i in 1:fold)
{
gamma_k <- .getBeta2(WB,gams,groups,i,delta,inv_tB)
betas[,i] <- tG %*% gamma_k
}
}
else #loocv
{
V <- WB %*% inv_tB #forceSymmetric?
gammas <- matrix(gams,length(gams),fold)
gammas <- gammas - inv_tB*matrix((1/(1-diag(V))*delta),nrow(inv_tB),ncol(inv_tB),byrow=TRUE)
betas <- tG %*% gammas
}
}
else #linear regression
{
G <- P
tG <- t(G)
B <- t(PX)
Atilde <- PlP
BBlambda <- crossprod(B) + Atilde
inv_tB <- solve(BBlambda,t(B))
#inv <- solve(PX%*%t(PX)+PlP)
if(fold < n) #kfold
{
betas <- matrix(0,m,fold)
for(i in 1:fold)
{
gamma_k <- .getBetaLinear2(B,gams,groups,i,delta,inv_tB)
betas[,i] <- tG %*% gamma_k
}
}
else #loocv
{
V <- B%*%inv_tB
gammas <- matrix(gams,length(gams),fold)
gammas <- gammas - inv_tB*matrix((1/(1-diag(V))*delta),nrow(inv_tB),ncol(inv_tB),byrow=TRUE)
betas <- tG %*% gammas
}
}
}
# True cross-validation starts here
if (fold > 1)
{
failed <- FALSE
predictions <- vector("list", fold)
cvls <- sapply(1:fold, function(i)
{
if (!failed)
{
leaveout <- (groups == i)
lin.pred <- X %*% betas[,i] #?
if (save.predictions)
{
if (length(W)==1) #linear model
{
y <- fullfit$fit$residuals + fullfit$fit$lp
rss <- drop(crossprod(y[!leaveout]-X[!leaveout,]%*%betas[,i]))
nr <- n - sum(leaveout)
predictions[[i]] <<- fit$prediction(lin.pred[leaveout], list(sigma2 = rss/nr), leaveout)
}
else
{
predictions[[i]] <<- fit$prediction(lin.pred[leaveout], fullfit$fit$nuisance, leaveout) #in cox: lin.pred contains intercept
}
}
out <- fit$cvl(lin.pred, leaveout)
if (quit.if.failed && (is.na(out) || abs(out) == Inf || fullfit$converged == FALSE)) failed <<- TRUE
}
else
{
out <- NA
}
out
})
if (failed || any(is.na(cvls))) cvls <- -Inf
}
else
{
cvls <- NA
predictions <- NA
betas <- NA
}
list(cvl = sum(cvls), cvls=cvls, fit = fullfit, betas = betas, predictions = predictions)
}
# The only difference from .getBeta2 is the weight matrix W, which does not occur in the linear model
.getBetaLinear2 <- function(X,beta,groups,j,delta,inv_tX)
{
Xj <- X[groups==j,,drop=FALSE]
inv_tXj <- inv_tX[,groups==j,drop=FALSE]
Imin <- -Xj%*%inv_tXj #forceSymmetric
diag(Imin) <- diag(Imin) + 1
betaj <- beta - inv_tXj%*%solve(Imin,delta[groups==j]) #Imin can be singular in some specific cases
#betaj <- beta - inv%*%tXj%*%solve(I-Xj%*%inv%*%tXj)%*%delta[groups==j]
return(betaj)
}
.getBeta2 <- function(WX,beta,groups,j,delta,inv_tX)
{
WXj <- WX[groups==j,,drop=FALSE]
inv_tXj <- inv_tX[,groups==j,drop=FALSE]
Imin <- -WXj%*%inv_tXj #forceSymmetric
diag(Imin) <- diag(Imin) + 1
betaj <- beta - inv_tXj%*%solve(Imin,delta[groups==j])
return(betaj)
}
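# Derivation sketch for the one-step update above (a standard Woodbury-type
# identity; notation: A = X'WX + diag(lambda), X_j = the rows of fold j):
#   beta_j = beta - A^{-1} X_j' (I - W_j X_j A^{-1} X_j')^{-1} delta_j
# which is what .getBeta2 computes with inv_tX = A^{-1} X' precomputed once.
# The loocv branches in .cvlapprox use the same identity with the hat-type
# matrix V = W X A^{-1} X', where each update only needs diag(V).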
###################################
# Fused lasso
###################################
.flasso <- function(beta, chr, lambda1, lambda2,fit, X, positive, trace = FALSE,
epsilon=1e-5, maxiter=Inf) {
# Input:
# beta: a vector of length m (say): starting values
# lambda1 and lambda2: vectors of length m
# X: data matrix
# positive: logical vector marking the coefficients that are restricted to be non-negative
# fit must be function(eta), taking the linear predictor eta.
# It should return a list with at least:
# W: the weights matrix, or its diagonal
# loglik: the unpenalized loglikelihood (numeric)
# residuals: the residuals
# If the model includes an intercept, starting with all beta coefficients
# equal to zero may slow down convergence, so a warm start is preferable.
m=length(beta)
n=nrow(X)
# find regression coefficients free of L1-penalty or positivity restraint
free <- (lambda1 == 0 & lambda2==0) & !positive
# initialize
LL <- -Inf
penalty <- penalty1 <- penalty2 <- Inf
converged = FALSE
active <- !logical(m)
if (all(free)) {
direc <- numeric(length(beta))
} else if (any(free)) {
direc <- numeric(sum(!free))
} else {
direc <- numeric(length(beta))
}
actd = (diff(direc))!=0
checkd = matrix(0,1,2)
nvar <- m
oldmo=length(beta)
tryNR <- FALSE
NRfailed <- FALSE
finish <- FALSE
newfit <- TRUE
retain <-0
cumsteps <- 0
iter <- 0
#iterate
while(!finish){
nzb = (beta!=0)
if (all(free)) {
direc <- numeric(length(beta))
} else if (any(free)) {
direc <- numeric(sum(!free))
} else {
direc <- numeric(length(beta))
}
# calculate the local likelihood fit
if (newfit) {
activeX <- X[,nzb, drop=FALSE]
linpred <- drop(activeX %*% beta[nzb])
lp=linpred
localfit=fit(lp)
# Check for divergence
if (is.na(localfit$loglik)) {
if (trace) cat(rep("\b", trunc(log10(nvar))+1), sep ="")
warning("Model does not converge: please increase lambda.", call.=FALSE)
converged <- FALSE
break
}
grad <- drop(crossprod(X,localfit$residuals))
oldLL <- LL
oldpenalty <- penalty
LL <- localfit$loglik
penalty1=sum(lambda1[active]*abs(beta[active]))
penalty2 = 0
if((!any(chr==0) && length(table(chr))<=1) || (any(chr ==0) && length(table(chr))<=2)){
penalty2= sum(lambda2[1:(length(lambda2)-1)]*abs(((diff(beta)))))
penalty=penalty1+penalty2
finishedLL <- (2 * abs(LL - oldLL) / (2 * abs(LL - penalty) + 0.1) < epsilon)
finishedpen <- (2 * abs(penalty - oldpenalty) / (2 * abs(LL - penalty) + 0.1) < epsilon)
cumsteps <- 0
}
if((!any(chr==0) && length(table(chr))>1) || (any(chr==0) && length(table(chr))>2)){
chrn=chr[!free]
for (chri in 1:length((as.numeric(names(table(chrn)))))){
betac=beta[!free]
lambda2c=lambda2[!free]
beta1.1 = betac[which(chrn==(as.numeric(names(table(chrn))))[chri])]
lambda2.1 = lambda2c[which(chrn==(as.numeric(names(table(chrn))))[chri])]
penalty21= sum(lambda2.1[1:(length(lambda2.1)-1)]*abs(((diff(beta1.1)))))
penalty2= penalty2 + penalty21
}
penalty <- penalty1 + penalty2
finishedLL <- (2 * abs(LL - oldLL) / (2 * abs(LL - penalty) + 0.1) < epsilon)
finishedpen <- (2 * abs(penalty - oldpenalty) / (2 * abs(LL - penalty) + 0.1) < epsilon)
cumsteps <- 0
}
}
# Calculate the penalized gradient from the likelihood gradient
###Initialize###
##### Core ########################
chrn=chr[!free]
if(!(all(free))){
gradc=grad[!free]
nzbc=nzb[!free]
betac=beta[!free]
lambda1c=lambda1[!free]
lambda2c=lambda2[!free]
positivec=positive[!free]
for (chri in 1:length((as.numeric(names(table(chrn)))))){
beta.1 = betac[which(chrn==(as.numeric(names(table(chrn))))[chri])]
grad.1 = gradc[which(chrn==(as.numeric(names(table(chrn))))[chri])]
lambda1.1 = lambda1c[which(chrn==(as.numeric(names(table(chrn))))[chri])]
lambda2.1 = lambda2c[which(chrn==(as.numeric(names(table(chrn))))[chri])]
nzb.1 = nzbc[which(chrn==(as.numeric(names(table(chrn))))[chri])]
positive.1 = positivec[which(chrn==(as.numeric(names(table(chrn))))[chri])]
gdir <- getdirec(grad=grad.1,nzb=nzb.1,beta=beta.1,lambda1=lambda1.1,lambda2=lambda2.1,positive=positive.1)
direc[which(chrn==(as.numeric(names(table(chrn))))[chri])] = gdir$direc_i
}
if(any(free)){direc=c(grad[which(free)],direc)}
} else {direc=grad}
oldcheckd = checkd
oldactive=active
active= direc!=0 | nzb
checkd = nonZero(direc[!free])
actd = (diff(direc[!free]))!= 0
activdir=direc[active]
activbeta=beta[active]
# check if retaining the old fit of the model does more harm than good
oldnvar <- nvar
nvar <- sum(active)
if ((oldLL - oldpenalty > LL - penalty) || (nvar > 1.1* oldnvar)) {
retain <- 0.5 * retain
}
# check convergence
if(length(checkd)==length(oldcheckd)){
finishednvar <- (!any(xor(active, oldactive)) && all(checkd==oldcheckd))
}
if(length(checkd)!=length(oldcheckd)){
finishednvar <- (!any(xor(active, oldactive)) && length(checkd)==length(oldcheckd))
}
finishedn <- !any(xor(active, oldactive))
finish <- (finishedLL && finishedn && finishedpen) || (all(activdir == 0)) || (iter == maxiter)
if (!finish) {
iter <- iter+1
if (tryNR){
NRs<- tryNRstep(beta=beta,direc=direc,active=active,X=X,newfit=newfit,localfit=localfit)
beta = NRs$beta
newfit = NRs$newfit
NRfailed = NRs$NRfailed
}
if (!tryNR || NRfailed) {
if (newfit) {
Xdir <- drop(X[,active,drop=F] %*% activdir)
if (length(localfit$W$diagW) > 0 & length(localfit$W$P) > 1) {
curve <- (sum(Xdir * Xdir * localfit$W$diagW) - drop(crossprod(crossprod(localfit$W$P, Xdir)))) / sum(activdir * activdir)
} else if (length(localfit$W$diagW) > 0) {
curve <- sum(Xdir * Xdir * localfit$W$diagW) / sum(activdir * activdir)
} else {
curve <- sum(Xdir * Xdir) / sum(activdir * activdir)
}
topt <- 1 / curve
}
# how far can we go in the calculated direction before finding a new zero?
tedge <- numeric(length(beta))
tedge[active] <- -activbeta / activdir
tedge[tedge <= 0] <- tedge[free] <- 2 * topt
tedgeg <- NULL # stays NULL when there are no fusion edges to consider
if((length(lambda1)!=1 || length(lambda2)!=1) && (sum(lambda1==0)!=length(lambda1) || sum(lambda2==0)!=length(lambda2))){
tedgeg = -diff(beta)/diff(direc)
tedgeg[which(is.na(tedgeg))]=0
dchr = diff(chr)
tedgeg[which(dchr!=0)]=0
tedgeg[tedgeg <= 0] <- 2 * topt
}
if(!is.null(tedgeg)){
mintedge <- min(min(tedge),min(tedgeg))}
if(is.null(tedgeg)){mintedge <- min(tedge)}
# recalculate beta
if (mintedge + cumsteps < topt) {
beta[active] <- activbeta + mintedge * activdir
beta[tedge == mintedge] <- 0 # avoids round-off error
if(!is.null(tedgeg)){
tmin=which(tedgeg==mintedge)
beta[tmin]=beta[tmin+1]
}
cumsteps=cumsteps+mintedge
newfit <- (cumsteps > retain * topt) || (nvar == 1)
NRfailed <- FALSE
tryNR <- FALSE
} else {
beta[active] <- activbeta + (topt - cumsteps) * activdir
#if(iter<=1){tryNR <- (cumsteps == 0) && finishednvar && !NRfailed && nvar < m}else{tryNR <- (cumsteps == 0)&& !NRfailed && nvar < n}
tryNR <- (cumsteps == 0) && !NRfailed && finishednvar && nvar < m
newfit <- TRUE
}
}
}
else {
converged <- (iter < maxiter)
}
}
return(list(beta = beta, fit = localfit, penalty = c(L1 = penalty1, L2 = penalty2), iterations = iter, converged = converged))
}
######################################################################################
## Function for getting the index ranges of non-zero coefficients, grouping stretches of coefficients equal to each other ##
nonZero <- function(inp)
{
n <- length(inp)
indices <- matrix(rep(0,n*2),n,2)
j=1
i=1
while(i<=n)
{
if(inp[i] == 0)
{
i <- i+1
}
else if(i<n && (inp[i+1]==inp[i]))
{
indices[j,1] <- i
while(i<n && inp[i+1]==inp[i])
{
i <- i+1
}
indices[j,2] <- i
j <- j+1
i <- i+1
}
else
{
indices[j,1] <- indices[j,2] <- i
i <- i+1
j <- j+1
}
}
return(indices[seq_len(j-1), , drop=FALSE])
}
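# Example (illustrative): nonZero(c(0, 3, 3, 0, 5)) gives
#   rbind(c(2, 3), c(5, 5))
# i.e. one stretch of equal nonzero coefficients at positions 2:3 and a
# singleton at position 5.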
#########################################
###################################
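# Computes the search direction for one chromosome of the fused lasso:
# gradients are averaged over stretches of equal coefficients (P_grad), the
# L1 term contributes lambda1 * sign(beta) (lam1), and the fusion term adds
# signed differences of lambda2 at stretch boundaries (lam2); candidate split
# points are then scanned and the direction of maximal norm (vmax) is kept.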
getdirec <- function(grad,nzb,beta,lambda1,lambda2,positive){
#####################
##### Core ########################
direc_i=numeric(length(beta))
i=1
vmax = 0
lam1 <- lam2 <- P_grad<-cu_g <-numeric(length(beta))
oldvmax = vmax
olddirec_i=direc_i
if(any(nzb)){
nz <- nonZero(beta)
nz=rbind(nz,c(0,0))
for (ik in 1:(nrow(nz)-1)){
cu_g[nz[ik,1]:nz[ik,2]]=(sum(grad[nz[ik,1]:nz[ik,2]]))
P_grad[nz[ik,1]:nz[ik,2]]=(cu_g[nz[ik,1]:nz[ik,2]])/(length(nz[ik,1]:nz[ik,2]))
if(nz[ik,1]==nz[ik,2]){
lam1[nz[ik,1]]=lambda1[nz[ik,1]]*sign(beta[nz[ik,1]])
if(nz[ik,1]==1){
lam2[nz[ik,1]]=-lambda2[nz[ik,1]]*sign(beta[nz[ik,1]+1]-beta[nz[ik,1]])*-1
}
if(nz[ik,1]==length(beta)){
lam2[nz[ik,1]]=-lambda2[nz[ik,1]]*sign(beta[nz[ik,1]]-beta[nz[ik,1]-1])
}
if(nz[ik,1]!=1 && nz[ik,1]!=length(beta)){
lam2[nz[ik,1]]=((-lambda2[nz[ik,1]]*sign(beta[nz[ik,1]]-beta[nz[ik,1]-1]))-(lambda2[nz[ik,1]]*sign(beta[nz[ik,1]+1]-beta[nz[ik,1]])*-1))
}
}
if(nz[ik,1]!=nz[ik,2]){
lam1[nz[ik,1]:nz[ik,2]]=lambda1[nz[ik,1]:nz[ik,2]]*sign(beta[nz[ik,1]:nz[ik,2]])
if(nz[ik,1]==1 && nz[ik,2]!=length(beta)){
lam2[nz[ik,1]:nz[ik,2]]=(-lambda2[nz[ik,1]]*sign(beta[nz[ik,2]+1]-beta[nz[ik,1]])*-1)/(length(nz[ik,1]:nz[ik,2]))
}
if(nz[ik,1]!=1 && nz[ik,2]==length(beta) ){
lam2[nz[ik,1]:nz[ik,2]]=(-lambda2[nz[ik,1]]*sign(beta[nz[ik,1]]-beta[nz[ik,1]-1]))/(length(nz[ik,1]:nz[ik,2]))
}
if(nz[ik,2]==length(beta) && nz[ik,1]==1 ){
lam2[nz[ik,1]:nz[ik,2]]=0
}
if(nz[ik,2]!=length(beta) & nz[ik,1]!=1){
lam2[nz[ik,1]:nz[ik,2]]=(((-lambda2[nz[ik,1]]*sign(beta[nz[ik,1]]-beta[nz[ik,1]-1]))-(lambda2[nz[ik,1]]*sign(beta[nz[ik,2]+1]-beta[nz[ik,1]])*-1)))/(length(nz[ik,1]:nz[ik,2]))
}
}
}
}
direc_i= P_grad - lam1 + lam2
olddirec_i=direc_i
while(i<length(beta)){
lam1_1 <- lam1_2 <- lam2_1 <- lam2_2 <-cu_g1 <- cu_g2 <- P_grad1 <- P_grad2<-numeric(length(beta))
direc_i <- numeric(length(beta))
direc_i= P_grad - lam1 + lam2
######## Penalized Gradient for nonzero coefficients##################
if(i<length(beta)){
if(nzb[i]){
nzb_m=c(nzb[i:length(beta)],F)
m=which(diff(nzb_m)==-1)
m=i+(m-1)
m=m[1]
#############gives the length of the non-zero stretch######
if(i==length(nzb)){m=length(nzb)}
betam=beta[i:m]
if(sum(diff(betam))==0){j=m}
if(sum(diff(betam))!=0){
j=which(diff(betam)!=0)
j=i+(j-1)
j=j[1]}
m=j
}
if(!nzb[i]){
nzb_mo=c(nzb[i:length(beta)],T)
mo1=which(diff(nzb_mo)==1)[1]
mo1=i+(mo1-1)
m=mo1
if(i==length(nzb)){m=length(beta)}
}
P_grad1=P_grad
lam1_1=lam1
lam2_1=lam2
P_grad1[i:m]=0
lam1_1[i:m]=0
lam2_1[i:m]=0
if((length(i:m)==1) && (!nzb[i]) ){
P_grad1[m]=grad[m]
lam1_1[m]=lambda1[m]*sign(grad[m])
if(m!=length(beta)){
if(m!=1){
if(beta[m-1]!=0 & beta[m+1]!=0){
lam2_1[m]=((-lambda2[m]*sign(beta[m]-beta[m-1]))-(lambda2[m]*sign(beta[m+1]-beta[m])*-1))
}
if(beta[m-1]==0 & beta[m+1]!=0){
lam2_1[m]=((-lambda2[m]*sign(grad[m]-0))-(lambda2[m]*sign(beta[m+1]-beta[m])*-1))
}
if (beta[m-1]!=0 & beta[m+1]==0){
lam2_1[m]=((-lambda2[m]*sign(beta[m]-beta[m-1]))- (lambda2[m]*sign(0-grad[m])*-1))
}
}
if(m==1){
lam2_1[m]=-lambda2[m]*sign(beta[m+1]-beta[m])*-1
}
}
if(m==length(beta)){
if(beta[m-1]!=0){
lam2_1[m]=-lambda2[m]*sign(beta[m]-beta[m-1])
}
if(beta[m-1]==0){
lam2_1[m]=-lambda2[m]*sign(grad[m]-0)
}
}
}
if(length(i:m)!=1){
if(nzb[i]) {
cu_g1=cu_g2=cu_g
cu_g1[i:m]=cumsum(grad[i:m])
cu_g2[i:(m-1)]=cu_g1[m]-cu_g1[i:(m-1)]
##################################################
P=numeric(length(cu_g1))
P[i:m]=1
P[i:m]=cumsum(P[i:m])
P_grad1[i:m]=(cu_g1[i:m])/P[i:m]
P_grad2[i:m]=(cu_g2[i:m])/(P[m]-P)[i:m]
P_grad2[m]=0
##########################################
###############Calculate lambda1 and lambda2 with signs ################
if(i==1){
if (m!=length(beta)){
lam2_1[i:m]=((-lambda2[i:m]*sign(P_grad2[i:m]-P_grad1[i:m])*-1))/P[i:m]
lam2_1[m]=(-lambda2[m]*sign(beta[m+1]-beta[m])*-1)/(P[m])
}
if(m==length(beta)){
lam2_1[i:m]=(-lambda2[i:m]*sign(P_grad2[i:m]-P_grad1[i:m])*-1)/P[i:m]
lam2_1[m]=0
}
}
if(i!=1){
if(m!=length(beta)){
lam2_1[i:m]=((-lambda2[i:m]*sign(beta[i]-beta[i-1])-(lambda2[i:m]*sign(P_grad2[i:m]-P_grad1[i:m])*-1)))/P[i:m]
lam2_1[m]=(-lambda2[m]*sign(beta[i]-beta[i-1]) - (lambda2[m]*sign(beta[m+1]-beta[i])*-1))/(P[m])
}
if(m==length(beta)){
lam2_1[i:m]=((-lambda2[i:m]*sign(beta[i]-beta[i-1])-(lambda2[i:m]*sign(P_grad2[i:m]-P_grad1[i:m])*-1)))/P[i:m]
lam2_1[m]=(-lambda2[m]*sign(beta[i]-beta[i-1]))/(P[m])
}
}
if(m!=length(beta)){
lam2_2[i:m]=((-lambda2[i:m]*sign(P_grad2[i:m]-P_grad1[i:m])-(lambda2[i:m]*sign(beta[m+1]-beta[i])*-1)))/(P[m]-P[i:m])
lam2_2[m]=0
}
if(m==length(beta)){
lam2_2[i:m]=(-lambda2[i:m]*sign(P_grad2[i:m]-P_grad1[i:m]))/(P[m]-P[i:m])
lam2_2[m]=0
}
#####################################################
lam1_1[i:m] =(lambda1[i:m]*sign(P_grad1[i:m]))
lam1_2[i:m]=(lambda1[i:m]*sign(P_grad2[i:m]))
lam1_2[m]=0
}
#######################################################################
if(!nzb[i]){
cu_g1=P_grad
cu_g1[i:m]=cumsum(grad[i:m])
cu_g2=numeric(length(cu_g1))
##################################################
P=numeric(length(cu_g1))
P[i:m]=1
P[i:m]=cumsum(P[i:m])
P_grad1[i:m]=(cu_g1[i:m])/P[i:m]
P_grad2= numeric(length(beta))
##########################################
###############Calculate lambda1 and lambda2 with signs ################
if(i==1){
if (m!=length(beta)){
lam2_1[i:m]= (-lambda2[i:m]*sign(0-P_grad1[i:m])*-1)/P[i:m]
lam2_1[m]=(-lambda2[m]*sign(beta[m+1]-beta[m])*-1)/(P[m])
}
if(m==length(beta)){
lam2_1[i:m]= (-lambda2[i:m]*sign(0-P_grad1[i:m])*-1)/P[i:m]
lam2_1[m]=0
}
}
if(i!=1){
if (m == length(beta)){
if(beta[i-1]==0){
lam2_1[i:m]=(-lambda2[i:m]*sign(P_grad1[i:m]-0)- (lambda2[i:m]*sign(0-P_grad1[i:m])*-1))/P[i:m]
lam2_1[m]=(-lambda2[m]*sign(P_grad1[m]-0))/(P[m])
}
if(beta[i-1]!=0){
lam2_1[i:m]=((-lambda2[i:m]*(sign(beta[i:m]-beta[i-1])))- (lambda2[i:m]*sign(0-P_grad1[i:m])*-1))/P[i:m]
lam2_1[m]=(-lambda2[m]*(sign(beta[m]-beta[i-1])))/(P[m])
}
}
if(m != length(beta)){
if(beta[m+1]==0 & beta[i-1]==0){
lam2_1[i:m]=((-lambda2[i:m]*(sign(P_grad1[i:m]-0)))- (lambda2[i:m]*sign(0-P_grad1[i:m])*-1))/P[i:m]
lam2_1[m]= lam2_1[m]/P[m]
}
if(beta[m+1]==0 & beta[i-1]!=0){
lam2_1[i:m]=((-lambda2[i:m]*(sign(beta[i:m]-beta[i-1])))- (lambda2[i:m]*sign(0-P_grad1[i:m])*-1))/P[i:m]
lam2_1[m]= lam2_1[m]/P[m]
}
if(beta[m+1]!=0 & beta[i-1]==0){
lam2_1[i:m]=((-lambda2[i:m]*(sign(P_grad1[i:m]-0)))- (lambda2[i:m]*sign(0-P_grad1[i:m])*-1))/P[i:m]
lam2_1[m]= ((-lambda2[m]*(sign(P_grad1[m]-0)))- (lambda2[m]*sign(beta[m+1]-beta[m])*-1))/P[m]
}
if(beta[m+1]!=0 & beta[i-1]!=0){
lam2_1[i:m]=((-lambda2[i:m]*(sign(beta[i:m]-beta[i-1])))- (lambda2[i:m]*sign(0-P_grad1[i:m])*-1))/P[i:m]
lam2_1[m] = ((-lambda2[m]*(sign(beta[m]-beta[i-1])))- (lambda2[m]*sign(beta[m+1]-beta[m])*-1))/P[m]
}
}
}
lam1_1[i:m]=(lambda1[i:m]*sign(P_grad1[i:m]))
}
}
if(nzb[i] && (length(i:m)!=1)){
direc1_1=numeric(length(cu_g1))
direc1_2=numeric(length(cu_g2))
direc1_1[i:m]=P_grad1[i:m]-lam1_1[i:m]+lam2_1[i:m]
direc1_1[length(direc1_1)]=P_grad1[length(direc1_1)]-lam1_1[length(direc1_1)]+lam2_1[length(direc1_1)]
direc1_2[i:m]=P_grad2[i:m]-lam1_2[i:m]+lam2_2[i:m]
test=(direc1_1>direc1_2)
test1=(direc1_1<direc1_2)
testd = ((test&((P_grad1)>(P_grad2)))|(test1&((P_grad1)<(P_grad2))))
testd[m] = F
if(any(testd[i:m])){
test2=(i:m)[which(testd[i:m])]
direc_j=matrix(0,nrow=length(direc1_1),ncol=length(direc1_2))
if(length(test2)>1){
for(ik in 1:length(test2)){direc_j[i:test2[ik],test2[ik]]=(direc1_1)[test2[ik]]}
for(ik in 1:(length(test2)-1)){direc_j[(test2[ik]+1):m,test2[ik]]=(direc1_2)[test2[ik]]}
if(test2[length(test2)]!=m){
direc_j[(test2[length(test2)]+1):m,test2[length(test2)]]=direc1_2[test2[length(test2)]]
}
} else
if(length(test2)==1){
if (test2==m){
direc_j[i:test2,test2]=(direc1_1)[test2]
}
if (test2!=m){
direc_j[i:test2,test2]=(direc1_1)[test2]
direc_j[(test2+1):m,test2]=(direc1_2)[test2]
}
}
norm1 <- apply(direc_j,2,function(il){sqrt(sum(il*il))})
vmax2=max(norm1)
direc_i[i:m]=direc_j[(i:m),which(norm1==vmax2)]
if(which(vmax2==norm1)==m){vmax=0}
if(which(vmax2==norm1)!=m){
vmax= sqrt(sum(direc_i*direc_i)) }
}
}
if(!nzb[i]){
if(length(i:m)!=1){
lam=numeric(length(lam1_1))
lam=((lam1_1)-(lam2_1))
if(any((P_grad1[i:m]*sign(P_grad1[i:m]))>(lam[i:m]*sign(P_grad1[i:m])))){
d_i = which((P_grad1[i:m]*sign(P_grad1[i:m]))>(lam[i:m]*sign(P_grad1[i:m])))
direc1_1= P_grad1 - lam1_1 + lam2_1
direc_j=matrix(0,length(i:m),length(i:m))
for(im in 1:length(d_i)){
direc_j[1:d_i[im],d_i[im]]=((P_grad1[i:m])[d_i[im]]-((lam1_1[i:m])[d_i[im]])+((lam2_1[i:m])[d_i[im]]))
}
norm_d <- apply(direc_j,2,function(il){sqrt(sum(il*il))})
vmax1=max(norm_d)
direc1_1[i:m]=direc_j[,which(norm_d==vmax1)]
direc_i[i:m]=direc1_1[i:m]
vmax = sqrt(sum(direc_i*direc_i))
}
}
if(length(i:m)==1){
if((grad[m]*sign(grad[m]))>(lam1_1[m]-lam2_1[m])*sign(grad[m])){
direc_i[m]=grad[m] - lam1_1[m] + lam2_1[m]
vmax= sqrt(sum(direc_i*direc_i))
}
}
}
if (oldvmax >= vmax) {
direc_i = olddirec_i
vmax = oldvmax
}
olddirec_i=direc_i
oldvmax=vmax
if(nzb[i]){
if(length(i:m)==1){io=i+1}
if(length(i:m)!=1){
if(j==m){io=m+1}
if(j!=m){io=j+1}
}
i=io } else {i=i+1}
}
}
return(list(direc_i = direc_i))
}
######################################
######################################
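# Attempts a Newton-Raphson step for the fused lasso: first on the individual
# active coefficients and, if no nonzero coefficient changes sign, a second
# step on the fused groups (stretches of equal coefficients collapsed into
# single columns of summed covariates). NRfailed is set when a sign flips.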
tryNRstep <- function(beta, direc, active, X, localfit, newfit) {
NRm=nonZero(beta)
if(any((beta==0) & active)){
NRact=which((beta==0) & active)
NRm=rbind(NRm,cbind(NRact,NRact))
}
NRM=rbind(NRm,c(0,0))
NR_X=matrix(0,nrow=nrow(X),ncol=sum(active))
NRb=beta[active]
NR_X = as.matrix(X[,active])
if (length(localfit$W$diagW) > 0 & length(localfit$W$P) > 1) {
XdW <- NR_X * matrix(sqrt(localfit$W$diagW), nrow(NR_X), ncol(NR_X))
hessian <- -crossprod(XdW) + crossprod(crossprod(localfit$W$P, NR_X))
} else if (length(localfit$W$diagW) > 0) {
XW <- NR_X * matrix(sqrt(localfit$W$diagW), nrow(NR_X), ncol(NR_X))
hessian <- -crossprod(XW)
} else {
hessian <- -crossprod(NR_X)
}
NRbeta <- NRb - drop(.solve(hessian, direc[active]))
newbeta=numeric(length(beta))
newbeta[active] = NRbeta
NRfailed <- !all(sign(newbeta[beta!=0]) == sign(beta[beta!=0]))
if (!NRfailed) {
NR_X=matrix(0,nrow=nrow(X),ncol=(nrow(NRM)-1))
NRb=beta[NRM[-(nrow(NRM)),1]]
for (ik in 1:(nrow(NRM)-1)){if(NRM[ik,1]==NRM[ik,2]){
NR_X[,ik]=X[,NRM[ik,1]]}
else{NR_X[,ik]=rowSums(X[,NRM[ik,1]:NRM[ik,2]])}
}
NR_X=as.matrix(NR_X)
if (length(localfit$W$diagW) > 0 & length(localfit$W$P) > 1) {
XdW <- NR_X * matrix(sqrt(localfit$W$diagW), nrow(NR_X), ncol(NR_X))
hessian <- -crossprod(XdW) + crossprod(crossprod(localfit$W$P, NR_X))
} else if (length(localfit$W$diagW) > 0) {
XW <- NR_X * matrix(sqrt(localfit$W$diagW), nrow(NR_X), ncol(NR_X))
hessian <- -crossprod(XW)
} else {
hessian <- -crossprod(NR_X)
}
NRbeta <- NRb - drop(.solve(hessian, direc[NRM[-(nrow(NRM)),1]]))
newbeta=numeric(length(beta))
for (ki in 1:(nrow(NRM)-1)){newbeta[NRM[ki,1]:NRM[ki,2]]=NRbeta[ki]}
beta=newbeta
newfit <- TRUE
}
return(list(beta=beta,newfit=newfit,NRfailed = NRfailed))
}
##############################################