hierNet <- function(x, y, lam, delta=1e-8, strong=FALSE, diagonal=TRUE, aa=NULL, zz=NULL, center=TRUE, stand.main=TRUE, stand.int=FALSE,
rho=nrow(x), niter=100, sym.eps=1e-3,
step=1, maxiter=2000, backtrack=0.2, tol=1e-5,
trace=0) {
# Main Hiernet function for fitting at a single parameter lambda.
# Note: L1 penalty terms have parameter lam.l1 = lambda * (1-delta)
# and L2 penalty has parameter lam.l2 = lambda * delta.
#
# stand.main and stand.int refer to scaling
stopifnot(nrow(x) == length(y), lam >= 0, delta >= 0, delta <= 1)
stopifnot(!is.null(step) && !is.null(maxiter))
if (strong) stopifnot(!is.null(niter))
stopifnot(class(y) == "numeric")
stopifnot(class(lam) == "numeric")
stopifnot(class(delta) == "numeric")
stopifnot(class(step) == "numeric", step > 0, maxiter > 0)
stopifnot(is.finite(x), is.finite(y), is.finite(lam), is.finite(delta))
this.call <- match.call()
if (!center) cat("WARNING: center=FALSE should almost never be used. This option is available for special uses only.", fill=TRUE)
# center and (maybe) scale variables
x <- scale(x, center=center, scale=stand.main)
mx <- attr(x, "scaled:center")
sx <- attr(x, "scaled:scale") # may be NULL
if (center) {
my <- mean(y)
y <- y - my
} else my <- NULL
if (is.null(zz)) {
if (trace > 0) cat("Computing zz...", fill=TRUE)
zz <- compute.interactions.c(x, diagonal=diagonal)
}
if (is.matrix(zz)) {
zz <- scale(zz, center=center, scale=stand.int)
mzz <- attr(zz, "scaled:center")
szz <- attr(zz, "scaled:scale") # may be NULL
zz <- as.numeric(zz)
} else {
mzz <- szz <- NULL
#cat("Provided zz is not a matrix, so it's assumed to be already centered.", fill=TRUE)
}
xnum <- as.numeric(x)
p <- ncol(x)
lam.l1 <- lam * (1 - delta)
lam.l2 <- lam * delta
if (strong) {
# strong hierarchy -- use ADMM4
if (is.null(rho)) rho <- as.numeric(nrow(x))
stopifnot(is.numeric(rho), is.finite(rho))
aa <- admm4(x, xnum, y, lam.l1=lam.l1, lam.l2=lam.l2, diagonal=diagonal, zz=zz,
rho=rho, niter=niter, aa=aa, sym.eps=sym.eps, # ADMM params
stepsize=step, backtrack=backtrack, maxiter=maxiter, tol=tol, # GG params
trace=trace)
# lack of symmetry in theta means that sometimes strong hierarchy will be (very slightly violated)
ii <- aa$bp + aa$bn == 0
# note aa$th[ii, ] = 0 since weak hierarchy holds for sure
if (sum(ii) > 0 & sum(ii) < p) {
thr <- max(abs(aa$th[!ii, ii]))
if (thr > 0) {
cat(" thr = ",thr, fill=TRUE)
if (thr > 1e-3)
warning("Had to change ADMM's 'th' by more than 0.001 to make strong hier hold! Increase niter (and/or rho). ")
aa$th[abs(aa$th) <= thr] <- 0
}
}
} else {
# weak hierarchy -- a single call to generalized gradient descent
if (is.null(aa)) {
aa <- list(th=matrix(0, p, p), bp=rep(0, p), bn=rep(0, p))
} else {
stopifnot(dim(aa$th) == c(p,p), length(aa$bp) == p, length(aa$bn) == p)
}
# this could be improved by not actually creating V...
V <- matrix(0, p, p)
rho <- 0
aa <- ggdescent.c(x=x, xnum=xnum, zz=zz, y=y, lam.l1=lam.l1, lam.l2=lam.l2, diagonal=diagonal,
rho=rho, V=V,
stepsize=step, backtrack=backtrack, maxiter=maxiter, tol=tol,
aa=aa, trace=trace)
}
aa$lam <- lam
aa$delta <- delta
aa$type <- "gaussian"
aa$diagonal <- diagonal
aa$strong <- strong
aa$obj <- Objective(aa=aa, x=x, y=y, lam.l1=lam.l1, lam.l2=lam.l2, xnum=xnum, zz=zz, strong=strong)
aa$step <- step
aa$maxiter <- maxiter
aa$backtrack <- backtrack
aa$tol <- tol
if (strong) {
# ADMM parameters:
aa$rho <- rho
aa$niter <- niter
aa$sym.eps <- sym.eps
}
aa$mx <- mx
aa$sx <- sx
aa$my <- my
aa$mzz <- mzz
aa$szz <- szz
aa$call <- this.call
class(aa) <- "hierNet"
return(aa)
}
print.hierNet <- function(x, ...) {
cat("Call:\n")
dput(x$call)
th=(x$th+t(x$th))/2
o2=colSums(th^2)!=0
b=x$bp-x$bn
o=b!=0
b=b[o]
if (any(o2)) {
# model has interactions
th=th[o,o2,drop=FALSE]
tight <- rowSums(abs(th)) >= x$bp[o] + x$bn[o] - 1e-9
tt <- rep("", length(tight))
tt[tight] <- "*"
mat=cbind(b,th)
mat=round(mat,4)
mat <- cbind(mat, tt)
cat("\n")
cat("Non-zero coefficients:",fill=T)
cat(" (Rows are predictors with nonzero main effects)",fill=T)
cat(" (1st column is main effect)", fill=T)
cat(" (Next columns are nonzero interactions of row predictor)", fill=T)
cat(" (Last column indicates whether hierarchy constraint is tight.)",fill=T)
cat("\n")
dimnames(mat)=list(as.character(which(o)),c("Main effect",as.character(which(o2)),"Tight?"))
print(mat, quote = FALSE)
} else {
mat <- matrix(round(b,4), length(b), 1)
cat("\n")
cat("Non-zero coefficients:",fill=T)
cat(" (No interactions in this model)",fill=T)
cat("\n")
dimnames(mat)=list(as.character(which(o)),"Main effect")
print(mat, quote = FALSE)
}
invisible()
}
print.hierNet.path <- function(x, ...) {
cat("Call:\n")
dput(x$call)
b=x$bp-x$bn
mat=cbind(round(x$lam,2),round(x$obj,2),colSums(b!=0),apply(x$th!=0,3,function(a) sum(diag(a)) + sum((a+t(a)!=0)[upper.tri(a)])))
dimnames(mat)=list(NULL,c("Lambda", "Objective", "Number of main effects","Number of interactions"))
cat("\n")
print(mat, quote = FALSE)
invisible()
}
print.hierNet.cv <- function(x, ...) {
cat("Call:\n")
dput(x$call)
mat=cbind(round(x$lamlist,2),x$nonzero,round(x$cv.err,2),round(x$cv.se,2))
dimnames(mat)=list(NULL,c("Lambda", "Number of nonzero","Mean CV error", "SE"))
cat("\n")
print(mat, quote = FALSE)
cat("\n")
cat(c("lamhat=",round(x$lamhat,2),"lamhat.1se=",round(x$lamhat.1se,2)),fill=T)
invisible()
}
hierNet.path <- function(x, y, lamlist=NULL, delta=1e-8, minlam=NULL, maxlam=NULL, nlam=20, flmin=.01,
diagonal=TRUE, strong=FALSE, aa=NULL, zz=NULL,
stand.main=TRUE, stand.int=FALSE,
rho=nrow(x), niter=100, sym.eps=1e-3,# ADMM params
step=1, maxiter=2000, backtrack=0.2, tol=1e-5, # GG descent params
trace=0) {
# Main Hiernet function for fitting at a sequence of lambda values.
# Note: L1 penalty terms have parameter lam.l1 = lambda * (1-delta)
# and L2 penalty has parameter lam.l2 = lambda * delta.
#
# Always centers both x and zz (unless zz is provided in as.numeric form)
# stand.main and stand.int refer to whether main effects and interactions should have norm sqrt(n-1)
# center and (maybe) scale variables
this.call <- match.call()
x <- scale(x, center=TRUE, scale=stand.main)
mx <- attr(x, "scaled:center")
sx <- attr(x, "scaled:scale") # may be NULL
my <- mean(y)
y <- y - my
if (is.null(maxlam)) {
if (!is.null(minlam)) stop("Cannot have maxlam=NULL if minlam is non-null.")
# maxlam <- max(abs(t(x) %*% y)/colSums(x^2))
maxlam <- max(abs(t(x) %*% y))
# temp <- t(scale(t(x), center=FALSE, scale=1/y))
# temp2 <- apply(temp, 2, twonorm)
# maxlam <- max(max(temp2), maxlam)
minlam <- maxlam * flmin
}
if (is.null(minlam)) minlam <- maxlam * flmin
if (is.null(lamlist))
lamlist <- exp(seq(log(maxlam),log(minlam),length=nlam))
nlam <- length(lamlist)
if (is.null(zz))
zz <- compute.interactions.c(x, diagonal=diagonal)
else
stopifnot(is.matrix(zz))
# center and (maybe) scale zz
zz <- scale(zz, center=TRUE, scale=stand.int)
mzz <- attr(zz, "scaled:center")
szz <- attr(zz, "scaled:scale") # may be NULL
zz <- as.numeric(zz)
p <- ncol(x)
cp2 <- choose(p, 2)
bp <- bn <- matrix(NA, nrow=p, ncol=nlam)
th <- array(NA, c(p, p, nlam))
obj <- rep(NA, nlam)
aa <- NULL
for (i in seq(nlam)) {
cat(c("i,lam=", i, round(lamlist[i],2)), fill=TRUE)
aa <- hierNet(x, y, lam=lamlist[i], delta=delta, strong=strong, diagonal=diagonal, aa=aa, zz=zz,
stand.main=FALSE, stand.int=FALSE, # have already standardized
rho=rho, niter=niter, sym.eps=sym.eps,
step=step, maxiter=maxiter, backtrack=backtrack, tol=tol, trace=trace)
bp[, i] <- aa$bp
bn[, i] <- aa$bn
th[, , i] <- aa$th
obj[i] <- aa$obj
}
dimnames(bp) <- dimnames(bn) <- list(as.character(1:p), NULL)
dimnames(th) <- list(as.character(1:p), as.character(1:p), NULL)
out <- list(bp=bp, bn=bn, th=th, obj=obj, lamlist=lamlist, delta=delta, mx=mx, sx=sx, mzz=mzz, szz=szz, my=my,
type="gaussian", diagonal=diagonal, strong=strong,
step=step, maxiter=maxiter, backtrack=backtrack, tol=tol,
call=this.call)
if (strong) {
# ADMM parameters:
out$rho <- rho
out$niter <- niter
out$sym.eps <- sym.eps
}
class(out) <- "hierNet.path"
out
}
predict.hierNet <- function(object, newx, newzz=NULL, ...) {
n <- nrow(newx)
if (is.null(object$sx))
newx <- scale(newx, center=object$mx, scale=FALSE)
else
newx <- scale(newx, center=object$mx, scale=object$sx)
if (is.null(newzz))
newzz <- compute.interactions.c(newx, diagonal=object$diagonal)
if (is.null(object$szz))
newzz <- scale(newzz, center=object$mzz, scale=FALSE)
else
newzz <- scale(newzz, center=object$mzz, scale=object$szz)
newzz <- as.numeric(newzz)
newx <- as.numeric(newx)
stopifnot(is.finite(newzz), is.finite(newx))
if (class(object$bp) == "numeric")
yhatt <- Compute.yhat.c(newx, newzz, object) + object$my
else {
nlam <- ncol(object$bp)
yhat <- matrix(NA, n, nlam)
# this could be made more efficient
for (i in seq(nlam)) {
bb <- list(bp=object$bp[, i], bn=object$bn[, i], th=object$th[, , i], diagonal=object$diagonal)
yhat[, i] <- Compute.yhat.c(newx, newzz, bb)
}
yhatt <- yhat + object$my
}
if (object$type == "logistic") {
# predict from hierNet.logistic object object
b0 <- object$b0
if(is.matrix(yhatt))
b0 <- matrix(b0, nrow=nrow(yhatt), ncol=ncol(yhatt), byrow=T)
yhatt <- b0 + yhatt
pr <- 1 / (1 + exp(-yhatt))
return(list(prob=pr, yhat=1*(pr>.5)))
}
return(yhatt)
}
predict.hierNet.path <- function(object, newx, newzz=NULL, ...){
predict.hierNet(object, newx, newzz, ...)
}
admm4 <- function(x, xnum, y, lam.l1, lam.l2, diagonal, zz=NULL, rho, niter, aa=NULL, sym.eps=1e-3, trace=1, ...) {
# Performs ADMM4.
# Note: xnum is the matrix x as a numeric. Both are passed to avoid having to call as.numeric too
# many times.
p <- ncol(x)
if (is.null(zz)) {
if (trace > 0) cat("Computing zz...", fill=TRUE)
zz <- as.numeric(compute.interactions.c(x, diagonal=diagonal))
}
else if (class(zz) == "matrix") zz <- as.numeric(zz)
if (is.null(aa)) {
aa <- list(u=matrix(0, p, p),
th=matrix(0, p, p),
bp=rep(0, p),
bn=rep(0, p),
tt=matrix(0, p, p),
diagonal=diagonal)
} else {
stopifnot(diagonal == aa$diagonal)
}
if (is.null(aa$tt) || is.null(aa$u)) {
aa$tt <- 0.5 * (aa$th + t(aa$th))
aa$u <- matrix(0, p, p)
}
obj <- Objective(aa=aa, x=x, y=y, lam.l1=lam.l1, lam.l2=lam.l2, xnum=xnum, zz=zz, strong=TRUE, sym.eps=sym.eps)
ll <- NULL
for (i in seq(niter)) {
if (trace > 0) cat(i, " ")
ll <- c(ll, ADMM4.Lagrangian(aa, xnum, zz, y, lam.l1=lam.l1, lam.l2=lam.l2, diagonal=diagonal, rho))
V <- aa$u - rho * aa$tt
gg <- ggdescent.c(x, xnum, zz, y, lam.l1=lam.l1, lam.l2=lam.l2, diagonal=diagonal, rho, V, trace=trace-1, aa=aa, ...)
aa$th <- gg$th
aa$bp <- gg$bp
aa$bn <- gg$bn
aa$tt <- (aa$th + t(aa$th)) / 2 + (aa$u + t(aa$u)) / (2 * rho)
aa$u <- aa$u + rho * (aa$th - aa$tt)
obj <- c(obj, Objective(aa=aa, x=x, y=y, lam.l1=lam.l1, lam.l2=lam.l2, xnum=xnum, zz=zz, strong=TRUE, sym.eps=sym.eps))
if (trace > 0) cat(obj[i+1], fill=TRUE)
}
if (max(abs(aa$th-t(aa$th))) > sym.eps)
cat("Attention: th not symmetric within the desired sym.eps. Run ADMM for more iterations. And try increasing rho.")
aa$obj <- obj
aa$lagr <- ll
aa
}
Objective <- function(aa, x, y, lam.l1, lam.l2, xnum=NULL, zz=NULL, strong=TRUE, sym.eps=1e-3) {
# evaluates the NewYal objective at aa.
if (strong) {
if (max(aa$th-t(aa$th)) > sym.eps) {
cat("Theta is not symmetric.", fill=TRUE)
return(Inf)
}
}
if (any(rowSums(abs(aa$th)) > aa$bp + aa$bn + 1e-5)) {
cat("hierarchy violated.", fill=TRUE)
return(Inf)
}
if (any(aa$bp < -1e-5)||any(aa$bn < -1e-5)) {
cat("Non-negative of bp or bn violated.", fill=TRUE)
return(Inf)
}
if (aa$diagonal == FALSE)
if (any(abs(diag(aa$th)) > 1e-8)) {
cat("Zero diagonal violated.", fill=TRUE)
return(Inf)
}
if (is.null(zz)) {
zz <- as.numeric(compute.interactions.c(x, diagonal=aa$diagonal))
}
if (is.null(xnum)) xnum <- as.numeric(x)
r <- y - Compute.yhat.c(xnum, zz, aa)
pen <- lam.l1 * sum(aa$bp + aa$bn) + lam.l1 * sum(abs(aa$th))/2 + lam.l1 * sum(abs(diag(aa$th)))/2
pen <- pen + lam.l2 * (sum(aa$bp^2) + sum(aa$bn^2) + sum(aa$th^2))
sum(r^2)/2 + pen
}
Objective.logistic <- function(aa, x, y, lam.l1, lam.l2, xnum=NULL, zz=NULL, strong=TRUE, sym.eps=1e-3) {
# evaluates the logistic hiernet objective at aa.
stopifnot(y %in% c(0,1))
stopifnot("diagonal" %in% names(aa))
if (aa$diagonal == FALSE)
if (any(abs(diag(aa$th)) > 1e-8)) {
cat("Diagonal of Theta is nonzero.", fill=TRUE)
return(Inf)
}
if (strong) {
if (max(aa$th-t(aa$th)) > sym.eps) {
cat("Theta is not symmetric.", fill=TRUE)
return(Inf)
}
}
if (any(rowSums(abs(aa$th)) > aa$bp + aa$bn + 1e-5)) {
cat("hierarchy violated.", fill=TRUE)
return(Inf)
}
if (any(aa$bp < -1e5)||any(aa$bn < -1e5)) {
cat("Non-negative of bp or bn violated.", fill=TRUE)
return(Inf)
}
if (is.null(zz)) {
zz <- as.numeric(scale(compute.interactions.c(x, diagonal=aa$diagonal), center=TRUE, scale=FALSE))
}
if (is.matrix(zz)) zz <- as.numeric(zz)
if (is.null(xnum)) xnum <- as.numeric(x)
phat <- Compute.phat.c(xnum, zz, aa)
loss <- -sum(y*log(phat)) - sum((1-y)*log(1-phat))
pen <- lam.l1 * sum(aa$bp + aa$bn) + lam.l1 * sum(abs(aa$th))/2 + lam.l1 * sum(abs(diag(aa$th)))/2
pen <- pen + lam.l2 * (sum(aa$bp^2) + sum(aa$bn^2) + sum(aa$th^2))
loss + pen
}
compute.interactions.c <- function(x, diagonal=TRUE) {
# Returns (uncentered) n by cp2 matrix of interactions.
# The columns of zz are in standard order (11), 12,13,14,...,(22),23,...
# z's (jk)th column is x_j * x_k
n <- nrow(x)
p <- ncol(x)
cp2 <- p * (p - 1) / 2
if (diagonal) {
cp2 <- cp2 + p
out <- .C("ComputeInteractionsWithDiagWithIndices",
as.double(x),
as.integer(n),
as.integer(p),
z=rep(0, n * cp2),
i1=as.integer(rep(0, cp2)),
i2=as.integer(rep(0, cp2)), dupl = FALSE, PACKAGE="hierNet")
}
else {
out <- .C("ComputeInteractionsWithIndices",
as.double(x),
as.integer(n),
as.integer(p),
z=rep(0, n * cp2),
i1=as.integer(rep(0, cp2)),
i2=as.integer(rep(0, cp2)), dupl = FALSE, PACKAGE="hierNet")
}
z <- matrix(out$z, n, cp2)
rownames(z) <- rownames(x)
if (is.null(colnames(x))) {
colnames(z) <- paste(out$i1, out$i2, sep=":")
}
else {
colnames(z) <- paste(colnames(x)[out$i1], colnames(x)[out$i2], sep=":")
}
z
}
compute.full.interactions.c <- function(x) {
# Returns (uncentered) n by p^2 matrix of interactions.
# The columns of zz are in standard order 11,12,13,14,...,23,...
# z's (jk)th column is x_j * x_k
n <- nrow(x)
p <- ncol(x)
out <- .C("ComputeFullInteractions",
as.double(x),
as.integer(n),
as.integer(p),
z=rep(0, n * p^2),
dupl = FALSE, PACKAGE="hierNet")
matrix(out$z, n, p^2)
}
Compute.yhat.c <- function(xnum, zz, aa) {
# aa: list containing bp, bn, th, diagonal
# note: zz is the n by cp2 matrix, whereas z is the n by p^2 one.
p <- length(aa$bp)
n <- length(xnum) / p
stopifnot(n==round(n))
stopifnot("diagonal" %in% names(aa))
if (aa$diagonal) stopifnot(length(zz) == n * (choose(p,2) + p))
else stopifnot(length(zz) == n * choose(p,2))
out <- .C("compute_yhat_zz_R",
xnum,
as.integer(n),
as.integer(p),
zz,
as.integer(aa$diagonal),
as.double(aa$th),
aa$bp,
aa$bn,
yhat=rep(0, n),
DUP=FALSE, PACKAGE="hierNet")
out$yhat
}
Compute.phat.c <- function(xnum, zz, aa) {
# aa: list containing b0, bp, bn, th
# note: zz is the n by cp2 matrix, whereas z is the n by p^2 one.
stopifnot(c("b0","bp","bn","th","diagonal") %in% names(aa))
p <- length(aa$bp)
n <- length(xnum) / p
if (is.matrix(xnum)) xnum <- as.numeric(xnum)
stopifnot(n == round(n))
if (aa$diagonal) stopifnot(length(zz) == n * (choose(p,2) + p))
else stopifnot(length(zz) == n * choose(p,2))
#void compute_phat_zz_R(double *x, int *n, int *p, double *zz, int *diagonal,
# double *b0, double *th, double *bp, double *bn, double *phat) {
out <- .C("compute_phat_zz_R",
xnum,
as.integer(n),
as.integer(p),
zz,
as.integer(aa$diagonal),
as.double(aa$b0),
as.double(aa$th),
aa$bp,
aa$bn,
phat=rep(0, n),
DUP=FALSE, PACKAGE="hierNet")
out$phat
}
ggdescent.c <- function(x, xnum, zz, y, lam.l1, lam.l2, diagonal, rho, V, stepsize, backtrack=0.2, maxiter=100,
tol=1e-5, aa=NULL, trace=1) {
# See ADMM4 pdf for the problem this solves.
#
# x, xnum, zz, y: data (note: zz is a length n*cp2 vector, not a matrix) xnum is x as a vector
# lam.l1: l1-penalty parameter
# lam.l2: l2-penalty parameter
# rho: admm parameter
# V: see ADMM4 pdf
# stepsize: step size to start backtracking with
# backtrack: factor by which step is reduced on each backtrack.
# maxiter: number of generalized gradient steps to take.
# tol: stop gg descent if change in objective is below tol.
# aa: initial estimate of (th, bp, bn)
# trace: how verbose to be
#
# void ggdescent_R(double *x, int *n, int *p, double *zz, int *diagonal, double *y,
# double *lamL1, double*lamL2, double *rho, double *V, int *maxiter,
# double *curth, double *curbp, double *curbn,
# double *t, int *stepwindow, double *backtrack, double *tol, int *trace,
# double *th, double *bp, double *bn) {
n <- length(y)
p <- ncol(x)
stepwindow <- 10
if (is.null(aa)) aa <- list(th=matrix(0,p,p), bp=rep(0,p), bn=rep(0,p))
out <- .C("ggdescent_R",
xnum,
as.integer(n),
as.integer(p),
zz,
as.integer(diagonal),
y,
as.double(lam.l1),
as.double(lam.l2),
as.double(rho),
as.double(V),
as.integer(maxiter),
as.double(aa$th),
aa$bp,
aa$bn,
stepsize,
as.integer(stepwindow),
backtrack,
tol,
as.integer(trace),
th=rep(0, p*p),
bp=rep(0, p),
bn=rep(0, p),
DUP=FALSE, PACKAGE="hierNet")
list(bp=out$bp, bn=out$bn, th=matrix(out$th, p, p))
}
hierNet.logistic <- function(x, y, lam, delta=1e-8, diagonal=TRUE, strong=FALSE, aa=NULL, zz=NULL, center=TRUE,
stand.main=TRUE, stand.int=FALSE,
rho=nrow(x), niter=100, sym.eps=1e-3,# ADMM params
step=1, maxiter=2000, backtrack=0.2, tol=1e-5, # GG descent params
trace=1) {
# Solves the logistic regression hiernet. Returns (b0, bp, bn, th)
this.call <- match.call()
n <- nrow(x)
p <- ncol(x)
stopifnot(y %in% c(0,1))
stopifnot(length(y) == n, lam >= 0, delta >= 0, delta <= 1)
stopifnot(!is.null(step) && !is.null(maxiter))
stopifnot(class(lam) == "numeric")
stopifnot(class(delta) == "numeric")
stopifnot(class(step) == "numeric", step > 0, maxiter > 0)
stopifnot(is.finite(x), is.finite(y), is.finite(lam), is.finite(delta))
lam.l1 <- lam * (1 - delta)
lam.l2 <- lam * delta
if (!center)
cat("WARNING: center=FALSE should almost never be used. This option is available for special uses only.", fill = TRUE)
x <- scale(x, center = center, scale = stand.main)
mx <- attr(x, "scaled:center")
sx <- attr(x, "scaled:scale")
if (is.null(aa)) aa <- list(b0=0, bp=rep(0, p), bn=rep(0, p), th=matrix(0, p, p), diagonal=diagonal)
if (is.null(zz)) {
if (trace > 0) cat("Computing zz...", fill=TRUE)
zz <- compute.interactions.c(x, diagonal=diagonal)
}
if (is.matrix(zz)) {
zz <- scale(zz, center=center, scale=stand.int)
mzz <- attr(zz, "scaled:center")
szz <- attr(zz, "scaled:scale")
zz <- as.numeric(zz)
} else {
mzz <- szz <- NULL
#cat("Provided zz is not a matrix, so it's assumed to be already centered.", fill = TRUE)
}
xnum <- as.numeric(x)
if (strong) {
# strong hierarchy -- use ADMM4 (logistic regression version)
stopifnot(is.numeric(rho), is.finite(rho))
out <- admm4.logistic(x, xnum, y, lam.l1, lam.l2, diagonal=diagonal, zz=zz,
rho=rho, niter=niter, aa=aa, sym.eps=sym.eps, # ADMM params
stepsize=step, backtrack=backtrack, maxiter=maxiter, tol=tol, # GG params
trace=trace)
ii <- out$bp + out$bn == 0
# note out$th[ii, ] = 0 since weak hierarchy holds for sure
sumii <- sum(ii)
if (sumii > 0 && sumii < p) {
thr <- max(abs(out$th[!ii, ii]))
if (thr > 0) {
cat(" thr = ",thr, fill=TRUE)
if (thr > 1e-3)
warning("Had to change ADMM's 'th' by more than 0.001 to make strong hier hold! Increase niter (and/or rho). ")
aa$th[abs(aa$th) <= thr] <- 0
}
}
} else {
out <- ggdescent.logistic(xnum=xnum, zz=zz, y=y, lam.l1=lam.l1, lam.l2=lam.l2, diagonal=diagonal, rho=0, V=matrix(0,p,p),
stepsize=step, backtrack=backtrack, maxiter=maxiter,
tol=tol, aa=aa, trace=trace)
}
out$call <- this.call
out$lam <- lam
out$delta <- delta
out$type <- "logistic"
out$diagonal <- diagonal
out$strong <- strong
if (strong) {
# ADMM parameters:
out$rho <- rho
out$niter <- niter
out$sym.eps <- sym.eps
}
out$step <- step
out$maxiter <- maxiter
out$backtrack <- backtrack
out$tol <- tol
out$obj <- critf.logistic(x, y, lam.l1, lam.l2, out$b0, out$bp, out$bn, out$th)
out$mx <- mx
out$my <- 0
out$sx <- sx
out$mzz <- mzz
class(out) <- "hierNet"
return(out)
}
admm4.logistic <- function(x, xnum, y, lam.l1, lam.l2, diagonal, zz=NULL, rho=10, niter, aa=NULL, sym.eps=1e-3, trace=1, ...) {
# Performs ADMM4 for logistic loss.
# Note: xnum is the matrix x as a numeric. Both are passed to avoid having to call as.numeric too
# many times.
p <- ncol(x)
if (is.null(zz)) {
if (trace > 0) cat("Computing zz...", fill=TRUE)
zz <- as.numeric(compute.interactions.c(x, diagonal=diagonal))
}
else if (class(zz) == "matrix") zz <- as.numeric(zz)
if (is.null(aa)) {
aa <- list(u=matrix(0, p, p),
th=matrix(0, p, p),
bp=rep(0, p),
bn=rep(0, p),
tt=matrix(0, p, p),
diagonal=diagonal)
}
if (is.null(aa$tt) || is.null(aa$u)) {
aa$tt <- 0.5 * (aa$th + t(aa$th))
aa$u <- matrix(0, p, p)
}
obj <- Objective.logistic(aa=aa, x=x, y=y, lam.l1=lam.l1, lam.l2=lam.l2, xnum=xnum, zz=zz, strong=TRUE, sym.eps=sym.eps)
for (i in seq(niter)) {
if (trace > 0) cat(i, " ")
V <- aa$u - rho * aa$tt
gg <- ggdescent.logistic(xnum, zz, y, lam.l1=lam.l1, lam.l2=lam.l2, diagonal=diagonal, rho, V, trace=trace-1, aa=aa, ...)
aa$th <- gg$th
aa$bp <- gg$bp
aa$bn <- gg$bn
aa$tt <- (aa$th + t(aa$th)) / 2 + (aa$u + t(aa$u)) / (2 * rho)
aa$u <- aa$u + rho * (aa$th - aa$tt)
obj <- c(obj, Objective.logistic(aa=aa, x=x, y=y, lam.l1=lam.l1, lam.l2=lam.l2, xnum=xnum, zz=zz, strong=TRUE, sym.eps=sym.eps))
if (trace > 0) cat(obj[i+1], fill=TRUE)
}
if (max(abs(aa$th-t(aa$th))) > sym.eps)
cat("Attention: th not symmetric within the desired sym.eps. Run ADMM for more iterations. And try increasing rho.")
aa$obj <- obj
aa
}
ggdescent.logistic <- function(xnum, zz, y, lam.l1, lam.l2, diagonal, rho, V, stepsize, backtrack=0.2, maxiter=100,
tol=1e-5, aa=NULL, trace=1) {
# See ADMM4 pdf and logistic.pdf for the problem this solves.
#
# xnum, zz, y: data (note: zz is a length n*cp2 vector, not a matrix) xnum is x as a (n*p)-vector
# lam.l1: l1-penalty parameter
# lam.l2: l2-penalty parameter
# rho: admm parameter
# V: see ADMM4 pdf
# stepsize: step size to start backtracking with
# backtrack: factor by which step is reduced on each backtrack.
# maxiter: number of generalized gradient steps to take.
# tol: stop gg descent if change in objective is below tol.
# aa: initial estimate of (b0, th, bp, bn)
# trace: how verbose to be
#
#void ggdescent_logistic_R(double *x, int *n, int *p, double *zz, int * diagonal, double *y,
# double *lamL1, double *lamL2, double *rho, double *V, int *maxiter,
# double *curb0, double *curth, double *curbp, double *curbn,
# double *t, int *stepwindow, double *backtrack, double *tol, int *trace,
# double *b0, double *th, double *bp, double *bn) {
n <- length(y)
p <- length(xnum) / n
stopifnot(p == round(p))
if (diagonal) stopifnot(length(zz) == n * (choose(p,2)+p))
else stopifnot(length(zz) == n * choose(p,2))
stepwindow <- 10
if (is.null(aa)) aa <- list(b0=0, th=matrix(0,p,p), bp=rep(0,p), bn=rep(0,p))
out <- .C("ggdescent_logistic_R",
xnum,
as.integer(n),
as.integer(p),
zz,
as.integer(diagonal),
as.double(y), # convert from integer to double
as.double(lam.l1),
as.double(lam.l2),
as.double(rho),
as.double(V),
as.integer(maxiter),
as.double(aa$b0),
as.double(aa$th),
aa$bp,
aa$bn,
as.double(stepsize),
as.integer(stepwindow),
as.double(backtrack),
as.double(tol),
as.integer(trace),
b0=as.double(0),
th=rep(0, p*p),
bp=rep(0, p),
bn=rep(0, p),
DUP=FALSE, PACKAGE="hierNet")
list(b0=out$b0, bp=out$bp, bn=out$bn, th=matrix(out$th, p, p))
}
ADMM4.Lagrangian <- function(aa, xnum, zz, y, lam.l1, lam.l2, diagonal, rho) {
# aa: list with (th, bp, bn, tt, u)
# zz is a vector not a matrix
if (aa$diagonal == FALSE)
if (any(abs(diag(aa$th)) > 1e-8)) {
cat("Diagonal of Theta is nonzero.", fill=TRUE)
return(Inf)
}
if (max(aa$tt-t(aa$tt)) > 1e-8) {
cat("Theta is not symmetric.", fill=TRUE)
return(Inf)
}
if (any(rowSums(abs(aa$th)) > aa$bp + aa$bn + 1e-5)) {
cat("hierarchy violated.", fill=TRUE)
return(Inf)
}
if (any(aa$bp < -1e-5)||any(aa$bn < -1e-5)) {
cat("Non-negative of bp or bn violated.", fill=TRUE)
return(Inf)
}
if (diagonal == FALSE)
if (any(abs(diag(aa$th)) > 1e-5)) {
cat("Zero diagonal violated.", fill=TRUE)
return(Inf)
}
V <- aa$u - rho * aa$tt
r <- y - Compute.yhat.c(xnum, zz, aa)
admm <- sum(aa$u*(aa$th-aa$tt)) + (rho/2) * sum((aa$th-aa$tt)^2)
#admm <- sum(V*aa$th) + (rho/2) * sum(aa$th^2) + (rho/2)*sum(aa$tt^2) - sum(aa$u*aa$tt)
pen <- lam.l1 * (sum(aa$bp + aa$bn) + sum(abs(aa$th))/2)
pen <- pen + lam.l2 * (sum(aa$bp^2) + sum(aa$bn^2) + sum(aa$th^2))
sum(r^2)/2 + pen + admm
}
predict.hierNet.logistic <- function(object, newx, newzz=NULL, ...) {
predict.hierNet(object, newx, newzz, ...)
}
critf.logistic <- function(x, y, lam.l1, lam.l2, b0, bp, bn, th) {
yhat <- b0 + x %*% (bp - bn) + 0.5 * diag(x %*% th %*% t(x))
p <- 1 / (1 + exp(-yhat))
val <- -sum(y * log(p) + (1 - y) * log(1 - p))
val <- val + lam.l1 * sum(bp + bn) + lam.l1 * sum(abs(th))/2 + lam.l1 * sum(abs(diag(th)))/2
val <- val + lam.l2 * (sum(bp^2) + sum(bn^2) + sum(th^2))
return(val)
}
twonorm <- function(x) {sqrt(sum(x * x))}
hierNet.logistic.path <- function (x, y, lamlist=NULL, delta=1e-8, minlam=NULL, maxlam=NULL, flmin=.01, nlam=20,
diagonal=TRUE, strong=FALSE, aa=NULL,
zz=NULL, stand.main=TRUE, stand.int=FALSE,
rho=nrow(x), niter=100, sym.eps=1e-3,# ADMM params
step=1, maxiter=2000, backtrack=0.2, tol=1e-5, # GG params
trace=0) {
this.call=match.call()
stopifnot(y %in% c(0, 1))
x <- scale(x, center=TRUE, scale=stand.main)
mx <- attr(x, "scaled:center")
sx <- attr(x, "scaled:scale")
if (is.null(maxlam)) {
if (!is.null(minlam)) stop("Cannot have maxlam=NULL if minlam is non-null.")
maxlam <- max(abs(t(x) %*% y))
minlam <- maxlam * flmin
}
if (is.null(minlam)) minlam <- maxlam * flmin
if (is.null(lamlist))
lamlist <- exp(seq(log(maxlam), log(minlam), length=nlam))
nlam <- length(lamlist)
if (is.null(zz))
zz <- compute.interactions.c(x, diagonal=diagonal)
else stopifnot(is.matrix(zz))
zz <- scale(zz, center=TRUE, scale=stand.int)
mzz <- attr(zz, "scaled:center")
szz <- attr(zz, "scaled:scale")
zz <- as.numeric(zz)
p <- ncol(x)
cp2 <- choose(p, 2)
b0 <- rep(NA, nlam)
bp <- bn <- matrix(NA, nrow=p, ncol=nlam)
th <- array(NA, c(p, p, nlam))
obj <- rep(NA, nlam)
aa <- NULL
for (i in seq(nlam)) {
cat(c("i,lam=", i, round(lamlist[i],2)), fill=TRUE)
aa <- hierNet.logistic(x, y, lam=lamlist[i], delta=delta, diagonal=diagonal, strong=strong,
aa=aa, zz=zz, stand.main=FALSE, stand.int=FALSE,
rho=rho, niter=niter, sym.eps=sym.eps,
step=step, maxiter=maxiter, backtrack=backtrack, tol=tol,
trace=trace)
b0[i] <- aa$b0
bp[, i] <- aa$bp
bn[, i] <- aa$bn
th[, , i] <- aa$th
obj[i] <- aa$obj
}
dimnames(bp) <- dimnames(bn) <- list(as.character(1:p), NULL)
dimnames(th) <- list(as.character(1:p), as.character(1:p), NULL)
out <- list(b0=b0, bp=bp, bn=bn, th=th, obj=obj, lamlist=lamlist, delta=delta,
mx=mx, my=0, sx=sx, mzz=mzz, szz=szz,
type="logistic", diagonal=diagonal, strong=strong,
step=step, maxiter=maxiter, backtrack=backtrack, tol=tol,
call=this.call)
if (strong) {
# ADMM parameters:
out$rho <- aa$rho
out$niter <- niter
out$sym.eps <- sym.eps
}
class(out) <- "hierNet.path"
out
}
balanced.folds <- function(y, nfolds=min(min(table(y)), 10)) {
totals <- table(y)
fmax <- max(totals)
nfolds <- min(nfolds, fmax)
# makes no sense to have more folds than the max class size
folds <- as.list(seq(nfolds))
yids <- split(seq(y), y)
# nice we to get the ids in a list, split by class
###Make a big matrix, with enough rows to get in all the folds per class
bigmat <- matrix(NA, ceiling(fmax/nfolds) * nfolds, length(totals))
for(i in seq(totals)) {
bigmat[seq(totals[i]), i] <- sample(yids[[i]])
}
smallmat <- matrix(bigmat, nrow = nfolds) # reshape the matrix
### Now do a clever sort to mix up the NAs
smallmat <- permute.rows(t(smallmat)) ### Now a clever unlisting
# the "clever" unlist doesn't work when there are no N
# apply(smallmat, 2, function(x)
# x[!is.na(x)])
res <-vector("list", nfolds)
for(j in 1:nfolds) {
jj <- !is.na(smallmat[, j])
res[[j]] <- smallmat[jj, j]
}
return(res)
}
permute.rows <-function(x) {
dd <- dim(x)
n <- dd[1]
p <- dd[2]
mm <- runif(length(x)) + rep(seq(n) * 10, rep(p, n))
matrix(t(x)[order(mm)], n, p, byrow = TRUE)
}
hierNet.cv <- function(fit, x, y, nfolds=10, folds=NULL, trace=0) {
this.call <- match.call()
stopifnot(class(fit) == "hierNet.path")
if(fit$type=="gaussian"){errfun=function(y,yhat){(y-yhat)^2}}
if(fit$type=="logistic"){errfun=function(y,yhat){1*(y!=yhat)}}
n <- length(y)
if(is.null(folds)) {
folds <- split(sample(1:n), rep(1:nfolds, length = n))
}
else {
stopifnot(class(folds)=="list")
nfolds <- length(folds)
}
lamlist=fit$lamlist
# get whether fit was standardized based on fit$sx and fit$szz...
if (is.null(fit$mx)) stop("hierNet object was not centered. hierNet.cv has not been written for this (unusual) case.")
stand.main <- !is.null(fit$sx)
stand.int <- !is.null(fit$szz)
n.lamlist <- length(lamlist) ### Set up the data structures
size <- double(n.lamlist)
err2=matrix(NA,nrow=nfolds,ncol=length(lamlist))
for(ii in 1:nfolds) {
cat("Fold", ii, ":")
if(fit$type=="gaussian"){
a <- hierNet.path(x[-folds[[ii]],],y=y[-folds[[ii]]],
lamlist=lamlist, delta=fit$delta, diagonal=fit$diagonal, strong=fit$strong, trace=trace,
stand.main=stand.main, stand.int=stand.int,
rho=fit$rho, niter=fit$niter, sym.eps=fit$sym.eps, # ADMM parameters (which will be NULL if strong=F)
step=fit$step, maxiter=fit$maxiter, backtrack=fit$backtrack, tol=fit$tol) # GG descent params
yhatt=predict.hierNet(a,newx=x[folds[[ii]],])
}
if(fit$type=="logistic"){
a <- hierNet.logistic.path(x[-folds[[ii]],],y=y[-folds[[ii]]],
lamlist=lamlist, delta=fit$delta, diagonal=fit$diagonal, strong=fit$strong,
trace=trace, stand.main=stand.main, stand.int=stand.int,
rho=fit$rho, niter=fit$niter, sym.eps=fit$sym.eps, # ADMM parameters (which will be NULL if strong=F)
step=fit$step, maxiter=fit$maxiter, backtrack=fit$backtrack, tol=fit$tol) # GG descent params
yhatt=predict.hierNet.logistic(a,newx=x[folds[[ii]],])$yhat
}
temp=matrix(y[folds[[ii]]],nrow=length(folds[[ii]]),ncol=n.lamlist)
err2[ii,]=colMeans(errfun(yhatt,temp))
cat("\n")
}
errm=colMeans(err2)
errse=sqrt(apply(err2,2,var)/nfolds)
o=which.min(errm)
lamhat=lamlist[o]
oo=errm<= errm[o]+errse[o]
lamhat.1se=lamlist[oo & lamlist>=lamhat][1]
nonzero=colSums(fit$bp-fit$bn!=0) + apply(fit$th!=0, 3, function(a) sum(diag(a)) + sum((a+t(a)!=0)[upper.tri(a)]))
obj <- list(lamlist=lamlist, cv.err=errm,cv.se=errse,lamhat=lamhat, lamhat.1se=lamhat.1se,
nonzero=nonzero, folds=folds,
call = this.call)
class(obj) <- "hierNet.cv"
obj
}
plot.hierNet.cv <- function(x, ...) {
par(mar = c(5, 5, 5, 1))
yrang=range(c(x$cv.err-x$cv.se,x$cv.err+x$cv.se))
plot(log(x$lamlist), x$cv.err, xlab="log(lambda)",
ylab = "Cross-validation Error", type="n",ylim=yrang)
axis(3, at = log(x$lamlist), labels = paste(x$nonzero), srt = 90, adj = 0)
mtext("Number of features", 3, 4, cex = 1.2)
axis(2, at = c(0, 0.2, 0.4, 0.6, 0.8))
error.bars(log(x$lamlist), x$cv.err - x$cv.se, x$cv.err + x$cv.se, width = 0.01, col = "darkgrey")
points(log(x$lamlist), x$cv.err, col=2, pch=19)
abline(v=log(x$lamhat), lty=3)
abline(v=log(x$lamhat.1se), lty=3)
invisible()
}
error.bars <-function(x, upper, lower, width = 0.02, ...) {
xlim <- range(x)
barw <- diff(xlim) * width
segments(x, upper, x, lower, ...)
segments(x - barw, upper, x + barw, upper, ...)
segments(x - barw, lower, x + barw, lower, ...)
range(upper, lower)
}
hierNet.varimp <- function(fit,x,y, ...) {
# NOTE: uses 0.5 cutoff for logistic case
lam=fit$lam
if(fit$type=="gaussian"){errfun=function(y,yhat){(y-yhat)^2}}
if(fit$type=="logistic"){
errfun=function(y,yhat){
term1=y*log(yhat);term1[yhat==0]=0
term2=(1-y)*log(1-yhat);term2[yhat==1]=0
val=-sum(term1+term2)
return(val)
}}
yhat=predict(fit,x)
rss=sum(errfun(y,yhat))
varsum=fit$bp-fit$bn+rowSums(abs(fit$th))
oo=which(abs(varsum)>1e-6)
imp=rss2=rep(NA,ncol(x))
for(j in oo){
cat(j)
fit0=fit;fit0$bp=fit$bp[-j];fit0$bn=fit$bn[-j];fit0$th=fit$th[-j,-j]
if(fit$type=="gaussian"){ fit2=hierNet(x[,-j],y,lam,delta=fit$delta,diagonal=fit$diagonal,aa=fit0)}
if(fit$type=="logistic"){ fit2=hierNet.logistic(x[,-j],y,lam,delta=fit$delta,diagonal=fit$diagonal,aa=fit0)}
yhat2=predict(fit2,x[,-j])
rss2[j]=sum(errfun(y,yhat2))
imp[j]=(rss2[j]-rss)/rss2[j]
}
imp[-oo]=0
res=cbind(1:ncol(x),round(imp,3))
ooo=order(-imp)
dimnames(res)=list(NULL,c("Predictor","Importance"))
cat("",fill=T)
return(res[ooo,])
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.