# Lasso inference function (for fixed lambda). Note: here we are providing inference
# for the solution of
# min 1/2 || y - \beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1
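# It returns selective p-values and confidence intervals for the coefficients
# of the selected variables, conditional on the lasso selection event
# (following the polyhedral framework of Lee et al. (2016)).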
fixedLassoInf <- function(x, y, beta,
                          lambda,
                          family=c("gaussian","binomial","cox"),
                          intercept=TRUE,
                          add.targets=NULL,
                          status=NULL,
                          sigma=NULL,
                          alpha=0.1,
                          type=c("partial", "full"),
                          tol.beta=1e-5,
                          tol.kkt=0.1,
                          gridrange=c(-100,100),
                          bits=NULL,
                          verbose=FALSE,
                          linesearch.try=10) {
  family = match.arg(family)
  this.call = match.call()
  type = match.arg(type)

  if (family == "binomial") {
    if (type != "partial") stop("Only type='partial' allowed with binomial family")
    out = fixedLogitLassoInf(x, y, beta, lambda, alpha=alpha, type=type,
                             tol.beta=tol.beta, tol.kkt=tol.kkt,
                             gridrange=gridrange, bits=bits, verbose=verbose,
                             linesearch.try=linesearch.try,
                             this.call=this.call)
    return(out)
  }
else if(family=="cox") {
if(type!="partial") stop("Only type= partial allowed with Cox family")
out=fixedCoxLassoInf(x,y,status,beta,lambda,alpha=alpha, type="partial",tol.beta=tol.beta,
tol.kkt=tol.kkt, gridrange=gridrange, bits=bits, verbose=verbose,this.call=this.call)
return(out)
}
  else {
    checkargs.xy(x, y)
    if (missing(beta) || is.null(beta)) stop("Must supply the solution beta")
    if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda")

    n = nrow(x)
    p = ncol(x)
    beta = as.numeric(beta)
if (type == "full") {
if (p > n) {
# need intercept (if there is one) for debiased lasso
hbeta = beta
if (intercept == T) {
if (length(beta) != p + 1) {
stop("Since type='full', p > n, and intercept=TRUE, beta must have length equal to ncol(x)+1")
}
# remove intercept if included
beta = beta[-1]
} else if (length(beta) != p) {
stop("Since family='gaussian', type='full' and intercept=FALSE, beta must have length equal to ncol(x)")
}
}
} else if (length(beta) != p) {
stop("Since family='gaussian' and type='partial', beta must have length equal to ncol(x)")
}
    checkargs.misc(beta=beta, lambda=lambda, sigma=sigma, alpha=alpha,
                   gridrange=gridrange, tol.beta=tol.beta, tol.kkt=tol.kkt)
    if (!is.null(bits) && !requireNamespace("Rmpfr", quietly=TRUE)) {
      warning("Package Rmpfr is not installed, reverting to standard precision")
      bits = NULL
    }
    # Note: elementwise `&` is needed here, since add.targets is a vector
    if (!is.null(add.targets) && (!is.vector(add.targets)
        || !all(is.numeric(add.targets)) || !all(add.targets == floor(add.targets))
        || !all(add.targets >= 1 & add.targets <= p))) {
      stop("'add.targets' must be a vector of integers between 1 and p")
    }
    # If glmnet was run with an intercept term, center x and y
    if (intercept == TRUE) {
      obj = standardize(x, y, TRUE, FALSE)
      x = obj$x
      y = obj$y
    }

    # Check the KKT conditions
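    # Stationarity requires the subgradient g = X^T (y - X beta) / lambda to
    # satisfy |g_j| <= 1 for all j, with g_j = sign(beta_j) on the active set;
    # the checks below allow numerical slack via tol.kkt.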
    g = t(x) %*% (y - x %*% beta) / lambda
    if (any(abs(g) > 1 + tol.kkt * sqrt(sum(y^2))))
      warning(paste("Solution beta does not satisfy the KKT conditions",
                    "(to within specified tolerances)"))

    tol.coef = tol.beta * sqrt(n / colSums(x^2))
    vars = which(abs(beta) > tol.coef)
    sign_vars = sign(beta[vars])

    if (length(vars) == 0) {
      cat("Empty model", fill=TRUE)
      return()
    }

    if (any(sign(g[vars]) != sign_vars)) {
      warning(paste("Solution beta does not satisfy the KKT conditions",
                    "(to within specified tolerances). You might try rerunning",
                    "glmnet with a lower setting of the",
                    "'thresh' parameter, for a more accurate convergence."))
    }
    # Get the lasso polyhedral region, of the form A y <= b (as checked below)
    logical.vars = rep(FALSE, p)
    logical.vars[vars] = TRUE

    if (type == "full") {
      out = fixedLassoPoly(x, y, lambda, beta, logical.vars, inactive=TRUE)
    }
    else {
      out = fixedLassoPoly(x, y, lambda, beta, logical.vars)
    }

    A = out$A
    b = out$b
    # Check that y satisfies the polyhedral constraints
    tol.poly = 0.01
    if (max(A %*% y - b) > tol.poly * sqrt(sum(y^2)))
      stop(paste("Polyhedral constraints not satisfied; you must recompute beta",
                 "more accurately. With glmnet, make sure to use exact=TRUE in coef(),",
                 "and check whether the specified value of lambda is too small",
                 "(beyond the grid of values visited by glmnet).",
                 "You might also try rerunning glmnet with a lower setting of the",
                 "'thresh' parameter, for a more accurate convergence."))

    # Estimate sigma
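    # When n >= 2p we use the residual standard error of the full least squares
    # fit; otherwise we fall back on sd(y), which is typically an overestimate
    # when signal is present (hence the suggestion to use estimateSigma instead).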
    if (is.null(sigma)) {
      if (n >= 2*p) {
        oo = intercept
        sigma = sqrt(sum(lsfit(x, y, intercept=oo)$res^2) / (n - p - oo))
      }
      else {
        sigma = sd(y)
        warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;", sigma),
                      "you may want to use the estimateSigma function"))
      }
    }
    # Add additional targets for inference if provided
    if (!is.null(add.targets)) {
      # NOTE: this branch is unfinished (it treats vars as a logical mask,
      # although vars is an integer index vector here), so we stop below
      old_vars = vars & TRUE
      vars[add.targets] = TRUE
      sign_vars = sign(beta[vars])
      sign_vars[!old_vars] = NA
      stop("`add.targets` not fully implemented yet")
    }
    k = length(vars)
    pv = vlo = vup = numeric(k)
    vmat = matrix(0, k, n)
    ci = tailarea = matrix(0, k, 2)
if (type=="full" & p > n) {
if (intercept == TRUE) {
pp=p+1
Xint <- cbind(rep(1,n),x)
# indices of selected predictors
S = c(1,vars + 1)
} else {
pp=p
Xint <- x
# indices of selected predictors
S = vars
# notS = which(abs(beta) <= tol.coef)
}
notS = setdiff(1:pp,S)
XS = Xint[,S]
hbetaS = hbeta[S]
# Reorder so that active set S is first
Xordered = Xint[,c(S,notS,recursive=T)]
hsigmaS = 1/n*(t(XS)%*%XS) # hsigma[S,S]
hsigmaSinv = solve(hsigmaS) # pinv(hsigmaS)
FS = rbind(diag(length(S)),matrix(0,pp-length(S),length(S)))
GS = cbind(diag(length(S)),matrix(0,length(S),pp-length(S)))
      # Somewhat arbitrary cutoff: use the "wide" code path when n < 2p,
      # to avoid forming p x p matrices
      is_wide = n < (2 * p)

      # Approximate inverse covariance matrix for when n < p, from lasso_Inference.R
      if (!is_wide) {
        hsigma = 1/n * (t(Xordered) %*% Xordered)
        htheta = debiasingMatrix(hsigma,
                                 is_wide,
                                 n,
                                 1:length(S),
                                 verbose=FALSE,
                                 max_try=linesearch.try,
                                 warn_kkt=TRUE)
        ithetasigma = (GS - (htheta %*% hsigma))
      } else {
        htheta = debiasingMatrix(Xordered,
                                 is_wide,
                                 n,
                                 1:length(S),
                                 verbose=FALSE,
                                 max_try=linesearch.try,
                                 warn_kkt=TRUE)
        ithetasigma = (GS - ((htheta %*% t(Xordered)) %*% Xordered) / n)
      }
      M <- (((htheta %*% t(Xordered)) + ithetasigma %*% FS %*% hsigmaSinv %*% t(XS)) / n)

      # Offset vector: the null value used when testing the debiased coefficients
      null_value <- (((ithetasigma %*% FS %*% hsigmaSinv) %*% sign(hbetaS)) * lambda / n)

      if (intercept == TRUE) {
        M = M[-1, ]                  # remove the intercept row
        null_value = null_value[-1]  # remove the intercept element
      }
    } else if (type == "partial" || p > n) {
      xa = x[, vars, drop=FALSE]
      M = pinv(crossprod(xa)) %*% t(xa)
      null_value = rep(0, k)
    } else {
      M = pinv(crossprod(x)) %*% t(x)
      M = M[vars, , drop=FALSE]
      null_value = rep(0, k)
    }
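    # Each row of M is a contrast vector eta_j, so that eta_j^T y is the
    # estimate (debiased, partial, or full OLS) for the j-th selected variable.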
    for (j in 1:k) {
      if (verbose) cat(sprintf("Inference for variable %i ...\n", vars[j]))

      vj = M[j, ]
      mj = sqrt(sum(vj^2))
      vj = vj / mj   # Standardize (divide by norm of vj)
      if (!is.na(sign_vars[j])) {
        vj = sign_vars[j] * vj
      }

      limits.info = TG.limits(y, A, b, vj, Sigma=diag(rep(sigma^2, n)))
      a = TG.pvalue.base(limits.info, null_value=null_value[j], bits=bits)
      pv[j] = a$pv
      if (is.na(sign_vars[j])) {
        # For variables not in the active set, report a 2-sided p-value
        pv[j] = 2 * min(pv[j], 1 - pv[j])
      }
      vlo[j] = a$vlo * mj   # Unstandardize (multiply by norm of vj)
      vup[j] = a$vup * mj   # Unstandardize (multiply by norm of vj)
      if (!is.na(sign_vars[j])) {
        vmat[j, ] = vj * mj * sign_vars[j]   # Unstandardize and fix sign
      } else {
        vmat[j, ] = vj * mj                  # Unstandardize
      }

      a = TG.interval.base(limits.info,
                           alpha=alpha,
                           gridrange=gridrange,
                           flip=(sign_vars[j] == -1),
                           bits=bits)
      ci[j, ] = (a$int - null_value[j]) * mj   # Unstandardize (multiply by norm of vj)
      tailarea[j, ] = a$tailarea
    }
    out = list(type=type,
               lambda=lambda,
               pv=pv,
               ci=ci,
               tailarea=tailarea,
               vlo=vlo,
               vup=vup,
               vmat=vmat,
               y=y,
               vars=vars,
               sign=sign_vars,
               sigma=sigma,
               alpha=alpha,
               sd=sigma * sqrt(rowSums(vmat^2)),
               coef0=vmat %*% y,
               call=this.call)
    class(out) = "fixedLassoInf"
    return(out)
  }
}
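# A minimal usage sketch (kept inside an unused function so that nothing runs
# at load time; the function name is illustrative only). It assumes glmnet is
# available. Note that glmnet minimizes 1/(2n) * RSS + lambda * ||beta||_1, so
# its penalty s corresponds to lambda/n in the objective above.
.fixedLassoInf_example <- function() {
  set.seed(43)
  n = 50; p = 10
  x = matrix(rnorm(n * p), n, p)
  y = x %*% c(3, 2, rep(0, p - 2)) + rnorm(n)
  gfit = glmnet::glmnet(x, y, standardize=FALSE)
  lambda = 0.8
  # exact=TRUE requires re-supplying x and y; drop the intercept with [-1]
  beta = coef(gfit, x=x, y=y, s=lambda/n, exact=TRUE)[-1]
  fixedLassoInf(x, y, beta, lambda)
}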
#############################
fixedLassoPoly = function(X, y, lambda, beta, active, inactive=FALSE) {
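  # Encodes the selection event {active set, signs} as affine constraints
  # A y <= b, following Lee et al. (2016): the active block requires
  # sign(beta_A) * betahat_A(y) > 0, and the (optional) inactive block keeps
  # |X_I^T (I - P_A) y| within its subgradient bounds.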
  XA = X[, active, drop=FALSE]
  XI = X[, !active, drop=FALSE]
  XAi = pinv(crossprod(XA))
  XAp = XAi %*% t(XA)
  Ir = t(XI) %*% t(XAp)   # matrix in the "irrepresentable" condition

  if (length(lambda) > 1) {
    lambdaA = lambda[active]
    lambdaI = lambda[!active]
  } else {
    lambdaA = rep(lambda, sum(active))
    lambdaI = rep(lambda, sum(!active))
  }

  penalized = lambdaA != 0
  signA = sign(beta[active])
  active_subgrad = signA * lambdaA

  if (length(signA) > 1) sign_diag = diag(signA)
  if (length(signA) == 1) sign_diag = matrix(signA, 1, 1)

  if (inactive) {   # should we include the inactive constraints?
    # RA is the residual-forming matrix of the selected model
    RA = diag(rep(1, nrow(XA))) - XA %*% XAp
    A = rbind(
      t(XI) %*% RA,
      -t(XI) %*% RA,
      -(sign_diag %*% XAp)[penalized, ]   # no constraints for unpenalized variables
    )
    b = c(
      lambdaI - Ir %*% active_subgrad,
      lambdaI + Ir %*% active_subgrad,
      -(sign_diag %*% XAi %*% active_subgrad)[penalized])
  } else {
    A = -(sign_diag %*% XAp)[penalized, ]   # no constraints for unpenalized variables
    b = -(sign_diag %*% XAi %*% active_subgrad)[penalized]
  }

  return(list(A=A, b=b))
}
##############################
## Approximates inverse covariance matrix theta
## using coordinate descent
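## Each requested row m approximately solves a debiasing program in the spirit
## of Javanmard & Montanari (2014), in Lagrangian form:
##   minimize (1/2) m^T hsigma m - m_j + bound * ||m||_1,  hsigma = X^T X / n,
## whose KKT conditions enforce || hsigma m - e_j ||_inf <= bound.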
debiasingMatrix = function(Xinfo,                  # could be X or t(X) %*% X / n, depending on is_wide
                           is_wide,
                           nsample,
                           rows,
                           verbose=FALSE,
                           bound=NULL,             # starting value of bound
                           linesearch=TRUE,        # do a linesearch?
                           scaling_factor=1.5,     # multiplicative factor for linesearch
                           max_active=NULL,        # how big can the active set get?
                           max_try=10,             # how many steps in the linesearch?
                           warn_kkt=FALSE,         # warn if KKT conditions do not seem to be satisfied?
                           max_iter=50,            # how many iterations for each optimization problem
                           kkt_stop=TRUE,          # stop based on KKT conditions?
                           parameter_stop=TRUE,    # stop based on relative convergence of parameter?
                           objective_stop=TRUE,    # stop based on relative decrease in objective?
                           kkt_tol=1.e-4,          # tolerance for the KKT conditions
                           parameter_tol=1.e-4,    # tolerance for relative convergence of parameter
                           objective_tol=1.e-4     # tolerance for relative decrease in objective
                           ) {
  if (is.null(max_active)) {
    max_active = max(50, 0.3 * nsample)
  }

  p = ncol(Xinfo)
  M = matrix(0, length(rows), p)
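  # Default bound: qnorm(1 - 0.1/p^2)/sqrt(n), on the order of sqrt(log(p)/n),
  # the scaling suggested by the debiasing literature.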
  if (is.null(bound)) {
    bound = (1/sqrt(nsample)) * qnorm(1 - (0.1/(p^2)))
  }

  xperc = 0
  xp = max(1, round(p/10))   # guard against p < 5, where round(p/10) would be 0
  idx = 1
  for (row in rows) {
    if ((idx %% xp) == 0) {
      xperc = xperc + 10
      if (verbose) {
        print(paste(xperc, "% done", sep=""))
      }
    }
    output = debiasingRow(Xinfo,   # could be X or t(X) %*% X / n, depending on is_wide
                          is_wide,
                          row,
                          bound,
                          linesearch=linesearch,
                          scaling_factor=scaling_factor,
                          max_active=max_active,
                          max_try=max_try,
                          warn_kkt=FALSE,
                          max_iter=max_iter,
                          kkt_stop=kkt_stop,
                          parameter_stop=parameter_stop,
                          objective_stop=objective_stop,
                          kkt_tol=kkt_tol,
                          parameter_tol=parameter_tol,
                          objective_tol=objective_tol)
    if (warn_kkt && (!output$kkt_check)) {
      warning("Solution for row of M does not seem to be feasible")
    }
    if (!is.null(output$soln)) {
      M[idx, ] = output$soln
    } else {
      stop(paste("Unable to approximate inverse row ", row))
    }
    idx = idx + 1
  }
  return(M)
}
# Find one row of the debiasing matrix; Xinfo is X^T X / n itself when X is
# tall (is_wide FALSE), or the design matrix X when X is wide
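# Solves, for the given row j:
#   minimize (1/2) m^T (X^T X / n) m - m_j + bound * ||m||_1
# by coordinate descent, with an outer line search over `bound`.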
debiasingRow = function(Xinfo,                  # could be X or t(X) %*% X / n, depending on is_wide
                        is_wide,
                        row,
                        bound,
                        linesearch=TRUE,        # do a linesearch?
                        scaling_factor=1.5,     # multiplicative factor for linesearch
                        max_active=NULL,        # how big can the active set get?
                        max_try=10,             # how many steps in the linesearch?
                        warn_kkt=FALSE,         # warn if KKT conditions do not seem to be satisfied?
                        max_iter=50,            # how many iterations for each optimization problem
                        kkt_stop=TRUE,          # stop based on KKT conditions?
                        parameter_stop=TRUE,    # stop based on relative convergence of parameter?
                        objective_stop=TRUE,    # stop based on relative decrease in objective?
                        kkt_tol=1.e-4,          # tolerance for the KKT conditions
                        parameter_tol=1.e-4,    # tolerance for relative convergence of parameter
                        objective_tol=1.e-4     # tolerance for relative decrease in objective
                        ) {
  p = ncol(Xinfo)
  if (is.null(max_active)) {
    max_active = min(nrow(Xinfo), ncol(Xinfo))
  }

  # Initialize variables
  soln = as.numeric(rep(0, p))
  ever_active = rep(0, p)
  ever_active[1] = row   # 1-based
  ever_active = as.integer(ever_active)
  nactive = as.integer(1)
  linear_func = rep(0, p)
  linear_func[row] = -1
  linear_func = as.numeric(linear_func)
  gradient = 1. * linear_func
  counter_idx = 1
  incr = 0
  last_output = NULL

  if (is_wide) {
    Xsoln = as.numeric(rep(0, nrow(Xinfo)))
  }
  while (counter_idx < max_try) {
    if (!is_wide) {
      result = solve_QP(Xinfo,        # this is a non-negative-definite matrix
                        bound,
                        max_iter,
                        soln,
                        linear_func,
                        gradient,
                        ever_active,
                        nactive,
                        kkt_tol,
                        objective_tol,
                        parameter_tol,
                        max_active,
                        kkt_stop,
                        objective_stop,
                        parameter_stop)
    } else {
      result = solve_QP_wide(Xinfo,                      # this is a design matrix
                             as.numeric(rep(bound, p)),  # vector of Lagrange multipliers
                             0,                          # ridge_term
                             max_iter,
                             soln,
                             linear_func,
                             gradient,
                             Xsoln,
                             ever_active,
                             nactive,
                             kkt_tol,
                             objective_tol,
                             parameter_tol,
                             max_active,
                             kkt_stop,
                             objective_stop,
                             parameter_stop)
    }
    iter = result$iter

    # Logic for whether we should continue the line search
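    # If the very first solve hit max_iter, the problem looks infeasible at
    # this bound, so we search upward for a feasible bound; otherwise we keep
    # shrinking the bound until the problem becomes infeasible, and revert to
    # the last solved problem.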
    if (!linesearch) {
      break
    }

    if (counter_idx == 1) {
      if (iter == (max_iter + 1)) {
        incr = 1   # the original problem was not feasible
      } else {
        incr = 0   # the original problem was feasible
      }
    }

    if (incr == 1) {   # trying to find a feasible point
      if ((iter < (max_iter + 1)) && (counter_idx > 1)) {
        break   # we've found a feasible point and solved the problem
      }
      bound = bound * scaling_factor
    } else {   # trying to drop the bound parameter further
      if ((iter == (max_iter + 1)) && (counter_idx > 1)) {
        # problem seems infeasible because we didn't solve it,
        # so we revert to the previously found solution
        result = last_output
        break
      }
      bound = bound / scaling_factor
    }

    # If the active set has grown to a certain size then we stop, presuming
    # the problem has become infeasible, and revert to the previous solution
    if (result$max_active_check) {
      result = last_output
      break
    }

    counter_idx = counter_idx + 1
    last_output = list(soln=result$soln,
                       kkt_check=result$kkt_check)
  }
  # Check feasibility
  if (warn_kkt && (!result$kkt_check)) {
    warning("Solution for row of M does not seem to be feasible")
  }

  return(list(soln=result$soln,
              kkt_check=result$kkt_check,
              gradient=result$gradient))
}
##############################
print.fixedLassoInf <- function(x, tailarea=TRUE, ...) {
  cat("\nCall:\n")
  dput(x$call)

  cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n",
              x$sigma))
  cat(sprintf("\nTesting results at lambda = %0.3f, with alpha = %0.3f\n", x$lambda, x$alpha))
  cat("", fill=TRUE)

  tab = cbind(x$vars,
              round(x$coef0, 3),
              round(x$coef0 / x$sd, 3),
              round(x$pv, 3), round(x$ci, 3))
  colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt")
  if (tailarea) {
    tab = cbind(tab, round(x$tailarea, 3))
    colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea", "UpTailArea")
  }
  rownames(tab) = rep("", nrow(tab))
  print(tab)

  cat(sprintf("\nNote: coefficients shown are %s regression coefficients\n",
              ifelse(x$type == "partial", "partial", "full")))
  invisible()
}
#estimateLambda <- function(x, sigma, nsamp=1000){
# checkargs.xy(x,rep(0,nrow(x)))
# if(nsamp < 10) stop("More Monte Carlo samples required for estimation")
# if (length(sigma)!=1) stop("sigma should be a number > 0")
# if (sigma<=0) stop("sigma should be a number > 0")
# n = nrow(x)
# eps = sigma*matrix(rnorm(nsamp*n),n,nsamp)
# lambda = 2*mean(apply(t(x)%*%eps,2,max))
# return(lambda)
#}