# Empirical Bayes utilities for estimating model selection hyper-parameters, plus a helper to plot Taylor approximations
plot_taylor_approximation <- function(
  f, # Original function (takes vector input)
  grad_f, # Gradient function (returns vector)
  hessian_f, # Hessian function (returns matrix)
  x0, # Current point (vector)
  coord_idx, # Index of coordinate to vary (e.g., 1 for x1)
  x_lim = c(x0[coord_idx] - 1, x0[coord_idx] + 1), # Range for varying coordinate
  n_points = 100, # Number of points for plotting
  ... # Other arguments passed to f, grad_f and hessian_f
) {
  # Plot f along one coordinate (the others fixed at x0), together with its
  # first- and second-order Taylor approximations around x0.
  # Returns (invisibly) the value of legend().
  # Check inputs: coord_idx must index a valid coordinate of x0
  # (the original only checked the upper bound; coord_idx < 1 silently misbehaved)
  if (coord_idx < 1 || coord_idx > length(x0)) stop("coord_idx must be between 1 and length(x0)")
  # Generate sequence for varying coordinate
  x_var <- seq(x_lim[1], x_lim[2], length.out = n_points)
  # Evaluate original function along the varying coordinate
  # (vapply instead of sapply: guarantees a numeric vector is returned)
  f_values <- vapply(x_var, function(xi) {
    x <- x0
    x[coord_idx] <- xi
    f(x, ...)
  }, numeric(1))
  # Compute Taylor expansion ingredients at x0, evaluating f(x0) only once
  f0 <- f(x0, ...)
  grad <- grad_f(x0, ...)
  hess <- hessian_f(x0, ...)
  delta <- x_var - x0[coord_idx]
  # First-order Taylor approximation (linear)
  taylor1 <- f0 + grad[coord_idx] * delta
  # Second-order Taylor approximation (quadratic): adds the curvature term
  taylor2 <- taylor1 + 0.5 * hess[coord_idx, coord_idx] * delta^2
  # Plot
  plot(x_var, f_values, type = "l", lwd = 2, col = "black",
  xlab = paste("Coordinate", coord_idx, "(x", coord_idx, ")"),
  ylab = "Function value",
  main = paste("Function and Taylor Approximations at x[", coord_idx, "] =", x0[coord_idx]))
  abline(v = x0[coord_idx], lty = 2, col = "gray") # Mark the current point
  lines(x_var, taylor1, col = "red", lwd = 2, lty = 2) # 1st-order approx
  lines(x_var, taylor2, col = "blue", lwd = 2, lty = 3) # 2nd-order approx
  legend("topright", legend = c("Original", "1st-order Taylor", "2nd-order Taylor"),
  col = c("black", "red", "blue"), lwd = 2, lty = c(1, 2, 3))
}
# - Z: In regression, p x q matrix containing the q meta-covariates for the p parameters of interest. It should not include an intercept, as it's already automatically added
# In GGMs, a p x p matrix or list of p x p matrices
# - wini: Optional. Initial value for the q hyper-parameters
# - niter.mcmc= number of iterations in the final MCMC, run once after hyper-parameter estimates have been obtained
# - niter.mstep= number of MCMC iterations in each M-step required to update hyper-parameter estimates
# - niter.eBayes: max number of iterations in the empirical Bayes optimization algorithm. The algorithm also stops early when the objective function has not improved in >= 2 consecutive iterations
# - priorvar.w: prior variance of the hyper-parameters, which follow a prior w ~ N(0, priorvar.w * V) (see eBayes_priorelicit for the default choice of priorvar.w and V)
# - verbose: if TRUE, progress in hyper-parameter estimation is printed
modelSelection_eBayes= function(Z, wini, niter.mcmc= 5000, niter.mstep= 1000, niter.eBayes= 20, priorvar.w, verbose=TRUE, ...) {
  # Empirical Bayes hyper-parameter estimation for regression-type model selection.
  # Builds a design matrix from Z (intercept added automatically), orthogonalizes
  # the meta-covariates with respect to the intercept, and delegates to
  # modelSelectionGeneric_eBayes with msfun="modelSelection".
  Z= model.matrix(~ ., data= as.data.frame(Z))
  # If the user-supplied Z already contained a constant column there will be >1
  # zero-variance columns: keep only the first one (the intercept)
  constant.cols= which(apply(Z, 2, sd) == 0)
  if (length(constant.cols) > 1) {
    Z= Z[, -constant.cols[-1], drop=FALSE]
  }
  noninterc= colnames(Z) != "(Intercept)"
  if (any(noninterc)) {
    # Center non-intercept columns (residuals from an intercept-only fit), rounded to 3 decimals
    Z[, noninterc]= round(residuals(lm(Z[, noninterc] ~ 1)), 3)
  }
  # Call generic eBayes function
  modelSelectionGeneric_eBayes(msfun="modelSelection", Z=Z, wini=wini, niter.mcmc=niter.mcmc, niter.mstep=niter.mstep, niter.eBayes=niter.eBayes, priorvar.w=priorvar.w, verbose=verbose, ...)
}
modelSelectionGGM_eBayes= function(Z, wini, niter.mcmc= 5000, niter.mstep= 1000, niter.eBayes= 20, priorvar.w, verbose=TRUE, ...) {
  # Empirical Bayes hyper-parameter estimation for Gaussian graphical models.
  # Formats Z into one row per upper-triangular (off-diagonal) precision entry,
  # then delegates to modelSelectionGeneric_eBayes with msfun="modelSelectionGGM".
  # Check that Z has the right format
  if (missing(Z)) {
    # Default Z: intercept only, with one row per off-diagonal entry.
    # p is inferred from the outcome matrix y passed through ...
    # (previously, if y was absent this failed with a cryptic "object 'p' not found")
    params= list(...)
    if (!("y" %in% names(params))) stop("If Z is missing, argument y must be provided so that its dimension can be determined")
    p= ncol(params$y)
    Z = matrix(1, nrow= p*(p-1)/2, ncol=1)
  } else {
    if (is.matrix(Z)) {
      # A single p x p meta-covariate matrix: add an intercept column
      if (nrow(Z) != ncol(Z)) stop("Z must be a square matrix with the same dimensions as y")
      Z = cbind(1, matrix(Z[upper.tri(Z)], ncol=1))
    } else if (is.list(Z)) {
      # A list of p x p matrices: one meta-covariate column per list entry
      if (!all(sapply(Z, is.matrix))) stop("Z is a list but some of its entries are not a matrix")
      Z = do.call(cbind, lapply(Z, function(A) A[upper.tri(A)]))
    } else {
      stop("If specified, Z must be a matrix or a list. If missing, it's taken to only contain the intercept")
    }
  }
  # Call generic eBayes function
  modelSelectionGeneric_eBayes(msfun="modelSelectionGGM", Z=Z, wini=wini, niter.mcmc=niter.mcmc, niter.mstep=niter.mstep, niter.eBayes=niter.eBayes, priorvar.w=priorvar.w, verbose=verbose, ...)
}
# - msfun: name of the model selection function, which must return a list with a component $margpp storing posterior marginal inclusion probabilities.
#   Set "modelSelection" for linear regression, GLMs, GAMs, and "modelSelectionGGM" for Gaussian graphical models
#   The function must have input arguments priorModel, niter and verbose
modelSelectionGeneric_eBayes= function(msfun, Z, wini, niter.mcmc= 5000, niter.mstep= 1000, niter.eBayes= 20, priorvar.w, verbose=TRUE, ...) {
  # EM-type empirical Bayes estimation of the hyper-parameters w driving the
  # prior inclusion probabilities, followed by a final MCMC run at the best w.
  # - msfun: "modelSelection" or "modelSelectionGGM" (name of the fitting function)
  # - Z: p x q meta-covariate matrix (intercept already added by the callers)
  # - wini: optional initial value for the q hyper-parameters
  # Output: the fit returned at the estimated w, with $eBayes_hyperpar and $Z added
  if (msfun == "modelSelection") {
    modselfun <- modelSelection
    # Default hyper-parameters prior variance
    if (missing(priorvar.w)) priorvar.w= eBayes_priorelicit(Z=Z, targetprob=0.95, min_priorprob=0.001, prior="zellner")
  } else if (msfun == "modelSelectionGGM") {
    modselfun <- modelSelectionGGM
    # Default hyper-parameters prior variance. Note: the same elicitation is used
    # regardless of ncol(Z) (the original code had two byte-identical branches here)
    if (missing(priorvar.w)) priorvar.w= eBayes_priorelicit(Z=Z, targetprob=0.95, min_priorprob=0.001, prior="zellner")
  } else stop("An invalid 'msfun' was provided")
  # Get initial parameter estimates
  if (verbose) message("Initializing hyper-parameter estimates... ")
  if (missing(wini)) {
    # Obtain posterior inclusion prob under Beta-Binomial(1,1) model prior
    ms= modselfun(priorModel=modelbbprior(), niter=niter.mstep, verbose=FALSE, ...)
    # Extract marginal posterior inclusion probabilities
    if (msfun == "modelSelectionGGM") {
      k = (1:ms$p)
      exclude_param <- k*(k+1)/2  # positions of diagonal entries within margpp
      pip= ms$margpp[-exclude_param] #diagonal entries are always non-zero (PIP=1)
    } else {
      pip= ms$margpp
    }
    # Set initial estimate using solution from full-rank/clustering case
    wini= eBayes_mstep_init(Z=Z, postprob=pip, priorvar.w=priorvar.w)
    w= wini$w
  } else {
    w= wini
    wini= eBayes_mstep_init(Z=Z)
    if (msfun == "modelSelectionGGM") {
      p = (1 + sqrt(1 + 8 * nrow(Z)))/2 #figure out p from number of upper-triangular entries (rows in Z)
      k = (1:p)
      exclude_param <- k*(k+1)/2
    }
  }
  # Iterate EM updates of w until convergence
  i = 1
  found= FALSE
  # Prior on w is N(0, priorvar.w * V) with V= (Z^T Z / nrow(Z))^{-1}, hence
  # Vinv= Z^T Z / nrow(Z). (Previously divided by ncol(Z), inconsistent with
  # eBayes_priorelicit and with the documented prior)
  Vinv= (t(Z) %*% Z) / nrow(Z)
  priorhess= - Vinv / priorvar.w
  fval= double(niter.eBayes + 1)
  fval[1]= fbest= wbest = NA
  noimprove= 0
  while (!found) {
    # Obtain current prior inclusion probabilities
    if ((msfun == "modelSelectionGGM") && (ncol(Z) == 1)) {
      priorprob= as.vector(1 / (1 + exp(- w)))
    } else {
      priorprob= 1 / (1 + exp(- Z %*% w))
    }
    # E-step: find posterior probabilities for current w
    priorModel= modelbinomprior(priorprob)
    ms= modselfun(priorModel=priorModel, niter=niter.mstep, verbose=FALSE, ...)
    # Extract marginal posterior inclusion probabilities
    if (msfun == "modelSelectionGGM") {
      pip <- ms$margpp[-exclude_param] #diagonal entries are always non-zero (PIP=1)
    } else {
      pip <- ms$margpp
    }
    # M-step
    if (wini$fullrankcase) {
      wnew= eBayes_mstep_fullrank(Z=Z, Uinv=wini$Uinv, postprob=pip, wcur=w, priorvar.w=priorvar.w)
    } else {
      wnew= eBayes_mstep(wcurrent=w, Z=Z, postprob=pip, maxiter=10, priorvar.w=priorvar.w, Vinv=Vinv, priorhess=priorhess)$w
    }
    maxstep= max(abs(wnew - w))
    # Objective function
    if (msfun == "modelSelection") {
      fval[i+1]= eBayes_logit_objective(wnew, msfit=ms, Z=Z, priorvar.w=priorvar.w, Vinv=Vinv)
      if (is.na(fbest)) {
        # First iteration: evaluate the objective at the initial w and print the header
        fbest= fval[i]= eBayes_logit_objective(w, msfit=ms, Z=Z, priorvar.w=priorvar.w, Vinv=Vinv)
        wbest= w
        if (verbose) message(paste("Done\n\n","Iter", "Objective fun", "Hyper-parameter", "\n", i, fval[i], paste(w, collapse=" "), "\n"))
      }
      if (fval[i+1] >= fval[i]) { noimprove= 0 } else { noimprove= noimprove + 1 }
      if (fval[i+1] > fbest) { fbest= fval[i+1]; wbest= wnew }
      found = (i >= niter.eBayes) || (maxstep < 0.01) || (noimprove > 1)
    } else {
      # GGMs: no objective function is evaluated, so keep the latest iterate as
      # the best w. (Previously wbest was frozen at the initial w, discarding all
      # EM updates in the final MCMC; also `if (is.na(wbest))` errors for
      # vector-valued w after the first iteration)
      if (i == 1 && verbose) message(paste("Done\n\n","Iter", "Hyper-parameter", "\n", i, paste(w, collapse=" "), "\n"))
      wbest= wnew
      found = (i >= niter.eBayes) || (maxstep < 0.01)
    }
    w= wnew
    i= i + 1
    if (verbose) message(paste(" ", i, ifelse(msfun=="modelSelection", fval[i],""), paste(w, collapse=" "), "\n"))
  }
  # Run modelSelection with empirical Bayes prior probabilities
  if (verbose) message("Done. \n","Final MCMC with best hyper-parameter value... ")
  w= wbest
  # Obtain prior inclusion probabilities at the selected w
  if ((msfun == "modelSelectionGGM") && (ncol(Z) == 1)) {
    priorprob= as.vector(1 / (1 + exp(- w)))
  } else {
    priorprob= 1 / (1 + exp(- Z %*% w))
  }
  priorModel= modelbinomprior(priorprob)
  if (msfun == "modelSelection") {
    ms= modelSelection(priorModel=priorModel, niter=niter.mcmc, verbose=FALSE, ...)
  } else {
    ms= modelSelectionGGM(priorModel=priorModel, niter=niter.mcmc, verbose=FALSE, ...)
  }
  ms$eBayes_hyperpar= w
  ms$Z= Z
  if (verbose) message("Done\n")
  return(ms)
}
# Find prior variance of hyper-parameters such that all prior inclusion probabilities are in (min_priorprob, 1 - min_priorprob) with >= targetprob probability
# Hyper-parameters w follow a prior w ~ N(0, priorvar.w V), where V=I when prior == "normalshrinkage" and V= (Z^T Z / nrow(Z))^{-1} when prior == "zellner"
# The prior inclusion probability for a covariate that has meta-covariates z is m(w)= 1 / (1 + e^{-z^T w})
#
# Input
# - Z: p x q matrix containing the q meta-covariates for the p covariates. It should not include an intercept, as it's already automatically added
# - targetprob: required prior probability that each prior inclusion probability falls in (min_priorprob, 1 - min_priorprob)
# - min_priorprob: bound defining the admissible interval (min_priorprob, 1 - min_priorprob) for the prior inclusion probabilities
# Output: value of priorvar.w such that, for every row z of Z, the prior probability that m(z) falls in (min_priorprob, 1 - min_priorprob) is >= targetprob
eBayes_priorelicit= function(Z, targetprob=0.95, min_priorprob=0.001, prior="zellner") {
  # Elicit the prior variance of the hyper-parameters w so that, for every row z
  # of Z, the prior inclusion probability m(z)= 1/(1 + exp(-z^T w)) lies in
  # (min_priorprob, 1 - min_priorprob) with probability >= targetprob.
  # Prior: w ~ N(0, priorvar.w * V), with V= I ("normalshrinkage") or
  # V= (Z^T Z / nrow(Z))^{-1} ("zellner").
  # Output: the elicited value of priorvar.w (a positive scalar)
  if (min_priorprob >= 0.5) stop("min_priorprob must be < 0.5")
  # match.arg gives an informative error for unsupported priors
  # (previously an unsupported prior failed with "object 'vmax' not found")
  prior= match.arg(prior, c("zellner", "normalshrinkage"))
  # Base variance: logit(min_priorprob) must be qnorm((1-targetprob)/2) standard deviations away from 0
  priorvar.w= (log(min_priorprob/(1-min_priorprob)) / qnorm((1-targetprob)/2))^2
  if (prior == "zellner") {
    V= solve((t(Z) %*% Z) / nrow(Z))
    # Prior variance of z^T w is priorvar.w * z^T V z; find the largest quadratic form
    v= mahalanobis(Z, center=rep(0,ncol(Z)), cov=V, inverted=TRUE)
    vmax= max(v)
  } else {
    # normalshrinkage: V = I, so the quadratic form is just ||z||^2
    vmax= max(rowSums(Z^2))
  }
  # Scale so the constraint holds even at the most extreme row of Z
  priorvar.w= priorvar.w / vmax
  return(priorvar.w)
}
# Exact M-step solution when the number of unique rows in Z is ncol(Z), and U which is the ncol(Z) x ncol(Z) matrix containing these unique rows has full rank
# The M-step seeks w such that Z^T m(w) = Z^T postprob, where m(z) = 1 / (1 + exp{- Z w})
eBayes_mstep_fullrank= function(Z, Uinv, postprob, wcur, priorvar.w) {
  # Exact M-step solution in the full-rank case: the matrix U of unique rows of Z
  # is square (ncol(Z) x ncol(Z)) and invertible. Solves Z^T m(w) = Z^T postprob,
  # where m(z) = 1 / (1 + exp(-z^T w)), by transforming to the basis of Uinv and
  # solving one univariate modified-logistic equation per coordinate.
  if (missing(Uinv)) {
    U= unique(Z)
    if (nrow(U) != ncol(U)) stop("The number of unique rows in Z is not ncol(Z)")
    Uinv= solve(U)
  } else {
    if (ncol(Uinv) != ncol(Z)) stop("Uinv must have the same number of columns as Z")
    if (nrow(Uinv) != ncol(Uinv)) stop("Uinv must be a square matrix")
  }
  # Transformed design: the M-step equations decouple coordinate-wise in this basis
  Ztilde= Z %*% Uinv
  meanpp= (t(Ztilde) %*% postprob) / colSums(Ztilde)
  # Default starting point: logit of the weighted posterior probabilities
  if (missing(wcur)) wcur= log(meanpp / (1-meanpp))
  cpar= priorvar.w * nrow(Z)
  wtilde= vapply(seq_len(ncol(Z)), function(j) {
    eBayes_fullrank_solve_modifiedlogistic(a=meanpp[j], c=cpar, w_initial=wcur[j])
  }, numeric(1))
  # Map the solution back to the original basis
  Uinv %*% wtilde
}
# Newton-Raphson solver for f(w) = a, where f(w) = 1 / (1 + exp(-w)) + w/c
eBayes_fullrank_solve_modifiedlogistic <- function(a, c, w_initial, tol = 1e-3, max_iter = 100) {
  # Newton-Raphson solver for the modified logistic equation
  #   1 / (1 + exp(-w)) + w/c = a
  # Returns the last iterate once |residual| < tol or max_iter updates are done.
  if (missing(w_initial)) w_initial <- log(a / (1 - a)) # exact solution as c -> infinity
  w <- w_initial
  for (step in seq_len(max_iter)) {
    resid <- 1 / (1 + exp(-w)) + w / c - a
    if (abs(resid) < tol) break # converged: residual small enough
    # Derivative of the left-hand side: logistic density plus the linear slope
    slope <- exp(-w) / (1 + exp(-w))^2 + 1 / c
    w <- w - resid / slope # Newton update
  }
  w
}
# Find an initial value for w solving the M-step equation Z^T m(w) = Z^T postprob, where m(z) = 1 / (1 + exp{- Z w})
#
# The exact solution is returned when the matrix U containing the unique rows in Z has ncol(Z) rows and is full-rank
# Otherwise, an approximated w is returned by clustering Z into ncol(Z) clusters and replacing Z by the cluster means (which, if they're full-rank, admit a closed-form solution
# Input
# - Z: p x q matrix recording q meta-covariates for p covariates
# - postprob: vector of length p with posterior probabilities for each covariate at current w
# - priorvar.w: the prior on w is N(0, priorvar.w * V) where V= (Z^T Z/nrow(Z))^{-1}
# Output: a list with the following components
# - w: value of w. Only returned if postprob is not missing
# - fullrankcase: TRUE if Z satisfies the full-rank case conditions
# - Uinv: inverse of U if fullrankcase==TRUE, and NULL otherwise
eBayes_mstep_init= function(Z, postprob, priorvar.w) {
  # Find an initial value for w solving the M-step equation Z^T m(w) = Z^T postprob,
  # where m(z) = 1 / (1 + exp(-z^T w)).
  # The exact solution is used when the unique rows of Z form a square full-rank
  # matrix U; otherwise a least-squares fit on the logit scale is used.
  # (A clustering-based fallback was previously prototyped here and removed.)
  # Note: the original default `priorvar.w=priorvar.w` was self-referential and
  # errored with "promise already under evaluation" whenever forced; the argument
  # now simply has no default (it is only needed in the full-rank case).
  # Output: list with components
  # - w: initial value (NULL when postprob is missing)
  # - fullrankcase: TRUE if Z satisfies the full-rank case conditions
  # - Uinv: inverse of U if fullrankcase==TRUE, NULL otherwise
  if (!missing(postprob)) {
    if (is.vector(postprob)) postprob= matrix(postprob, ncol=1)
  }
  U= unique(Z)
  wini= NULL
  fullrankcase= FALSE
  if (nrow(U) == ncol(U)) {
    # try(): U may be square but singular, in which case we fall through
    Uinv= try(solve(U), silent=TRUE)
    if (!inherits(Uinv, "try-error")) {
      if (!missing(postprob)) wini= eBayes_mstep_fullrank(Z, Uinv=Uinv, postprob=postprob, priorvar.w=priorvar.w)
      fullrankcase= TRUE
    }
  }
  if (!missing(postprob)) {
    if (is.null(wini)) {
      # Fallback: least-squares fit of logit(postprob) on Z (the 0.001 offsets
      # keep the logit finite at postprob values of exactly 0 or 1)
      logitpp= log((postprob + .001) / (1 - postprob + .001))
      wini= matrix(coef(lm(logitpp ~ -1 + Z)), ncol=1)
    }
  } else {
    wini= NULL
  }
  # Return output
  if (!fullrankcase) Uinv= NULL
  ans= list(w= wini, fullrankcase= fullrankcase, Uinv= Uinv)
  return(ans)
}
# Use Newton-Raphson to solve the M-step equation Z^T m(w) = Z^T postprob, where m(z) = 1 / (1 + exp{- Z w})
# Input
# - wcurrent: current value of w
# - Z: p x q matrix containing the q meta-covariates for the p covariates. It should not include an intercept, as it's already automatically added
# - postprob: vector of length p with posterior probabilities for each covariate at w= wcurrent
# - priorvar.w: prior on w is N(0, priorvar.w V) where V= (Z^T Z/nrow(Z))^{-1}
# - maxiter: maximum number of iterations
# Output
# - w: solution of the M-step equation
# - grad: gradient of the objective function Z^T ( m(w) - postprob ) at w. Upon convergence, it should be zero
eBayes_mstep= function(wcurrent, Z, postprob, priorvar.w, Vinv, priorhess, maxiter=10) {
  # Newton-Raphson solver for the M-step equation Z^T m(w) = Z^T postprob,
  # where m(z) = 1 / (1 + exp(-z^T w)), penalized by the prior w ~ N(0, priorvar.w V).
  # - wcurrent: current value of w
  # - Z: p x q meta-covariate matrix
  # - postprob: length-p posterior inclusion probabilities at w=wcurrent
  # - priorvar.w, Vinv: prior is N(0, priorvar.w V) with Vinv= V^{-1}
  # - priorhess: optional; recomputed from Vinv and priorvar.w when missing
  # - maxiter: maximum number of Newton iterations after the first step
  # Note: the original defaults `priorvar.w=priorvar.w, Vinv=Vinv, priorhess=priorhess`
  # were self-referential and errored with "promise already under evaluation" when
  # forced; these arguments now have no default.
  # Output: list with w (the solution) and grad (the gradient used for the last
  # Newton step, i.e. evaluated at the previous iterate; near zero at convergence)
  if (missing(priorhess)) priorhess= - Vinv / priorvar.w
  # Perform a first Newton-Raphson step
  w = wcurrent
  priorprob= 1 / (1 + exp(- Z %*% w))
  priorgrad= - (Vinv %*% w) / priorvar.w
  grad= priorgrad + eBayes_em_logit_grad(w, postprob=postprob, Z=Z, priorprob=priorprob)
  hess= priorhess + eBayes_em_logit_hess(w, postprob=postprob, Z=Z, priorprob=priorprob)
  delta= - base::solve(hess, grad)
  w= w + delta
  maxstep= max(abs(delta))
  found= (maxstep < 0.01)
  iter= 0
  while (!found) {
    priorprob= 1 / (1 + exp(- Z %*% w))
    priorgrad= - (Vinv %*% w) / priorvar.w
    grad= priorgrad + eBayes_em_logit_grad(w, postprob=postprob, Z=Z, priorprob=priorprob)
    hess= priorhess + eBayes_em_logit_hess(w, postprob=postprob, Z=Z, priorprob=priorprob)
    delta= - base::solve(hess, grad)
    w= w + delta
    iter= iter + 1
    maxstep= max(abs(delta))
    if ((iter > maxiter) || maxstep < 0.001) found= TRUE
  }
  # Return output (explicit return; previously the trailing assignment returned invisibly)
  ans= list(w=w, grad=grad)
  return(ans)
}
# Objective function in the empirical Bayes problem
# The objective function is sum_gamma pi(y | gamma) pi(gamma | w)= sum_gamma h(gamma) pi(gamma | y, w)
# where h(gamma)= pi(y | gamma) pi(gamma | w) / pi(gamma | y, w)
eBayes_logit_objective= function(w, msfit, Z, priorvar.w, Vinv) {
  # Objective function in the empirical Bayes problem:
  #   log sum_gamma exp(msfit$postProb) minus the Gaussian log-prior penalty on w.
  # Z is unused; kept for signature compatibility with the callers.
  # Log-sum-exp is computed stably: the naive log(sum(exp(x))) overflows to Inf
  # when entries of postProb are large
  m= max(msfit$postProb)
  if (is.finite(m)) {
    ans= m + log(sum(exp(msfit$postProb - m)))
  } else {
    ans= m # all entries -Inf (or empty-safe max): log-sum-exp equals the max
  }
  ans= ans - (0.5 / priorvar.w) * (matrix(w,nrow=1) %*% Vinv %*% matrix(w,ncol=1)) #subtract log-prior
  return(ans)
}
# Objective function in the M-step under a logit link
# The objective function is f(w)= E[ log pi(gamma | w) | y, w=wcur ]
# - w: q-dimensional vector with hyper-parameter values
# - msfit: output of modelSelection when w=wcur
# - postprob: posterior inclusion probabilities pi(gamma_j = 1 | wcur), where wcur is the current hyper-parameter value
# - Z: p x q matrix containing the q meta-covariates for the p covariates. It should not include an intercept, as it's already automatically added
# - postprob, priorprob: ignored. Kept for compability with eBayes_em_logit_grad
# - priorvar.w, Vinv: prior on w is N(0, priorvar.w V), and Vinv= V^{-1}
# Output: f(w) at w
eBayes_em_logit_objective= function(w, msfit, Z, postprob, priorprob, priorvar.w, Vinv) {
  # Objective function in the M-step under a logit link:
  #   f(w)= E[ log pi(gamma | w) | y, w=wcur ] minus the Gaussian log-prior penalty.
  # - w: q-dimensional hyper-parameter vector
  # - msfit: output of modelSelection at w=wcur (postProb(msfit) gives model ids and weights)
  # - Z: p x q meta-covariate matrix
  # - postprob: ignored; kept for compatibility with eBayes_em_logit_grad
  # - priorprob: prior inclusion probabilities; computed from (Z, w) if not given
  # - priorvar.w, Vinv: prior on w is N(0, priorvar.w V), with Vinv= V^{-1}
  if (missing(priorprob)) priorprob= 1 / (1 + exp(- Z %*% w))
  ppmodel= postProb(msfit)
  logprior= double(nrow(ppmodel))
  for (i in seq_len(nrow(ppmodel))) {
    gamma= as.numeric(strsplit(ppmodel$modelid[i], split=",")[[1]])
    if (length(gamma) == 0) {
      # Null model: x[-numeric(0)] selects NOTHING in R, so the original code
      # silently dropped the log(1 - priorprob) contribution; handle explicitly
      logprior[i]= sum(log(1 - priorprob))
    } else {
      logprior[i]= sum(log(priorprob[gamma])) + sum(log((1-priorprob)[-gamma]))
    }
  }
  ans= weighted.mean(logprior, w=ppmodel$pp) #expected log model prior
  ans= ans - (0.5 / priorvar.w) * (matrix(w,nrow=1) %*% Vinv %*% matrix(w,ncol=1)) #subtract log-prior
  return(ans)
}
# Gradient of the objective function in the M-step under a logit link
# The objective function is f(w)= E[ log pi(gamma | w) | y, w=wcur ]
# - w: q-dimensional vector with hyper-parameter values
# - postprob: posterior inclusion probabilities pi(gamma_j = 1 | wcur), where wcur is the current hyper-parameter value
# - Z: p x q matrix containing the q meta-covariates for the p covariates. It should not include an intercept, as it's already automatically added
# - priorprob: prior inclusion probabilities 1/(1+exp(Z w)). If not specified, they're computed from (Z,w)
# - msfit: ignored (kept for compability with eBayes_em_logit_objective)
# Output: gradient of f(w) at w
eBayes_em_logit_grad= function(w, postprob, Z, priorprob, msfit) {
  # Gradient of the M-step objective f(w)= E[log pi(gamma | w) | y, w=wcur]
  # under a logit link: Z^T (postprob - priorprob).
  # - priorprob: computed from (Z, w) when not supplied
  # - msfit: unused; kept so the signature matches eBayes_em_logit_objective
  if (missing(priorprob)) priorprob= 1 / (1 + exp(- Z %*% w))
  residpp= postprob - priorprob
  crossprod(Z, residpp)
}
# Same as eBayes_em_logit_grad, but returns the hessian of f(w)
eBayes_em_logit_hess= function(w, postprob, Z, priorprob, msfit) {
  # Hessian of the M-step objective f(w)= E[log pi(gamma | w) | y, w=wcur] under a
  # logit link. With p= priorprob, the gradient is Z^T (postprob - p) (see
  # eBayes_em_logit_grad) and dp/dw = p(1-p) z, so the Hessian is
  #   -Z^T diag(p (1-p)) Z.
  # postprob and msfit are unused; kept for signature compatibility.
  if (missing(priorprob)) priorprob= 1 / (1 + exp(- Z %*% w))
  # Weight each row of Z by sqrt(p(1-p)). The original used sqrt(p) only, which
  # matches the true logistic curvature only when p is small
  Zstd= sqrt(priorprob[,1] * (1 - priorprob[,1])) * Z
  hess= - t(Zstd) %*% Zstd
  return(hess)
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.