Nothing
#' Log-posterior for given spline parameters, big bin (and optional: small bin) frequencies, tabulated sample moments and roughness penalty parameter.
#' Compared to \link{degross_lpost}, no Fisher information matrix is computed and the gradient evaluation is optional, with a resulting computational gain.
#' @usage degross_lpostBasic(phi, tau, n.i, degross.data,
#' use.moments = rep(TRUE,4), freq.min = 20, diag.only=FALSE,
#' gradient=FALSE,
#' penalize = TRUE, aa = 2, bb = 1e-6, pen.order = 3)
#' @param phi Vector of K B-spline parameters \eqn{\phi} to specify the log-density.
#' @param tau Roughness penalty parameter.
#' @param n.i Small bin frequencies.
#' @param degross.data A \link{degrossData.object} created using the \link{degrossData} function.
#' @param use.moments Vector with 4 logicals indicating which tabulated sample moments to use as soft constraints. Defaults: \code{rep(TRUE,4)}.
#' @param freq.min Minimal big bin frequency required to use the corresponding observed moments as soft constraints. Default: \code{20}.
#' @param diag.only Logical indicating whether to ignore the off-diagonal elements of the variance-covariance matrix of the sample central moments. Default: FALSE.
#' @param gradient Logical indicating if the gradient (Score) of the \eqn{\log p(\phi|\tau,data)} should be computed (default: FALSE).
#' @param penalize Logical indicating whether a roughness penalty of order \code{pen.order} is required (with \eqn{tau \sim G(aa,bb)}). Default: \code{TRUE}.
#' @param aa Real giving the first parameter in the Gamma prior for \code{tau}. Default: \code{2}.
#' @param bb Real giving the second parameter in the Gamma prior for \code{tau}. Default: \code{1e-6}.
#' @param pen.order Integer giving the order of the roughness penalty. Default: \code{3}.
#'
#' @return A list containing :
#' \itemize{
#' \item{\code{lpost.ni} : \verb{ }}{value of the log-posterior based on the given small bin frequencies \code{n.i} and the tabulated sample moments.}
#' \item{\code{lpost.mj} : \verb{ }}{value of the log-posterior based on the big bin frequencies \code{degross.data$freq.j} and the tabulated sample moments.}
#' \item{\code{llik.ni} : \verb{ }}{multinomial log-likelihood based on the given small bin frequencies \code{n.i}.}
#' \item{\code{llik.mj} : \verb{ }}{multinomial log-likelihood based on the big bin frequencies \code{degross.data$freq.j} resulting from \code{n.i}.}
#' \item{\code{moments.penalty} : \verb{ }}{log of the joint (asymptotic) density for the observed sample moments.}
#' \item{\code{penalty} : \verb{ }}{\eqn{\log p(\phi|\tau) + \log p(\tau)}.}
#' \item{\code{M.j} : \verb{ }}{theoretical moments of the density (resulting from \eqn{\phi}) within a big bin.}
#' \item{\code{pi.i} : \verb{ }}{small bin probabilities.}
#' \item{\code{ui} : \verb{ }}{small bin midpoints.}
#' \item{\code{delta} : \verb{ }}{width of the small bins.}
#' \item{\code{gamma.j} : \verb{ }}{big bin probabilities.}
#' \item{\code{tau} : \verb{ }}{reminder of the value of the roughness penalty parameter \eqn{\tau}.}
#' \item{\code{phi} : \verb{ }}{reminder of the vector of spline parameters (defining the density).}
#' \item{\code{n.i} : \verb{ }}{reminder of the small bin frequencies given as input.}
#' \item{\code{freq.j} : \verb{ }}{reminder of the big bin frequencies in \code{degross.data$freq.j}.}
#' }
#'
#' @author Philippe Lambert \email{p.lambert@uliege.be}
#' @references
#' Lambert, P. (2021) Moment-based density and risk estimation from grouped summary statistics. arXiv:2107.03883.
#'
#' @seealso \code{\link{degross_lpost}}, \code{\link{degross}}, \code{\link{degross.object}}.
#'
#' @examples
#' sim = simDegrossData(n=3500, plotting=TRUE,choice=2) ## Generate grouped data
#' obj.data = degrossData(Big.bins=sim$Big.bins, freq.j=sim$freq.j, m.j=sim$m.j)
#' print(obj.data)
#' obj.fit = degross(obj.data) ## Estimate the underlying density
#' phi.hat = obj.fit$phi ; tau.hat = obj.fit$tau
#' ## Evaluate the log-posterior at convergence
#' res = degross_lpostBasic(phi=phi.hat, tau=tau.hat, degross.data=obj.data,
#' gradient=TRUE)
#' print(res)
#'
#' @export
degross_lpostBasic = function(phi,tau,n.i,degross.data,
use.moments=rep(TRUE,4), freq.min=20, diag.only=FALSE,
gradient=FALSE,
penalize=TRUE,aa=2,bb=1e-6,pen.order=3){
if (missing(n.i)) n.i = NULL
## Spline parameters
K = length(phi)
##
## Penalty matrix
Dd = diff(diag(K),diff=pen.order)
Pd = t(Dd) %*% Dd
##
## freq.min = 20 ## Minimal big bin frequency for using the observed moments as soft constraints
## Data
freq.j = degross.data$freq.j # Frequencies for big bins
m.tot = sum(freq.j)
J = length(freq.j) # Nbr of big bins
m.j = degross.data$m.j # Sample central moments
small.to.big = degross.data$small.to.big # Big bin to which each small bin belongs
ui = degross.data$ui ## Midpoints of the small bins
delta = diff(ui[1:2]) ## Width of a small bin
##
## Fitted mean for small & big bins + small bin probs
B.i = degross.data$B.i
eta.i = c(B.i %*% phi)
temp = exp(eta.i) ## Small bin means
pi.i = temp / sum(temp) ## Small bin probs
gamma.j = c(rowsum(pi.i,small.to.big)) # Big bin probs
## gamma.j = c(tapply(pi.i,small.to.big,sum)) # Big bin probs
##
## Theoretical moments of the fitted density within a big bin
M.j = matrix(nrow=J,ncol=8)
rownames(M.j) = paste("Bin",1:J,sep="")
colnames(M.j) = paste("mu",1:8,sep="")
M.j[,1] = rowsum(ui*pi.i,small.to.big) / gamma.j # Theoretical mean within big bin j
## M.j[,1] = tapply(ui*pi.i,small.to.big,sum) / gamma.j # Theoretical mean within big bin j
for (r in 2:8){
## kth theoretical central moment within each of the big bins
M.j[,r] = rowsum((ui-M.j[small.to.big,1])^r*pi.i,small.to.big) / gamma.j
## M.j[,r] = tapply((ui-M.j[small.to.big,1])^r*pi.i,small.to.big,sum) / gamma.j
}
## Gradient
if (gradient){
Bi.tilde = t(t(B.i) - c(apply(pi.i*B.i,2,sum)))
PiBi.tilde = pi.i*Bi.tilde
## Score.ni
if (!is.null(n.i)) Score.ni = c(t(B.i)%*%(n.i-m.tot*pi.i))
else Score.ni = NULL
## Score.mj
Score.mj = Score.lprior = rep(0,ncol(B.i))
for (j in 1:J){
idx = which(small.to.big == j)
Score.mj = Score.mj + freq.j[j]/gamma.j[j]*c(apply(PiBi.tilde[idx,],2,sum))
}
}
R = 4 ## Up to moments of order 4 of interest
dmujr.k = array(dim=c(J,R,K)) # \partial\mu_{rj} / \partial\theta_k
##
## Contribution due to sample central moments within each big bin
if (sum(use.moments) > 0){
invSigma.j = array(dim=c(J,4,4))
for (j in 1:J){
idx = which(small.to.big == j)
if (gradient){
## \partial\mu_{1j} / \partial\theta_k
dmujr.k[j,1,] = c(apply((ui[idx]-M.j[j,1])*pi.i[idx]*Bi.tilde[idx,],2,sum)/gamma.j[j])
## \partial\mu_{rj} / \partial\theta_k (r > 1)
for (r in 2:4){
dmujr.k[j,r,] = c(apply((ui[idx]-M.j[j,1])^r*pi.i[idx]*Bi.tilde[idx,],2,sum)) / gamma.j[j]
temp = sum((ui[idx]-M.j[j,1])^(r-1)*pi.i[idx])
dmujr.k[j,r,] = dmujr.k[j,r,] - r * temp * dmujr.k[j,1,] / gamma.j[j]
temp2 = c(apply(pi.i[idx]*Bi.tilde[idx,],2,sum)/gamma.j[j])
dmujr.k[j,r,] = dmujr.k[j,r,] - M.j[j,r] * temp2
}
}
##
## Sample moments of interest with non-NA values AND big bin sample size >= freq.min
idx2 = which((freq.j[j] >= freq.min) & (!is.na(m.j[j,])) & use.moments)
if (length(idx2) > 0){ # i.e. if at least one observed sample moment of interest
Mat = Sigma_fun(M.j[j,1:8]) # Theoretical covariance matrix for moments
if (diag.only) Mat = diag(diag(Mat)) # Just keep variances (as covariances rely on moments of very high orders !)
invSigma.j[j,,] = freq.j[j] * solve(Mat) # Theoretical version corrected for frequencies
if (gradient){
Mat = matrix(invSigma.j[j,idx2,idx2],nrow=length(idx2)) # Make sure that sub-matrix is a matrix...
temp2 = matrix(dmujr.k[j,idx2,],nrow=length(idx2))
temp = Mat %*% temp2
Score.lprior = Score.lprior + matrix(m.j[j,idx2]-M.j[j,idx2],nrow=1) %*% temp
}
}
}
}
## Contribution due to roughness penalty
if (gradient){
Score.lprior = c(Score.lprior)
if (penalize){
Score.lprior = Score.lprior - tau * c(Pd%*%phi)
}
##
if (!is.null(n.i)) Score.ni = Score.ni + Score.lprior
Score.mj = Score.mj + Score.lprior
}
## lpost
llik.ni = NULL
if (!is.null(n.i)) llik.ni = sum(n.i*log(pi.i)) ## log complete likelihood
llik.mj = sum(freq.j * log(gamma.j)) ## log observed likelihood
##
moments.penalty = 0
if (sum(use.moments) > 0){
for (j in 1:J){
## Sample moments of interest with non-NA values AND big bin sample size >= 20
idx2 = which((freq.j[j] >= freq.min) & (!is.na(m.j[j,])) & use.moments)
if (sum(idx2) > 0){
temp = matrix(invSigma.j[j,idx2,idx2],nrow=length(idx2))
eig.vals = svd(temp)$d ; eig.vals = eig.vals[eig.vals > 1e-4]
moments.penalty = moments.penalty + .5*sum(log(eig.vals))
moments.penalty = moments.penalty -.5 * sum(c(m.j[j,idx2]-M.j[j,idx2]) * c(temp %*% c(m.j[j,idx2]-M.j[j,idx2])))
}
}
}
## Roughness penalty
penalty = 0
if (penalize){
penalty = (aa+.5*nrow(Dd))*log(tau) -tau*(bb+.5*sum(phi*c(Pd%*%phi)))
}
##
lpost.ni = llik.ni + moments.penalty + penalty ## log complete posterior
lpost.mj = llik.mj + moments.penalty + penalty ## log observed posterior
##
## Output
res = list()
res$lpost.ni = lpost.ni
res$lpost.mj = lpost.mj
res$llik.ni = llik.ni
res$llik.mj = llik.mj
res$moments.penalty = moments.penalty
res$penalty = penalty
## res$Score = Score
if (gradient){
if (!is.null(n.i)) res$Score.ni = Score.ni
res$Score.mj = Score.mj
res$Score.lprior = Score.lprior
}
res$M.j = M.j
res$pi.i = pi.i
res$ui = ui ; res$delta = delta
res$gamma.j = gamma.j
res$tau = tau
res$phi = phi
res$n.i = n.i
res$freq.j = freq.j
##
return(res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.