Nothing
################################################################################################################
## METHODS FOR OBJECTS OF CLASS skewtFit
################################################################################################################
setMethod("show", signature(object="skewtFit"), function(object) {
cat("skew-t mixture with ",object$G," components\n")
cat("Use coef() to obtain posterior means. clusterprobs() for cluster probabilities\n")
}
)
setMethod("coef", signature(object="skewtFit"), function(object, ...) {
mu <- lapply(object$mu,colMeans)
p <- length(mu[[1]])
diag <- 1:p; nondiag <- (p+1):ncol(object$Sigma[[1]])
Sigma <- lapply(object$Sigma, function(z) { vec2matrix(colMeans(z),diag=diag,nondiag=nondiag) })
alpha <- lapply(object$alpha,colMeans)
nu <- colMeans(object$nu)
probs <- colMeans(object$probs)
return(list(mu=mu,Sigma=Sigma,alpha=alpha,nu=nu,probs=probs))
}
)
#Compute cluster probabilities under skew-t mixture, averaging over posterior draws contained in x
# - fit: object of type skewtFit
# - x: data points at which to evaluate the probabilities
# - iter: iterations of posterior draws over which to average. Defaults to using 1,000 equally spaced iterations.
setMethod("clusterprobs", signature(fit='skewtFit'), function(fit, x, iter) {
if (missing(iter)) {
if (nrow(fit$probs)>1000) {
iter <- as.integer(seq(1,nrow(fit$probs),length=min(nrow(fit$probs),1000)))
} else {
iter <- 1:nrow(fit$probs)
}
}
if (!is.integer(iter)) stop("iter must contain integer iteration numbers")
if ((min(iter)<1) | max(iter)>nrow(fit$mu[[1]])) stop("Specified iter values are out of valid range")
if (missing(x)) stop("x values must be provided")
G <- fit$G; p <- ncol(fit$mu[[1]])
diag <- 1:p; nondiag <- (p+1):ncol(fit$Sigma[[1]])
S <- lapply(1:G, function(i) { ans <- apply(fit$Sigma[[i]][iter,], 1, vec2matrix, diag=diag, nondiag=nondiag); array(as.vector(ans),dim=c(p,p,ncol(ans))) } )
proboneiter <- function(i) {
dx <- sapply(1:G,function(g) dskewt(x,mu=fit$mu[[g]][iter[i],],Sigma=S[[g]][,,i],alpha=fit$alpha[[g]][iter[i],],nu=fit$nu[iter[i],g],param='eps',logscale=TRUE,ttype=fit$ttype))
dx <- t(t(dx)+log(fit$probs[i,]))
dx <- exp(dx - apply(dx,1,max))
dx/rowSums(dx)
}
ans <- lapply(1:length(iter),function(i) proboneiter(i))
ans <- Reduce('+',ans)/length(ans)
return(ans)
}
)
loglmean <- function(fit,x) {
#log-likelihood evaluated at posterior mean
parest <- coef(fit)
sum(dmixskewt(x,mu=parest$mu,Sigma=parest$Sigma,alpha=parest$alpha,nu=parest$nu,probs=parest$probs,param='eps',logscale=TRUE,ttype=fit$ttype))
}
logl <- function(fit,x) {
#log-likelihood at each MCMC iteration
if (class(fit) != 'skewtFit') stop("fit must be of class skewtFit")
G <- fit$G; p <- ncol(fit$mu[[1]])
diag <- 1:p; nondiag <- (p+1):ncol(fit$Sigma[[1]])
S <- lapply(1:G, function(i) { ans <- apply(fit$Sigma[[i]], 1, vec2matrix, diag=diag, nondiag=nondiag); array(as.vector(ans),dim=c(p,p,ncol(ans))) } )
lhoodoneiter <- function(i) {
dx <- sapply(1:G,function(g) dskewt(x,mu=fit$mu[[g]][i,],Sigma=S[[g]][,,i],alpha=fit$alpha[[g]][i,],nu=fit$nu[i,g],param='eps',logscale=TRUE,ttype=fit$ttype))
dx <- t(t(dx)+log(fit$probs[i,]))
sum(log(rowSums(exp(dx))))
}
ans <- sapply(1:nrow(fit$mu[[1]]),function(i) lhoodoneiter(i))
ans
}
vec2matrix <- function(vec,diag,nondiag) {
#Format input vector as symmetric matrix. Indices of diagonal elements are in diag, those on the upper triangle in nondiag
S <- diag(vec[diag],ncol=length(diag))
if (length(nondiag)>0) {
S[upper.tri(S)] <- vec[nondiag]
S <- S + t(S) - diag(diag(S))
}
return(S)
}
fixLabelSwitch <- function(fit,x,z,method='ECR') {
#Permute component labels to avoid label switching issues. Components relabelled to have increasing projection on first PC
# Input
# - fit: object of type skewtFit
# - x: observed data used to obtain fit
# - z: latent cluster allocations at each MCMC iteration
# - method: 'ECR' for Papastamoulis-Iliopoulos 2010 to make simulated z similar to pivot z (taken from last iteration); 'RW' for Rodriguez-Walker (2014) relabelling (loss function aimed at preserving cluster means), 'PC' for identifiability constraint based on projection of location parameters on first principal component
# Output: object of type skewtFit with relabelled components
if (class(fit) != 'skewtFit') stop("fit must be of class skewtFit")
if (method=='ECR') {
r <- ecr(z[nrow(z),],z=z,K=fit$G)[[1]]
} else if (method=='RW') {
r <- dataBased(x,K=fit$G,z)[[1]]
} else if (method=='PC') {
e <- eigen(cov(x))$vectors[,1,drop=FALSE]
proj <- do.call(cbind,lapply(fit$mu, "%*%", e))
r <- t(apply(proj,1,rank,ties.method='first'))
} else { stop("Invalid value for 'method' in fixLabelSwitch") }
fitnew <- fit
for (g in 1:fit$G) {
glabel <- apply(r==g, 1, function(z) match(TRUE,z)) #component in fit corresponding to g^th reordered component in fitnew
for (gg in 1:fit$G) {
sel <- glabel==gg
fitnew$mu[[g]][sel,] <- fit$mu[[gg]][sel,]
fitnew$Sigma[[g]][sel,] <- fit$Sigma[[gg]][sel,]
fitnew$alpha[[g]][sel,] <- fit$alpha[[gg]][sel,]
fitnew$nu[sel,g] <- fit$nu[sel,gg]
fitnew$probs[sel,g] <- fit$probs[sel,gg]
if (!is.null(dim(fit$cluster))) fitnew$cluster[fit$cluster==gg] <- g
}
}
return(fitnew)
}
################################################################################################################
## ROUTINES TO EVALUATE DENSITY, RANDOM NUMBER GENERATION
################################################################################################################
#Univariate skew T density
# When param=='eps': dt(x;mu,s/(1-alpha),nu) I(x>=mu) + dt(x;mu,s/(1+alpha),nu) I(x<mu)
# When param=='isf': dt(x;mu,s*alpha,nu) I(x>=mu) + dt(x;mu,s/alpha,nu) I(x<mu)
dskewtuniv <- function(x,mu,s,alpha,nu,param='eps',logscale=FALSE) {
if (param=='eps') {
ans <- dtp3(x,mu=mu,par1=s,par2=alpha,FUN=function(z,log) dt(z,df=nu,log=log),param="eps",log=logscale)
} else if (param=='isf') {
ans <- dtp3(x,mu=mu,par1=s,par2=1/alpha,FUN=function(z,log) dt(z,df=nu,log=log),param="isf",log=logscale)
} else stop("Invalid value was specified for 'param'")
return(ans)
}
check.skewtpars <- function(mu,Sigma,A,D,alpha,nu,param) {
#Check that input parameters for multivariate t are correct
p <- length(mu)
if (missing(A) | missing(D)) {
if (ncol(Sigma) != p) stop("Dimension of mu and Sigma doesn't match")
} else {
if ((ncol(A)!=p) | (ncol(D)!=p)) stop("Dimensions of mu and A,D don't match")
}
if (param=='eps') {
if (any(abs(alpha)>1)) stop("When param=='eps' alpha must have entries in [-1,1]")
} else if (param=='isf') {
if (any(alpha<=0)) stop("When param=='isf' alpha must be a vector with entries >0")
}
if (length(nu)>1) stop("nu must have length 1")
if (nu<=0) stop("nu must be >=0")
}
#Multivariate skew T density (p dimensions)
# Input
# - x: vector of length(p), alternatively matrix or data.frame with arbitrary number of rows and ncol(x)==p
# - mu: mean vector of length(p)
# - Sigma: p*p positive-definite matrix
# - A: optional parameter containing pre-computed A=t(eigen(Sigma)). For identifiability all elements in A[,1] must be positive, if any entry is negative then the corresponding row is changed sign
# - D: optional parameter containing pre-computed A=diag(eigen(Sigma)$values)
# - alpha: asymmetry parameters. Either a vector of length p or length 1, in the latter case all asymmetry parameters are assumed equal
# - nu: scalar indicating degrees of freedom
# - param: set to 'eps' for the epsilon-skew parametization, 'isf' for the inverse scale factor parameterization
# - logscale: set to TRUE to obtain log-pdf
# - ttype: set to 'independent' for independent skew-t, to 'dependent' for dependent skew-t
# Output: multivariate skew T pdf evaluated at x. It has length 1 if x was a vector, length equal to nrow(x) if x was a matrix or data.frame
dskewt <- function(x,mu,Sigma,A,D,alpha,nu,param='eps',logscale=FALSE,ttype='independent') {
p <- length(mu)
if (length(alpha) != p) { if (length(alpha)==1) alpha <- rep(alpha,p) else stop("length(alpha) must be either 1 or length(mu)") }
check.skewtpars(mu=mu,Sigma=Sigma,A=A,D=D,alpha=alpha,nu=nu,param=param)
#Eigendecomposition
if (missing(A) | missing(D)) {
e <- eigen(Sigma,symmetric=TRUE); A <- t(e$vectors); D <- diag(p); diag(D) <- e$values; A <- A * ifelse(sign(A[,1])== -1, -1, 1)
}
if (any(diag(D)<=0)) stop("Sigma is not positive definite")
if (is.vector(x)) {
x <- as.numeric(x)
if (length(x) != p) stop("Dimensions of x and mu don't match")
sqDinv <- diag(p); diag(sqDinv) <- 1/sqrt(diag(D))
z <- sqDinv %*% A %*% matrix(x - as.vector(as.numeric(mu)),ncol=1)
} else if (is.matrix(x) | is.data.frame(x)) {
if (ncol(x) != p) stop("Dimensions of x and mu don't match")
sqDinv <- diag(p); diag(sqDinv) <- 1/sqrt(diag(D))
z <- sqDinv %*% A %*% (t(as.matrix(x)) - as.vector(as.numeric(mu)))
} else stop("x must be a vector, matrix or data.frame")
if (ttype=='independent') {
ans <- -0.5*sum(log(diag(D)))
for (i in 1:p) { ans <- ans + dskewtuniv(z[i,],mu=0,s=1,alpha=alpha[i],nu=nu,param=param,logscale=TRUE) }
} else if (ttype=='dependent') {
ans <- -0.5*sum(log(diag(D))) + lgamma((p+nu)/2) - lgamma(nu/2) - 0.5*p*log(pi*nu)
xi <- (z>=0) / (1-alpha)^2 + (z<0) / (1+alpha)^2
ans <- ans - 0.5*(p+nu)*log(1 + rowSums(xi * z^2)/nu)
} else { stop("Invalid ttype") }
if (!logscale) ans <- exp(ans)
return(ans)
}
#Density for mixture of multivariate skew t's (arguments as in dskewt)
dmixskewt <- function(x,mu,Sigma,alpha,nu,probs,param='eps',logscale=FALSE,ttype='independent') {
K <- length(probs)
if (length(mu) != K) stop("mu must be a list with length = length(probs)")
if (length(Sigma) != K) stop("Sigma must be a list with length = length(probs)")
if (length(alpha) != K) stop("alpha must be a list with length = length(probs)")
if (is.vector(x)) x <- matrix(x,nrow=1)
logd <- matrix(NA,nrow=nrow(x),ncol=K)
for (i in 1:K) { logd[,i] <- dskewt(x,mu=mu[[i]],Sigma=Sigma[[i]],alpha=alpha[[i]],nu=nu[[i]],param=param,logscale=TRUE,ttype=ttype) + log(probs[i]) }
ct <- apply(logd,1,max)
ans <- exp(ct) * rowSums(exp(logd-ct))
if (logscale) ans <- log(ans)
return(ans)
}
#Draws from multivariate skew-t. Set ttype='independent' for independent skew-t, ttype='dependent for dependent skew-t
rskewt <- function(n,mu,Sigma,alpha,nu,param='eps',ttype='independent') {
p <- length(mu)
check.skewtpars(mu=mu,Sigma=Sigma,alpha=alpha,nu=nu,param=param)
if (length(alpha) != p) { if (length(alpha)==1) alpha <- rep(alpha,p) else stop("length(alpha) must be either 1 or length(mu)") }
#Eigendecomposition
e <- eigen(Sigma,symmetric=TRUE); A <- t(e$vectors); D <- diag(p); diag(D) <- e$values; A <- A * ifelse(sign(A[,1])== -1, -1, 1)
if (any(e$values<=0)) stop("Sigma is not positive definite")
#Draw from underlying uncorrelated variables
ans <- matrix(NA,nrow=n,ncol=p)
if (ttype=='independent') {
if (param=='eps') {
for (i in 1:p) ans[,i] <- rtp3(n,mu=0,par1=1,par2=alpha[i],FUN=function(n) rt(n=n,df=nu), param="eps")
} else if (param=='isf') {
for (i in 1:p) ans[,i] <- rtp3(n,mu=0,par1=1,par2=1/alpha[i],FUN=function(n) rt(n=n,df=nu), param="isf")
} else stop("Invalid value was specified for 'param'")
} else if (ttype=='dependent') {
if (param=='eps') {
omega <- 1/rgamma(n,nu/2,nu/2)
for (i in 1:p) ans[,i] <- sqrt(omega) * rtp3(n,mu=0,par1=1,par2=alpha[i],FUN=function(n) rnorm(n=n), param="eps")
} else if (param=='isf') {
for (i in 1:p) ans[,i] <- sqrt(omega) * rtp3(n,mu=0,par1=1,par2=1/alpha[i],FUN=function(n) rnorm(n=n), param="isf")
} else stop("Invalid value was specified for 'param'")
} else {
stop("Invalid ttype")
}
#Perform linear transform
ans <- t(t(A) %*% (sqrt(e$values) * t(ans)) + mu)
return(ans)
}
#Multivariate draws from mixture of skew-t's
rmixskewt <- function(n,mu,Sigma,alpha,nu,probs,param='eps',ttype='independent') {
#Checks
K <- length(probs)
if (!is.list(mu)) { mu <- lapply(1:K,function(z) mu); warning("Formatted mu as list") } else { if (length(mu) != K) stop("mu has the wrong length") }
if (!is.list(Sigma)) { Sigma <- lapply(1:K,function(z) Sigma); warning("Formatted Sigma as list") } else { if (length(Sigma) != K) stop("Sigma has the wrong length") }
if (!is.list(alpha)) { alpha <- lapply(1:K,function(z) alpha); warning("Formatted alpha as list") } else { if (length(alpha) != K) stop("alpha has the wrong length") }
p <- length(mu[[1]])
for (i in 1:K) {
check.skewtpars(mu=mu[[i]],Sigma=Sigma[[i]],alpha=alpha[[i]],nu=nu[[i]],param=param)
if (length(alpha[[i]]) != p) { if (length(alpha[[i]])==1) alpha[[i]] <- rep(alpha[[i]],p) else stop("length(alpha[[i]]) must be either 1 or length(mu[[i]])") }
}
#Obtain draws
ngroup <- as.vector(rmultinom(1, size=n, prob=probs))
groupidx <- c(0,cumsum(ngroup))
x <- matrix(NA,nrow=n,ncol=p)
for (i in 1:K) { x[(groupidx[i]+1):groupidx[i+1],] <- rskewt(ngroup[i],mu=mu[[i]],Sigma=Sigma[[i]],alpha=alpha[[i]],nu=nu[[i]],param=param,ttype=ttype) }
idx <- sample(1:nrow(x), size=nrow(x), replace=FALSE)
x <- x[idx,]
cluster <- rep(1:K,ngroup)[idx]
return(list(x=x,cluster=cluster))
}
#Two-dimensional ellipse for skew-t distribution. level is the confidence level for the main axes along which asymmetry is defined
tpellipse <- function(mu=c(0,0),Sigma=diag(2),alpha=c(0,0),nu=Inf,level=0.95) {
if (length(alpha) != 2) stop("tpellipse only available for two-dimensional data")
p <- length(mu)
if (length(alpha) != p) { if (length(alpha)==1) alpha <- rep(alpha,p) else stop("length(alpha) must be either 1 or length(mu)") }
#Eigendecomposition
e <- eigen(Sigma,symmetric=TRUE); A <- t(e$vectors); sqD <- diag(sqrt(e$values),p); A <- A * ifelse(sign(A[,1])== -1, -1, 1)
if (any(e$values<=0)) stop("Sigma is not positive definite")
#
e1 <- ellipse(diag(c(1/(1+alpha[1])^2,1/(1+alpha[2])^2)),npoints=1000,level=level)
e1 <- e1[e1[,1]>0 & e1[,2]>0,]
e1 <- e1[order(e1[,1]),]
#
e2 <- ellipse(diag(c(1/(1+alpha[1])^2,1/(1-alpha[2])^2)),npoints=1000,level=level)
e2 <- e2[e2[,1]>0 & e2[,2]<0,]
e2 <- e2[order(e2[,1],decreasing=TRUE),]
#
e3 <- ellipse(diag(c(1/(1-alpha[1])^2,1/(1-alpha[2])^2)),npoints=1000,level=level)
e3 <- e3[e3[,1]<0 & e3[,2]<0,]
e3 <- e3[order(e3[,1],decreasing=TRUE),]
#
e4 <- ellipse(diag(c(1/(1-alpha[1])^2,1/(1+alpha[2])^2)),npoints=1000,level=level)
e4 <- e4[e4[,1]<0 & e4[,2]>0,]
e4 <- e4[order(e4[,1]),]
#
e <- rbind(e1,e2,e3,e4,e1[1,]) * qt((1-level)/2,df=nu) / qnorm((1-level)/2)
ans <- t(t(A) %*% (sqD %*% t(e)) + mu)
return(ans)
}
################################################################################################################
## ROUTINES TO EVALUATE PRIOR DISTRIBUTIONS
################################################################################################################
#Discretized Juarez-Steel gamma-gamma prior on degrees of freedom truncated to [1,numax]
priornuJS <- function(nu,k=2.78,numax=30) {
if (any(nu>numax)) stop("All elements in nu must be <=numax")
if (any((nu %% 1) != 0)) stop("nu cannot have non-integer values")
if (any(nu<=0)) stop("All elements in nu must be strictly positive")
ans <- k*(1:numax)/((1:numax)+k)^3
ans <- ans/sum(ans)
return(ans[as.integer(nu)])
}
#Villa-Walker objective prior on degrees of freedom truncated to [1,numax]
priornuVW <- function(nu,numax=30) {
if (any(nu>numax)) stop("All elements in nu must be <=numax")
if (any((nu %% 1) != 0)) stop("nu cannot have non-integer values")
if (any(nu<=0)) stop("All elements in nu must be strictly positive")
f1 <- function(x,nufix) { log(1+x^2/nufix) * dt(x,df=nufix) }
f2 <- function(x,nufix) { log(1+x^2/(nufix+1)) * dt(x,df=nufix) }
#f3 <- function(x,nufix) { log(1+x^2/(nufix-1)) * dt(x,df=nufix) }
#f4 <- function(x,nufix) { log(1+x^2/nufix) * dnorm(x) }
nuseq <- 1:numax
ans <- 0.5*(log(nuseq+1)-log(nuseq)) + lbeta(0.5,0.5*(nuseq+1)) - lbeta(0.5,0.5*nuseq)
for (i in 1:numax) {
int1 <- integrate(f1,-Inf,Inf,nufix=i)$value
int2 <- integrate(f2,-Inf,Inf,nufix=i)$value
ans[i] <- ans[i] - 0.5*(i+1)*int1 + 0.5*(i+2)*int2
}
#int1 <- integrate(f1,-Inf,Inf,nufix=numax-1)$value
#int2 <- integrate(f3,-Inf,Inf,nufix=numax-1)$value
#ans[numax-1] <- ans[numax-1] - 0.5*numax*int1 + 0.5*(numax-1)*int2
#int2 <- integrate(f4,-Inf,Inf,nufix=numax-1)$value
#ans[numax] <- ans[numax] - 0.5 + 0.5*(numax-1)*int2
ans <- exp(ans-max(ans))
ans <- ans/sum(ans)
return(ans[as.integer(nu)])
}
################################################################################################################
## AUXILIARY FUNCTIONS
################################################################################################################
rdirichlet <- function (n, alpha) {
#Obtain n draws from Dirichlet(alpha)
l <- length(alpha)
x <- matrix(rgamma(l * n, alpha), ncol = l, byrow = TRUE)
sm <- x %*% rep(1, l)
x/as.vector(sm)
}
diwishfast <- function (eigenvecW, eigenvalW, v, S, logscale=TRUE) {
#Density of inverse Wishart ignoring constant terms depending on degrees of freedom (v), i.e. gives same output proportional to dinvwishart (below)
k <- nrow(S)
logdetS <- as.numeric(determinant(S,logarithm=TRUE)$modulus)
logdetW <- sum(log(eigenvalW))
diagWinv <- diag(length(eigenvalW)); diag(diagWinv) <- 1/eigenvalW
hold <- S %*% t(eigenvecW) %*% diagWinv %*% eigenvecW #S %*% solve(W)
ans <- 0.5*v*logdetS - 0.5*(v+k+1)*logdetW - 0.5*sum(diag(hold))
if (!logscale) ans <- exp(ans)
return(ans)
}
dinvwishart <- function (Sigma, nu, S, log = FALSE) {
k <- nrow(Sigma)
gamsum <- 0
for (i in 1:k) gamsum <- gamsum + lgamma((nu + 1 - i)/2)
dens <- -(nu * k/2) * log(2) - ((k * (k - 1))/4) * log(pi) - gamsum + (nu/2) * log(det(S)) - ((nu + k + 1)/2) * log(det(Sigma)) - 0.5 * sum(diag(S %*% solve(Sigma)))
if (log == FALSE) dens <- exp(dens)
return(dens)
}
rinvwishart <- function(nu, S, Sinv) {
if (missing(Sinv)) Sinv <- solve(S)
solve(rWishart(1, df=nu, Sigma=Sinv)[,,1])
}
################################################################################################################
## GIBBS SAMPLING
################################################################################################################
#Create list containing prior parameters for mixture of multivariate skew-t's. The following prior functional form is used:
# (mu,Sigma) ~ N(mu; m,g*Sigma) * IWishart(Sigma; Q, q)
# 0.5*(alpha+1) ~ Beta(a,b)
# nu is discrete with P(nu=j)=nuprobs[j] (or proportional to nuprobs[j])
# probs ~ Symmetric Dirichlet(r), where r is a scalar
# Input
# - p: dimension of the observed data
# - G: number of components
# - m, g: parameters for prior on mu given Sigma
# - Q, q: parameters for prior on Sigma
# - a, b: parameter for prior on asymetry parameter
# - r: parameter for symmetric Dirichlet prior on mixing probabilities
# - nuprobs: vector where P(nu=j)=nuprobs[j]. Names can be provided, else it is assumed that support of nu ranges from 1 to length(nuprobs)
skewtprior <- function(p,G,m=rep(0,p),g=1,Q=diag(p),q=p+1,a=2,b=2,r=1/G,nuprobs=priornuJS(1:30,k=2.78,numax=30)) {
if (length(m) != p) stop("m has the wrong length")
if (length(g)>1) stop("g must have length 1")
if (!is.matrix(Q)) stop("Q must be a matrix")
if ((nrow(Q) != ncol(Q)) | (nrow(Q) != p)) stop("Q must be a square matrix with p rows")
#if (any(Q[upper.tri(Q)] != Q[lower.tri(Q)])) stop("Q must be symmetric")
if (any(eigen(Q,symmetric=TRUE)$values <= 0)) stop("Q is not positive-definite!")
if (q < p) stop("q must be >= p")
if (length(r)>1) stop("r must have length 1")
ans <- vector("list",5)
names(ans) <- c('mu','Sigma','alpha','nu','probs')
ans[['mu']] <- list(m=m,g=g)
ans[['Sigma']] <- list(Q=Q,q=q)
ans[['alpha']] <- list(a=a,b=b)
ans[['nu']] <- nuprobs; if (is.null(names(nuprobs))) names(ans[['nu']]) <- 1:length(nuprobs)
ans[['probs']] <- c(r=r)
return(ans)
}
#MH-within-Gibbs sampling for mixture of multivariate skew t's
# Input
# - x: observed data (observations in rows, variables in columns)
# - G: number of components
# - clusini: either vector with initial cluster allocation, 'kmedians' for K-medians (as in cclust from package flexclust) 'kmeans' for K-means, 'em' for EM algorithm based on mixture of normals (as in Mclust package, initialized by hierarchical clustering so it can be slow)
# - priorParam: list with named elements 'mu', 'Sigma', 'alpha', 'nu', 'probs' containing prior parameters. See help(skewtprior).
# - niter: number of Gibbs iterations
# - ttype: set to 'independent' for independent skew-t, to 'dependent' for dependent skew-t
# - returnCluster: if set to TRUE the allocated cluster at each MCMC iteration is returned. This can be memory-consuming if nrow(x) is large.
# - relabel: 'none' to do no relabelling. 'ECR' for Papastamoulis-Iliopoulos 2010 to make simulated z similar to pivot z (taken from last iteration); 'RW' for Rodriguez-Walker (2014) relabelling (loss function aimed at preserving cluster means), 'PC' for identifiability constraint based on projection of location parameters on first principal component
# - verbose: set to TRUE to output iteration progress
# Output:
# - mu: list of length G, where mu[[i]] is a matrix with posterior draws (niter-burnin rows)
# - Sigma: list of length G, where Sigma[[i]] is a matrix with posterior draws (niter-burnin rows). Each row contains diag(S),S[upper.tri(S)]
# - alpha: list of length G, where alpha[[i]] is a matrix with posterior draws (niter-burnin rows)
# - nu: matrix with niter-burnin rows and G columns with posterior draws for the degrees of freedom
# - probs: matrix with niter-burnin rows and G columns with posterior draws for the mixing probabilities
# - probcluster: matrix with nrow(x) rows and G columns with posterior probabilities that each observation belongs to each cluster (Rao-Blackwellized)
# - cluster: if returnCluster==TRUE, a matrix with niter-burnin rows and nrow(x) columns with latent cluster allocations at each MCMC iteration. If returnCluster==FALSE, NA is returned.
mixskewtGibbs <- function(x, G, clusini='kmedians', priorParam=skewtprior(ncol(x),G), niter, burnin=round(niter/10), ttype='independent', returnCluster=FALSE, relabel='ECR', verbose=TRUE) {
#require(mvtnorm)
if (!(ttype %in% c('independent','dependent'))) stop("ttype must be equal to 'independent' or 'dependent'")
p <- ncol(x); n <- nrow(x)
m <- priorParam[['mu']]$m; gprior <- priorParam[['mu']]$g
Q <- priorParam[['Sigma']]$Q; q <- priorParam[['Sigma']]$q
a <- priorParam[['alpha']]$a; b <- priorParam[['alpha']]$b
nuprobs <- priorParam[['nu']]
if (is.null(names(nuprobs))) { nuvals <- 1:length(nuprobs) } else { nuvals <- as.integer(names(nuprobs)) }
r <- priorParam[['probs']]['r']
## Initial cluster allocations ##
if (G>1) {
if (is.character(clusini)) {
if (clusini=='kmedians') {
z <- predict(cclust(x, k=G, dist='manhattan', method='kmeans'))
} else if (clusini=='kmeans') {
z <- kmeans(x, centers=G)$cluster
} else if (clusini=='em') {
z <- Mclust(x, G=G, modelNames='VVV')$classification
} else stop("Invalid value for 'clusini'")
} else {
if (length(clusini) != nrow(x)) stop("length(clusini) must be equal to nrow(x)")
z <- as.integer(factor(clusini))
}
} else {
z <- rep(1,nrow(x))
}
zcount <- rep(0,G); names(zcount) <- (1:G)
tab <- table(z); zcount[names(tab)] <- tab
## Initialize parameters ##
probscur <- as.vector(rdirichlet(1,r+zcount))
txstd <- matrix(NA,nrow=ncol(x),ncol=nrow(x))
tx <- t(x); xstd <- t(txstd)
xi <- matrix(1,nrow=nrow(x),ncol=ncol(x))
mucur <- alphacur <- lapply(1:G, function(i) double(p))
Scur <- Acur <- Dcur <- lapply(1:G, function(i) matrix(NA,nrow=p,ncol=p)); nucur <- rep(30,G)
if (ttype=='independent') wcur <- matrix(1,nrow=n,ncol=p) else wcur <- matrix(1,nrow=n,ncol=1)
for (i in 1:G) {
sel <- which(z==i)
#Initialize alpha
alphacur[[i]] <- rep(0,p)
#Initialize mu, Sigma
if (length(sel)>0) {
mucur[[i]] <- apply(x[sel,],2,median); Scur[[i]] <- ((tx[,sel] - mucur[[i]]) %*% t(tx[,sel] - mucur[[i]]))/length(sel)
#mucur[[i]] <- colMeans(x[sel,]); Scur[[i]] <- var(x[sel,]) * (nucur[[i]]-2) / nucur[[i]]
} else {
mucur[[i]] <- m; Scur[[i]] <- Q / (q+p+1)
}
#Initialize w
e <- eigen(Scur[[i]],symmetric=TRUE); Acur[[i]] <- t(e$vectors); Dcur[[i]] <- diag(e$values,ncol=p); Acur[[i]] <- Acur[[i]] * ifelse(sign(Acur[[i]][,1])== -1, -1, 1)
if (length(sel)>0) {
sqDcurinv <- diag(1/sqrt(diag(Dcur[[i]])),ncol=p)
txstd[,sel] <- sqDcurinv %*% Acur[[i]] %*% (tx[,sel] - mucur[[i]])
#txstd[,sel] <- sqrt(Dcur[[i]]) %*% Acur[[i]] %*% (tx[,sel] - mucur[[i]])
xstd[sel,] <- t(txstd[,sel])
xi[sel,] <- t((txstd[,sel]>=0) / (1-alphacur[[i]])^2 + (txstd[,sel]<0) / (1+alphacur[[i]])^2)
#wcur[sel,] <- rwSkewtGibbs(xstd=xstd[sel,],xi=xi[sel,],nu=nucur[[i]],ttype=ttype)
}
}
##Gibbs sampling
mu <- lapply(1:G,function(i) matrix(NA,nrow=niter-burnin,ncol=p))
Sigma <- lapply(1:G,function(i) matrix(NA,nrow=niter-burnin,ncol=p*(p+1)/2))
alpha <- lapply(1:G,function(i) matrix(NA,nrow=niter-burnin,ncol=p))
nu <- probs <- matrix(NA,nrow=niter-burnin,ncol=G)
cluster <- matrix(NA,nrow=niter-burnin,ncol=nrow(x)); colnames(cluster) <- paste('indiv',1:n,sep='')
if (verbose) { niter10 <- round(niter/10); cat("Running MCMC") }
for (l in 1:niter) {
#Sample latent cluster indicators z
dx <- sapply(1:G,function(g) dskewt(x,mu=mucur[[g]],A=Acur[[g]],D=Dcur[[g]],alpha=alphacur[[g]],nu=nucur[[g]],param='eps',logscale=TRUE,ttype=ttype))
dx <- t(t(dx)+log(probscur))
dx <- exp(dx - apply(dx,1,max))
dx <- dx/rowSums(dx)
z <- apply(dx,1,function(pp) match(TRUE,runif(1)<cumsum(pp)))
tab <- table(z); zcount[1:G] <- 0; zcount[names(tab)] <- tab
#Sample component weights
probscur <- as.vector(rdirichlet(1,alpha=r+zcount))
for (g in 1:G) {
sel <- which(z==g)
if (length(sel)>0) {
sqDcurinv <- diag(1/sqrt(diag(Dcur[[g]])),ncol=p)
#Update precomputed stuff
txstd[,sel] <- sqDcurinv %*% Acur[[g]] %*% (tx[,sel] - mucur[[g]])
#txstd[,sel] <- sqrt(Dcur[[g]]) %*% Acur[[g]] %*% (tx[,sel] - mucur[[g]])
xstd[sel,] <- t(txstd[,sel])
xi[sel,] <- t((txstd[,sel]>=0) / (1-alphacur[[g]])^2 + (txstd[,sel]<0) / (1+alphacur[[g]])^2)
#Sample w
wcur[sel,] <- rwSkewtGibbs(xstd=xstd[sel,,drop=FALSE],xi=xi[sel,,drop=FALSE],nu=nucur[[g]],ttype=ttype)
#Sample mu, Sigma
munew <- rmuSkewtGibbs(tx[,sel,drop=FALSE],mucur=mucur[[g]],Sigma=Scur[[g]],A=Acur[[g]],D=Dcur[[g]],w=wcur[sel,,drop=FALSE],alpha=alphacur[[g]],xi=xi[sel,,drop=FALSE],nu=nucur[[g]],ttype=ttype,m=m,g=gprior)
if (munew$accept) {
mucur[[g]] <- munew$mu
txstd[,sel] <- sqDcurinv %*% Acur[[g]] %*% (tx[,sel] - mucur[[g]])
xi[sel,] <- t((txstd[,sel]>=0) / (1-alphacur[[g]])^2 + (txstd[,sel]<0) / (1+alphacur[[g]])^2)
}
newS <- rSigmaSkewtGibbs(tx[,sel,drop=FALSE],mu=mucur[[g]],Sigmacur=Scur[[g]],Acur=Acur[[g]],Dcur=Dcur[[g]],w=wcur[sel,,drop=FALSE],alpha=alphacur[[g]],nu=nucur[[g]],xi=xi[sel,,drop=FALSE],ttype=ttype,Q=Q,q=q)
if (newS$accept) {
Scur[[g]] <- newS$Sigmanew; Acur[[g]] <- newS$Anew; Dcur[[g]] <- newS$Dnew
txstd[,sel] <- sqDcurinv %*% Acur[[g]] %*% (tx[,sel] - mucur[[g]])
#xi[sel,] <- t((txstd[,sel]>=0) / (1-alphacur[[g]])^2 + (txstd[,sel]<0) / (1+alphacur[[g]])^2) #xi not needed for alpha or nu
}
if (munew$accept | newS$accept) xstd[sel,] <- t(txstd[,sel,drop=FALSE])
#Sample alpha
alphanew <- ralphaSkewtGibbs(alphacur=alphacur[[g]],xstd=xstd[sel,,drop=FALSE],w=wcur[sel,,drop=FALSE],a=a,b=b,ttype=ttype)
alphacur[[g]] <- alphanew$alphanew
#xi[sel,] <- t((txstd[,sel]>=0) / (1-alphacur[[g]])^2 + (txstd[,sel]<0) / (1+alphacur[[g]])^2) #xi not needed for nu
#Sample nu
nucur[[g]] <- rnuSkewtGibbs(x=x[sel,,drop=FALSE],mu=mucur[[g]],A=Acur[[g]],D=Dcur[[g]],alpha=alphacur[[g]],nuprobs=nuprobs,ttype=ttype)
} else {
#If no observations in cluster, sample from the prior
Scur[[g]] <- rinvwishart(q,Q)
e <- eigen(Scur[[g]],symmetric=TRUE); Acur[[g]] <- t(e$vectors); Dcur[[g]] <- diag(p); diag(Dcur[[g]]) <- e$values; Acur[[g]] <- Acur[[g]] * ifelse(sign(Acur[[g]][,1])== -1, -1, 1)
mucur[[g]] <- as.vector(rmvnorm(1,m,gprior * Scur[[g]]))
alphacur[[g]] <- 1 - 2*rbeta(p,a,b)
nucur[[g]] <- as.numeric(names(nuprobs)[rmultinom(n=1,size=1,prob=nuprobs)])
}
}
#Save output
if (l>burnin) {
idx <- l-burnin
for (g in 1:G) {
mu[[g]][idx,] <- mucur[[g]]
if (p>1) {
Sigma[[g]][idx,] <- c(diag(Scur[[g]]),Scur[[g]][upper.tri(Scur[[g]])])
} else {
Sigma[[g]][idx,] <- Scur[[g]]
}
alpha[[g]][idx,] <- alphacur[[g]]
}
nu[idx,] <- nucur
probs[idx,] <- probscur
cluster[idx,] <- z
}
if (verbose & ((l %% niter10)==0)) cat('.')
}
names(mu) <- names(Sigma) <- names(alpha) <- colnames(nu) <- colnames(probs) <- paste('cluster',1:ncol(nu),sep='')
ans <- list(mu=mu,Sigma=Sigma,alpha=alpha,nu=nu,probs=probs,probcluster=NA,cluster=NA,G=G,ttype=ttype)
if (returnCluster) ans$cluster <- cluster
fit <- new("skewtFit",ans)
#Fix potential label switching issues
if ((G>1) & (relabel!='none')) {
if (verbose) { cat('\n'); cat('Post-processing label-switching...') }
fit <- fixLabelSwitch(fit,x=x,z=cluster,method=relabel)
}
#Compute cluster probabilities
fit$probcluster <- clusterprobs(fit, x=x)
rownames(fit$probcluster) <- paste('indiv',1:n,sep='')
colnames(fit$probcluster) <- paste('cluster',1:ncol(nu),sep='')
if (verbose) cat('\n')
return(fit)
}
rmuSkewtGibbs <- function(tx,mucur,Sigma,A,D,w,alpha,xi,nu,ttype,m,g) {
#Metropolis-Hastings sampler from the conditional posterior of the skew-t location parameter mu given all other parameters
#Input
# - tx: t(x), where x are original observations
# - mucur: current value of mu
# - Sigma: scale matrix
# - A: pre-computed t(eigen(Sigma)$vectors). For identifiability all elements in A[,1] must be positive, if any entry is negative then the corresponding row is changed sign
# - D: pre-computed diag(eigen(Sigma)$values)
# - w: skew-t latent scale parameter (it can either be a vector for dependent t or a matrix for dependent t)
# - alpha: vector of length ncol(x) with asymmetry parameters
# - xi: (x>=0)/(1-alpha)^2 + (x<0)/(1+alpha)^2
# - nu: degrees of freedom
# - ttype: independent or dependent skew-t
# - m,g: prior on mu is N(m,g*Sigma)
#Output: list with 2 components
# - munew: sampled value of mu (it may be equal to mucur)
# - accept: indicates if munew==mucur
#
#Pre-compute useful quantities
sqDinv <- diag(nrow(D)); diag(sqDinv) <- 1/sqrt(diag(D))
sqDA <- sqDinv %*% A
Sigmainv <- solve(Sigma)
tphi <- t(xi/w)
xiwinv <- diag(nrow(tphi)); diag(xiwinv) <- rowSums(tphi)
#Parameters of proposal
Vinv <- Sigmainv/g + t(sqDA) %*% xiwinv %*% sqDA
term1 <- t(sqDA) %*% (tphi * (sqDA %*% tx)) %*% matrix(1,nrow=ncol(tx))
term2 <- g * Sigma %*% m
V <- solve(Vinv)
muprop <- V %*% (term1 + term2)
munew <- as.vector(rmvnorm(1,muprop,sigma=V))
#Parameters of reverse proposal
txstdnew <- sqDA %*% (tx - munew)
xinew <- t((txstdnew>=0) / (1-alpha)^2 + (txstdnew<0) / (1+alpha)^2)
tphinew <- t(xinew/w)
xiwinvnew <- diag(nrow(tphinew)); diag(xiwinvnew) <- rowSums(tphinew)
Vinvnew <- Sigmainv/g + t(sqDA) %*% xiwinvnew %*% sqDA
term1 <- t(sqDA) %*% (tphinew * (sqDA %*% tx)) %*% matrix(1,nrow=ncol(tx))
Vnew <- solve(Vinvnew)
mupropnew <- Vnew %*% (term1 + term2)
#Acceptance probability (likelihood marginalized wrt w)
u <- dmvnorm(mucur,mupropnew,solve(Vinvnew),log=TRUE) - dmvnorm(munew,muprop,V,log=TRUE)
#num <- sum(dskewt(t(tx),mu=munew,Sigma=Sigma,A=A,D=D,alpha=alpha,nu=nu,param='eps',logscale=TRUE,ttype=ttype)) + dmvnorm(munew,m,g*Sigma,log=TRUE)
#den <- sum(dskewt(t(tx),mu=mucur,Sigma=Sigma,A=A,D=D,alpha=alpha,nu=nu,param='eps',logscale=TRUE,ttype=ttype)) + dmvnorm(mucur,m,g*Sigma,log=TRUE)
#Acceptance probability (conditional on w)
num <- sqrt(tphinew) * (sqDA %*% (tx-munew))
num <- -0.5 * sum(num^2) - 0.5 * (t(munew-m) %*% Sigmainv %*% (munew-m)) / g
den <- sqrt(tphi) * (sqDA %*% (tx-mucur))
den <- -0.5 * sum(den^2) - 0.5 * (t(mucur-m) %*% Sigmainv %*% (mucur-m)) / g
u <- exp(u + num - den)
if (runif(1)<u) { accept <- TRUE } else { accept <- FALSE; munew <- mucur }
return(list(munew=munew,accept=accept))
}
rSigmaSkewtGibbsMarg <- function(tx,mu,Sigmacur,Acur,Dcur,alpha,nu,xi,ttype,Q=Q,q=q) {
#MH draws from posterior of Sigma= t(A) %*% D %*% A given all other parameters, but marginalized with respect to w
# Input
# - mu: mean vector
# - Sigmacur: current value of Sigma
# - Acur, Dcur: current value of A,D, i.e. Sigmacur= t(Acur) %*% Dcur %*% Acur
# - alpha: current value of asymmetry parameters
# - xi: (x>=0)/(1-alpha)^2 + (x<0)/(1+alpha)^2
# - ttype: independent or dependent skew-t
# - Q, q: prior on Sigma is InvWishart(Q,q), where q are the degrees of freedom
# Output: list with 2 components
# - Sigmanew: sampled value of Sigma (it may be equal to Sigmacur)
# - Anew: sampled value of A (it may be equal to Acur)
# - Dnew: sampled value of D (it may be equal to Dcur)
# - accept: indicates if Sigmanew==Sigmacur
#
n <- ncol(tx); p <- nrow(tx)
txmu <- tx-mu
#Proposal parameters
if (n>1) {
e <- eigen(cov(t(tx)))
Aprop <- t(e$vectors); Aprop <- Aprop * ifelse(sign(Aprop[, 1]) == -1, -1, 1)
txortho <- Aprop %*% txmu
xiprop <- t((txortho>=0) / (1-alpha)^2 + (txortho<0) / (1+alpha)^2)
xstd <- t(txortho) * sqrt(xiprop)
Dest <- diag(colSums(xstd^2),ncol=p)
Sprop <- Q + matrix(mu,ncol=1) %*% matrix(mu,nrow=1) + t(Aprop) %*% Dest %*% Aprop
} else {
Sprop <- Q + matrix(mu,ncol=1) %*% matrix(mu,nrow=1) + tx %*% t(tx)
}
#Propose new value
Sigmanew <- rinvwishart(n+q+p, Sprop)
e <- eigen(Sigmanew,symmetric=TRUE)
Anew <- t(e$vectors); Dnew <- diag(e$values,ncol=p); Dnewinv <- diag(1/e$values,ncol=p); Anew <- Anew * ifelse(sign(Anew[,1])== -1, -1, 1)
#Ratio of proposal densities
logpropcur <- diwishfast(Acur,diag(Dcur),n+q+p,Sprop,logscale=TRUE)
logpropnew <- diwishfast(Anew,diag(Dnew),n+q+p,Sprop,logscale=TRUE)
#Ratio of log-posterior densities
loglnew <- sum(dskewt(t(tx),mu=mu,A=Anew,D=Dnew,alpha=alpha,nu=nu,logscale=TRUE)) #unconditional on w
loglcur <- sum(dskewt(t(tx),mu=mu,A=Acur,D=Dcur,alpha=alpha,nu=nu,logscale=TRUE)) #unconditional on w
Dcurinv <- diag(1/diag(Dcur),ncol=p)
smunew <- matrix(mu,nrow=1) %*% t(Anew) %*% Dnewinv %*% Anew %*% matrix(mu,ncol=1)
smucur <- matrix(mu,nrow=1) %*% t(Acur) %*% Dcurinv %*% Acur %*% matrix(mu,ncol=1)
logpnew <- - 0.5*smunew + diwishfast(Anew,diag(Dnew),v=q+p,S=Q,logscale=TRUE)
logpcur <- - 0.5*smucur + diwishfast(Acur,diag(Dcur),v=q+p,S=Q,logscale=TRUE)
logposnew <- loglnew+logpnew
logposcur <- loglcur+logpcur
#Accept/reject move
u <- logposnew - logposcur + logpropcur - logpropnew
if (runif(1)<exp(u)) {
ans <- list(Sigmanew=Sigmanew,Anew=Anew,Dnew=Dnew,logposdif=logposnew-logposcur,logpropdif=logpropnew-logpropcur,accept=TRUE)
} else {
ans <- list(Sigmanew=Sigmacur,Anew=Acur,Dnew=Dcur,logposdif=logposnew-logposcur,logpropdif=logpropnew-logpropcur,accept=FALSE)
}
return(ans)
}
rSigmaSkewtGibbs <- function(tx,mu,Sigmacur,Acur,Dcur,w,alpha,nu,xi,ttype,Q=Q,q=q) {
#MH draws from posterior of Sigma= t(A) %*% D %*% A given all other parameters, also conditional on w
# Input
# - mu: mean vector
# - Sigmacur: current value of Sigma
# - Acur, Dcur: current value of A,D, i.e. Sigmacur= t(Acur) %*% Dcur %*% Acur
# - w: current value of latent scale parameters
# - alpha: current value of asymmetry parameters
# - xi: (x>=0)/(1-alpha)^2 + (x<0)/(1+alpha)^2
# - ttype: independent or dependent skew-t
# - Q, q: prior on Sigma is InvWishart(Q,q), where q are the degrees of freedom
# Output: list with 2 components
# - Sigmanew: sampled value of Sigma (it may be equal to Sigmacur)
# - Anew: sampled value of A (it may be equal to Acur)
# - Dnew: sampled value of D (it may be equal to Dcur)
# - accept: indicates if Sigmanew==Sigmacur
#
n <- ncol(tx); p <- nrow(tx)
txmu <- tx-mu
#Proposal parameters
if (n>1) {
e <- eigen(cov(t(tx)))
Aprop <- t(e$vectors); Aprop <- Aprop * ifelse(sign(Aprop[, 1]) == -1, -1, 1); Dprop <- diag(e$values,ncol=p)
txortho <- Aprop %*% txmu
xiprop <- t((txortho>=0) / (1-alpha)^2 + (txortho<0) / (1+alpha)^2)
xstd <- t(txortho) * sqrt(xiprop/w)
Dest <- diag(colSums(xstd^2),ncol=p)
Sprop <- Q + matrix(mu,ncol=1) %*% matrix(mu,nrow=1) + t(Aprop) %*% Dest %*% Aprop
} else {
Sprop <- Q + matrix(mu,ncol=1) %*% matrix(mu,nrow=1) + tx %*% t(tx)
}
#Propose new value
Sigmanew <- rinvwishart(n+q+p, Sprop)
e <- eigen(Sigmanew,symmetric=TRUE)
Anew <- t(e$vectors); Dnew <- diag(e$values,ncol=p); Dnewinv <- diag(1/e$values,ncol=p); Anew <- Anew * ifelse(sign(Anew[,1])== -1, -1, 1)
#Ratio of proposal densities
logpropcur <- diwishfast(Acur,diag(Dcur),n+q+p,Sprop,logscale=TRUE)
logpropnew <- diwishfast(Anew,diag(Dnew),n+q+p,Sprop,logscale=TRUE)
#Ratio of log-posterior densities
#loglnew <- sum(dskewt(t(tx),mu=mu,A=Anew,D=Dnew,alpha=alpha,nu=nu,logscale=TRUE)) #unconditional on w
loglnew <- loglSigma(txmu,Anew,Dnew,alpha,w) #conditional on w
#loglnew <- sum(dmvnorm(t(txmu),sigma=t(Anew) %*% Dnew %*% Anew,log=TRUE)) #check: same result when alpha=0,w=1
#loglcur <- sum(dskewt(t(tx),mu=mu,A=Acur,D=Dcur,alpha=alpha,nu=nu,logscale=TRUE)) #unconditional on w
loglcur <- loglSigma(txmu,Acur,Dcur,alpha,w) #conditional on w
#loglcur <- sum(dmvnorm(t(txmu),sigma=t(Acur) %*% Dcur %*% Acur,log=TRUE)) #check: same result when alpha=0,w=1
Dcurinv <- diag(1/diag(Dcur),ncol=p)
smunew <- matrix(mu,nrow=1) %*% t(Anew) %*% Dnewinv %*% Anew %*% matrix(mu,ncol=1)
smucur <- matrix(mu,nrow=1) %*% t(Acur) %*% Dcurinv %*% Acur %*% matrix(mu,ncol=1)
logpnew <- - 0.5*smunew + diwishfast(Anew,diag(Dnew),v=q+p,S=Q,logscale=TRUE)
logpcur <- - 0.5*smucur + diwishfast(Acur,diag(Dcur),v=q+p,S=Q,logscale=TRUE)
logposnew <- loglnew+logpnew
logposcur <- loglcur+logpcur
#Accept/reject move
u <- logposnew - logposcur + logpropcur - logpropnew
if (runif(1)<exp(u)) {
ans <- list(Sigmanew=Sigmanew,Anew=Anew,Dnew=Dnew,logposdif=logposnew-logposcur,logpropdif=logpropnew-logpropcur,accept=TRUE)
} else {
ans <- list(Sigmanew=Sigmacur,Anew=Acur,Dnew=Dcur,logposdif=logposnew-logposcur,logpropdif=logpropnew-logpropcur,accept=FALSE)
}
return(ans)
}
loglSigma <- function(txmu,A,D,alpha,w) {
#log-likelihood of (x-mu) evaluated at Sigma=t(A) D A, alpha, w
txortho <- diag(sqrt(1/diag(D)),ncol=ncol(D)) %*% A %*% txmu
xi <- t((txortho>=0) / (1-alpha)^2 + (txortho<0) / (1+alpha)^2)
#Option 1 (slower)
#xstd <- t(txortho) / w
#ans <- 0
#for (i in 1:ncol(xstd)) { ans <- ans + sum(dtp3(xstd[,i],mu=0,par1=1,par2=alpha[i],FUN=function(z,log) dnorm(z,log=log),param="eps",log=TRUE)) }
#for (i in 1:ncol(xstd)) { ans <- ans + sum(dtp3(xstd[,i],mu=0,par1=1,par2=alpha[i],FUN=function(z,log) dt(z,log=log,df=10^6),param="eps",log=TRUE)) }
#Option 2 (faster)
xstd <- t(txortho) * sqrt(xi/w)
ans <- -0.5 * sum(xstd^2) -0.5*nrow(xstd)*sum(log(diag(D))) - 0.5*nrow(xstd)*ncol(xstd)*log(2*pi)
#-0.5 * sum(xstd^2) -0.5*sum(log(diag(D)))
return(ans)
}
rwSkewtGibbs <- function(xstd,xi,nu,ttype) {
#Sample from the conditional posterior of the skew-t latent weights w given all other parameters
#Input
# - xstd: matrix with standardized & uncorrelated observations, i.e. xstd= Sigma^(-1/2) (y-mu)
# - xi: matrix with (xstd>=0)/(1-alpha)^2 + (xstd<0)/(1+alpha)^2, where alpha is the vector with asymmetry parameters
# - nu: degrees of freedom
# - ttype: independent skew-t or dependent skew-t
#Output: if ttype=='independent' a matrix with dim(xstd), else a matrix with nrow(xstd) rows and 1 column
if (ttype=='independent') {
par1 <- (nu+1) / 2; par2 <- (nu + xi * xstd^2) / 2; #par2 <- (nu + xstd^2) / 2
ans <- matrix(1/rgamma(nrow(xstd)*ncol(xstd),shape=par1,rate=par2), nrow=nrow(xstd), ncol=ncol(xstd))
} else {
par1 <- (nu+ncol(xstd)) / 2; par2 <- (nu + rowSums(xi * xstd^2)) / 2 #par2 <- (nu + rowSums(xstd^2)) / 2
ans <- matrix(1/rgamma(nrow(xstd),shape=par1,rate=par2),ncol=1)
}
return(ans)
}
ralphaSkewtGibbs <- function(alphacur,xstd,w,a,b,ttype) {
#Metropolis-Hastings sampler for vector of asymmetry parameters
# Input
# - xstd: standardized & orthogonalized x values, i.e. sqrt(D) A (x-mu)
# - w: latent scale parameters
# - a, b: prior on .5 * (alpha+1) ~ Beta(a,b)
# - ttype: independent or dependent skew-t
if (ttype=='independent') {
s1 <- colSums((xstd>=0) * xstd^2 / w)
s2 <- colSums((xstd< 0) * xstd^2 / w)
} else {
s1 <- colSums((xstd>=0) * xstd^2 / w)
s2 <- colSums((xstd< 0) * xstd^2 / w)
}
s1[s1==0] <- .0001; s2[s2==0] <- .0001
alphaprop <- ralphaPropT(s1=s1,s2=s2,n=max(3,nrow(xstd)),a=a,b=b)
u <- dt((alphacur-alphaprop$alpha.mode)/alphaprop$sd.mode,df=max(3,nrow(xstd)),log=TRUE) - alphaprop$logpdfProp
#u <- dnorm(alphacur,alphaprop$alpha.mode,sd=alphaprop$sd.mode,log=TRUE) - alphaprop$logpdfProp
u <- u + falpha(alphaprop$alpha,s1=s1,s2=s2,a=a,b=b,logscale=TRUE) - falpha(alphacur,s1=s1,s2=s2,a=a,b=b,logscale=TRUE)
accept <- (runif(length(u)) < exp(u))
alphanew <- alphaprop$alpha
alphanew[!accept] <- alphacur[!accept]
return(list(alphanew=alphanew,accept=accept))
}
#Full conditional posterior density of alpha
falpha <- function(alpha,s1,s2,a=1,b=1,logscale=FALSE) {
ans <- -0.5*(s1/(1-alpha)^2 + s2/(1+alpha)^2) + (a-1)*log(1+alpha) + (b-1)*log(1-alpha)
if (!logscale) ans <- exp(ans)
return(ans)
}
#Density of Normal approximation to full posterior
falphaProp <- function(alpha,s1,s2,a=1,b=1,alpha.mode,alpha.sd,logscale=FALSE) {
m <- polyroot(c(s2-s1,-3*(s1+s2),3*(s2-s1),-(s1+s2)))
alpha.mode <- Re(m[which.min(abs(m))])
vinv <- 3*s2/(1+alpha.mode)^4 + 3*s1/(1-alpha.mode)^4 + (a-1)/(1+alpha.mode)^2 + (b-1)/(1-alpha.mode)^2
sd.mode <- sqrt(1/vinv)
dnorm(alpha,mean=alpha.mode,sd=sd.mode,log=logscale)
}
ralphaPropT <- function(s1,s2,n,a,b) {
#Draw from truncated T approximation to full posterior of alpha (s1,s2 can be vectors)
# - s1,s2: parameters from the likelihood term
# - n: sample size
# - a,b: prior parameters 0.5(alpha+1) ~ Beta(a,b)
# Output: list with following elements
# - alpha: proposed values for alpha ~ N(alpha.mode,sd.mode)
# - alpha.mode, sd.mode: vector mean and standard deviation of approximating Normal
# - logpdfProp: log proposal density evaluated at the returned alpha
alpha.mode <- double(length(s1))
for (i in 1:length(s1)) {
m <- polyroot(c(s2[i]-s1[i],-3*(s1[i]+s2[i]),3*(s2[i]-s1[i]),-(s1[i]+s2[i])))
alpha.mode[i] <- Re(m[which.min(abs(m))])
}
vinv <- 3*s2/(1+alpha.mode)^4 + 3*s1/(1-alpha.mode)^4 + (a-1)/(1+alpha.mode)^2 + (b-1)/(1-alpha.mode)^2
sd.mode <- sqrt(1/vinv)
u <- runif(length(s1),pt((-1-alpha.mode)/sd.mode,df=n),pt((1-alpha.mode)/sd.mode,df=n))
alpha <- qt(u,df=n)*sd.mode + alpha.mode
logpdfProp <- dt((alpha-alpha.mode)/sd.mode,df=n,log=TRUE)
#plot(dt((aseq-alpha.mode[1])/sd.mode[1],df=n,log=TRUE), (-(n+1)/2) * log(1+(1/n)*((aseq-alpha.mode[1])/sd.mode[1])^2))
return(list(alpha=alpha,alpha.mode=alpha.mode,sd.mode=sd.mode,logpdfProp=logpdfProp))
}
ralphaPropNorm <- function(s1,s2,a,b) {
#Draw from truncated Normal approximation to full posterior of alpha (s1,s2 can be vectors)
# - s1,s2: parameters from the likelihood term
# - a,b: prior parameters 0.5(alpha+1) ~ Beta(a,b)
# Output: list with following elements
# - alpha: proposed values for alpha ~ N(alpha.mode,sd.mode)
# - alpha.mode, sd.mode: vector mean and standard deviation of approximating Normal
# - logpdfProp: log proposal density evaluated at the returned alpha
alpha.mode <- double(length(s1))
for (i in 1:length(s1)) {
m <- polyroot(c(s2[i]-s1[i],-3*(s1[i]+s2[i]),3*(s2[i]-s1[i]),-(s1[i]+s2[i])))
alpha.mode[i] <- Re(m[which.min(abs(m))])
}
vinv <- 3*s2/(1+alpha.mode)^4 + 3*s1/(1-alpha.mode)^4 + (a-1)/(1+alpha.mode)^2 + (b-1)/(1-alpha.mode)^2
sd.mode <- sqrt(1/vinv)
u <- runif(length(s1),pnorm(-1,alpha.mode,sd=sd.mode),pnorm(1,alpha.mode,sd.mode))
alpha <- qnorm(u,alpha.mode,sd=sd.mode)
logpdfProp <- dnorm(alpha,mean=alpha.mode,sd=sd.mode,log=TRUE)
return(list(alpha=alpha,alpha.mode=alpha.mode,sd.mode=sd.mode,logpdfProp=logpdfProp))
}
rnuSkewtGibbs <- function(x, mu, Sigma, A, D, alpha, nuprobs, ttype) {
#Sample from the conditional posterior of the skew-t degrees of freedom parameter nu (assumed to take values in names(nuprobs)) given mu, Sigma, alpha
# Input
# - x: matrix with observed data. Observations in rows, variables in columns
# - mu: location parameter
# - Sigma: scale matrix parameter
# - A: optional parameter containing pre-computed A=t(eigen(Sigma)). For identifiability all elements in A[,1] must be positive, if any entry is negative then the corresponding row is changed sign
# - D: optional parameter containing pre-computed A=diag(eigen(Sigma)$values)
# - alpha: vector with asymmetry coefficients
# Return: value of nu
nuvals <- as.numeric(names(nuprobs))
pp <- sapply(nuvals, function(v) sum(dskewt(x=x,mu=mu,A=A,D=D,alpha=alpha,nu=v,param='eps',logscale=TRUE,ttype=ttype))) + log(nuprobs)
pp <- exp(pp - max(pp))
pp <- pp/sum(pp)
nuvals[match(TRUE,runif(1)<cumsum(pp))]
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.