R/DPMslice.R

weights_seq <- function(j, kappa) {
    (1 - kappa) * kappa^(j - 1)
}

p.mixture <- function(grid, mu, sigma, nu) {
    sum(nu * dnorm(grid, mu, sigma))/sum(nu)
}

pexp.mixture <- function(grid, mu, sigma, nu) {
    sum(nu * dnorm(grid, mu, sigma)/exp(grid))/sum(nu)
}

p.mixture <- Vectorize(p.mixture, vectorize.args = "grid")

pexp.mixture <- Vectorize(pexp.mixture, vectorize.args = "grid")

theta_update <- function(data, Z, priors, H) {
    
    # Initalization
    mu     <- numeric(H)
    sigma2 <- numeric(H)
    
    # Priors
    mu0 <- priors$mu$mu0
    n0  <- priors$mu$n0
    a_sigma <- priors$sigma$a_sigma
    b_sigma <- priors$sigma$b_sigma

    for (j in 1:H) {
        # Full conditionals
        
        # Data inside the clutser
        datac <- data[Z == j]
        nc <- length(datac)
        
        # If no elements are in the cluster?
        if (nc > 0) {
            ybar <- mean(datac)
            s2 <- mean((datac-ybar)^2)
        } else {
            ybar <- 0
            s2   <- 0
        }
        
        # Sampling the variance. Does not depend on mu!!
        sigma2[j] <- 1/rgamma(1, a_sigma + nc/2, b_sigma + (nc*s2 + (nc*n0)/(nc+n0)*(ybar - mu0)^2 )/2)
        
        # Posterior parameter for the mean
        mu_mu <- n0/(nc+n0)*mu0 + nc/(nc+n0)*ybar
        sigma_mu <- sqrt(sigma2[j]/(nc + n0))
        # Sampling the full conditional
        mu[j] <- rnorm(1, mu_mu, sigma_mu)
        
    }
    list(mu = mu, sigma2 = sigma2)
}

#' Bayesian nonparametric density estimation
#'
#' Bayesian nonparametric density estimation
#' @param data A vector containing the data
#' @param R The number of replications
#' @param kappa the kappa parameter
#' @param alphaID Logical: should be putted a prior on alpha?
#' @export
dpmslice <- function(data, R = 10^3, kappa = 0.5,alphaID = FALSE,verbose=FALSE) {
    
    data.range <- range(data)
    grid       <- seq(from = data.range[1], to = data.range[2], length = 200)

    priors = list(baseline = list(mu = list(mu0 = mean(data), 
        n0 = 1), sigma = list(a_sigma = 2.5, b_sigma= 0.5*var(data) )), 
        alpha = list(a = 1))
    
    # Initialization
    n <- length(data)
    burnin <- min(round(0.2 * R),2000)
    theta <- list(mu = NULL, sigma = NULL)
    alpha <- 1
    
    y.post <- numeric(R)
    alpha.out <- numeric(R)
    component <- numeric(R)
    d.out <- 0
    dexp.out <- 0

    # Initial allocation using kmeans
    km.out <- kmeans(data, 7)
    Z <-  km.out$cluster
    
    # Initialization of 'mu' as the centers of the clusters
    theta$mu <- as.numeric(km.out$centers)
    theta$sigma2 <- NULL
    
    for (r in 1:(R + burnin)) {
        
        # Uniform distribution - Entire sample
        u <- runif(n, 0, weights_seq(Z, kappa))
        # Number of mixtures needed for each sample
        L <- 1 + floor((log(u) - log(1 - kappa))/log(kappa))
        H <- max(c(L, max(Z)))
        
        xi <- weights_seq(1:H, kappa)
        
        theta <- theta_update(data, Z, priors$baseline, H)
        V <- V_update(Z, alpha, H)
        nu <- nu_update(V)
        Z <- Z_update(data, nu, u, theta, kappa)
        
        if (alphaID) {
            alpha <- alpha_update(V, H, priors$alpha)
        }
        
        if (r > burnin) {
            rr <- r - burnin

            dens         <- p.mixture(grid, theta$mu, sqrt(theta$sigma2), nu)
            dens.exp     <- pexp.mixture(grid, theta$mu, sqrt(theta$sigma2), nu)
            
            dens <- p.mixture(grid, theta$mu, sqrt(theta$sigma2), nu)
            dens.exp <- pexp.mixture(grid, theta$mu, sqrt(theta$sigma2), nu)
            d.out <- (rr - 1)/rr * d.out + dens/rr
            dexp.out <- (rr - 1)/rr * dexp.out + dens.exp/rr
            # Posterior.
            Mixt <- sample(H, size = 1, prob = nu)
            
            alpha.out[rr] <- alpha
            component[rr] <- length(unique(Z))
            y.post[rr] <- rnorm(1, theta$mu[Mixt], sqrt(theta$sigma2[Mixt]))
        }
        if(verbose) if(r%%(R/100)==0) cat(paste(r," "))
    }
    
    return(list(y.post = y.post, alpha = alpha.out, 
        component = component, grid = grid, density = d.out, 
        ExponentialDensity = dexp.out))
}
tommasorigon/PhDPack documentation built on May 31, 2019, 6:19 p.m.