R/class_generic.R

Defines functions dBNPdens.BNPdens BNPdens2coda.BNPdens plot.BNPdens print.BNPdens summary.BNPdens BNPpart BNPdens

Documented in BNPdens BNPdens2coda.BNPdens BNPpart dBNPdens.BNPdens plot.BNPdens print.BNPdens summary.BNPdens

#' BNPdens class constructor
#'
#' @description A constructor for the \code{BNPdens} class. The class \code{BNPdens} is a named list containing
#' the output generated by a specified Bayesian nonparametric mixture model implemented by means of
#' a specified MCMC strategy, as in \code{PYdensity},  \code{DDPdensity}, and  \code{PYregression}.
#'
#' @param density a matrix containing the values taken by the density at the grid points;
#' @param data a dataset;
#' @param grideval a set of values where to evaluate the density;
#' @param grid_x regression grid, independent variable;
#' @param grid_y regression grid, dependent variable;
#' @param clust a (\code{niter - nburn}) \eqn{\times}{x} \code{nrow(data)}-dimensional matrix containing
#' the cluster labels for each observation (cols) and MCMC iteration (rows);
#' @param mean values for the location parameters;
#' @param beta coefficients for regression model (only for \code{PYregression});
#' @param sigma2 values of the scale parameters;
#' @param probs values for the mixture weights;
#' @param niter number of MCMC iterations;
#' @param nburn number of MCMC iterations to discard as burn-in;
#' @param tot_time total execution time;
#' @param univariate logical, \code{TRUE} if the model is univariate;
#' @param regression logical, \code{TRUE}  for the output of  \code{PYregression};
#' @param dep  logical, \code{TRUE}  for the output of  \code{DDPdensity};
#' @param group_log group allocation for each iteration (only for \code{DDPdensity});
#' @param group vector, allocation of observations to strata (only for \code{DDPdensity});
#' @param wvals values of the processes weights (only for \code{DDPdensity}).
#'
#' @examples
#' data_toy <- c(rnorm(100, -3, 1), rnorm(100, 3, 1))
#' grid <- seq(-7, 7, length.out = 50)
#' est_model <- PYdensity(y = data_toy, mcmc = list(niter = 100,
#'                       nburn = 10, nupd = 100), output = list(grid = grid))
#' str(est_model)
#' class(est_model)
#' @export
#'

BNPdens <- function(
  density = NULL,
  data = NULL,
  grideval = NULL,
  grid_x = NULL,
  grid_y = NULL,
  clust = NULL,
  mean = NULL,
  beta = NULL,
  sigma2 = NULL,
  probs = NULL,
  niter = NULL,
  nburn = NULL,
  tot_time = NULL,
  univariate = TRUE,
  regression = FALSE,
  dep = FALSE,
  group_log = NULL,
  group = NULL,
  wvals = NULL
){
  value <- list(density = density,
                data = data,
                grideval = grideval,
                grid_x = grid_x,
                grid_y = grid_y,
                clust = clust,
                mean = mean,
                beta = beta,
                sigma2 = sigma2,
                probs = probs,
                niter = niter,
                nburn = nburn,
                tot_time = tot_time,
                univariate = univariate,
                regression = regression,
                dep = dep,
                group_log = group_log,
                group = group,
                wvals = wvals)
  attr(value, "class") <- "BNPdens"
  value
}

#' BNPpart class constructor
#'
#' @description A constructor for the \code{BNPpart} class. The class \code{BNPpart} is a named list containing
#' the output of partition estimation methods.
#'
#' @param partitions a matrix, each row is a visited partition;
#' @param scores a vector, each value is the score of a visited partition;
#' @param psm a matrix, posterior similarity matrix.
#'
#' @examples
#' data_toy <- c(rnorm(100, -3, 1), rnorm(100, 3, 1))
#' grid <- seq(-7, 7, length.out = 50)
#' est_model <- PYdensity(y = data_toy, mcmc = list(niter = 100,
#'                       nburn = 10, nupd = 100), output = list(grid = grid))
#' part <- partition(est_model)
#' class(part)
#'
#' @export
#'

BNPpart <- function(
  partitions = NULL,
  scores = NULL,
  psm = NULL){
  value <- list(partitions = partitions,
                scores = scores,
                psm = psm)
  attr(value, "class") <- "BNPpart"
  value
}



# -----------------------------------------------------------------------
# SUMMARY
# -----------------------------------------------------------------------

#' BNPdens summary method
#'
#' @description The \code{summary.BNPdens} method provides summary information on \code{BNPdens} objects.
#' @param object an object of class \code{BNPdens};
#' @param ... additional arguments
#'
#' @rdname summary.BNPdens
#' @examples
#' data_toy <- c(rnorm(100, -3, 1), rnorm(100, 3, 1))
#' grid <- seq(-7, 7, length.out = 50)
#' est_model <- PYdensity(y = data_toy, mcmc = list(niter = 100,
#'                       nburn = 10, napprox = 10), output = list(grid = grid))
#' class(est_model)
#' summary(est_model)
#' @export

summary.BNPdens <- function(object, ...) {
  if(!object$dep){
    nuniq <- apply(object$clust, 1, function(x) length(unique(x)))
    if(object$univariate){
      cat("PYdensity function call:\n",
          object$nburn, "\tburn-in iterations\n",
          object$niter, "\titerations \n",
          "Global estimation time:", round(object$tot_time, digits = 2), "seconds\n",
          "Average number of groups: ", mean(nuniq),"\n",
          "Min number of groups: ", min(nuniq), "; Max number of groups: ", max(nuniq), "\n")
    } else {
      cat("PYdensity function call:\n",
          object$nburn, "\tburn-in iterations\n",
          object$niter, "\titerations \n",
          "Global estimation time:", round(object$tot_time, digits = 2), "seconds\n","Average number of groups: ", mean(nuniq),"\n",
          "Min number of groups: ", min(nuniq), "; Max number of groups: ", max(nuniq), "\n")
    }
  } else {
    nuniq <- sapply(unique(object$group), function(x) apply(object$clust, 1, function(y) length(unique(y[object$group == x]))))
    cat("DDPdensity function call:\n",
        length(table(object$group)), "\tdifferent groups\n",
        object$nburn, "\tburn-in iterations\n",
        object$niter, "\titerations \n",
        "Global estimation time:", round(object$tot_time, digits = 2), "seconds\n","Average number of groups: ", paste(colMeans(nuniq), collapse = " - "), "\n",
        "Min number of groups: ", paste(apply(nuniq, 2, min), collapse = " - "), "; Max number of groups: ", paste(apply(nuniq, 2, max), collapse = " - "), "\n")
  }
}

# -----------------------------------------------------------------------
# PRINT
# -----------------------------------------------------------------------

#' BNPdens print method
#'
#' @description The \code{BNPdens} method prints the type of a \code{BNPdens} object.
#' @param x an object of class \code{BNPdens};
#' @param ... additional arguments.
#' @rdname print.BNPdens
#' @examples
#' data_toy <- c(rnorm(100, -3, 1), rnorm(100, 3, 1))
#' grid <- seq(-7, 7, length.out = 50)
#' est_model <- PYdensity(y = data_toy, mcmc = list(niter = 100,
#'                       nburn = 10, napprox = 10), output = list(grid = grid))
#' class(est_model)
#' print(est_model)
#' @export

print.BNPdens <- function(x, ...) {
  cat("BNPdens object\n")
  if(!x$dep){
    if(x$regression){
      cat("Type: regression model")
    } else if(x$univariate){
      cat("Type: univariate density")
    } else {
      cat("Type: multivariate density")
    }
  } else {
    cat("Type: dependent Dirichlet process model")
  }
}

# -----------------------------------------------------------------------
# PLOT
# -----------------------------------------------------------------------
#' Density plot for BNPdens class
#'
#' @description Extension of the \code{plot} method to the \code{BNPdens} class. The method \code{plot.BNPdens} returns suitable plots for a \code{BNPdens}
#' object. See details.
#'
#' @details If the \code{BNPdens} object is generated by \code{PYdensity}, the function returns
#' the univariate or bivariate estimated density plot.
#' If the \code{BNPdens} object is generated by \code{PYregression}, the function returns
#' the scatterplot of the response variable jointly with the covariates (up to four), coloured according to the estimated partition.
#' up to four covariates.
#' If \code{x} is a \code{BNPdens} object generated by \code{DDPdensity}, the function returns
#' a wrapped plot with one density per group.
#' The plots can be enhanced in several ways: for univariate densities, if \code{show_hist = TRUE},
#' the plot shows also the histogram of the data; if \code{show_points = TRUE},
#' the plot shows also the observed points along the
#' x-axis; if \code{show_points = TRUE} and \code{show_clust = TRUE}, the points are colored
#' according to the partition estimated with the \code{partition} function.
#' For multivariate densities: if  \code{show_points = TRUE},
#' the plot shows also the scatterplot of the data;
#' if \code{show_points = TRUE} and  \code{show_clust = TRUE},
#' the points are colored according to the estimated partition.
#'
#' @param x an object of class \code{BNPdens};
#' @param dimension if \code{x} is the output of a model fitted to multivariate data,
#' \code{dimensions} specifies the two dimensions for the bivariate contour plot
#' (if they are equal, a marginal univarite plot is returned);
#' @param col the color of the lines;
#' @param show_points if \code{TRUE}, the function plots also the observations, default \code{FALSE};
#' @param show_hist if \code{TRUE}, and the model is univariate, the function plots also the histogram of the data, default  \code{FALSE};
#' @param show_clust if \code{TRUE} the function plots also the points colored with respect to the estimated partition, default  \code{FALSE};
#' @param bin_size if \code{show_hist = TRUE}, it correponds to the size of each bin,
#' default \code{range(data) / 30};
#' @param wrap_dim bivariate vector, if \code{x} is the output of \code{DDPdensity}, it correponds to the number of rows and columns in the plot. Default \code{c(ngroup, 1)};
#' @param xlab label of the horizontal axis;
#' @param ylab label of the vertical axis;
#' @param band if \code{TRUE} and \code{x} is the output of a univariate model or of \code{DDPdensity}, the plot method displays quantile-based posterior credible bands along with estimated densities;
#' @param conf_level bivariate vector, order of the quantiles for the posterior credible bands. Default \code{c(0.025, 0.975)};
#' @param ... additional arguments to be passed.
#'
#' @rdname plot.BNPdens
#'
#' @return A \code{ggplot2} object.
#'
#' @examples
#' # PYdensity example
#' data_toy <- c(rnorm(100, -3, 1), rnorm(100, 3, 1))
#' grid <- seq(-7, 7, length.out = 50)
#' est_model <- PYdensity(y = data_toy,
#'  mcmc = list(niter = 200, nburn = 100, nupd = 100),
#'  output = list(grid = grid))
#' class(est_model)
#' plot(est_model)
#'
#' # PYregression example
#' x_toy <- c(rnorm(100, 3, 1), rnorm(100, 3, 1))
#' y_toy <- c(x_toy[1:100] * 2 + 1, x_toy[101:200] * 6 + 1) + rnorm(200, 0, 1)
#' grid_x <- c(0, 1, 2, 3, 4, 5)
#' grid_y <- seq(0, 35, length.out = 50)
#' est_model <- PYregression(y = y_toy, x = x_toy,
#' mcmc = list(niter = 200, nburn = 100),
#' output = list(grid_x = grid_x, grid_y = grid_y))
#' summary(est_model)
#' plot(est_model)
#'
#' # DDPdensity example
#' data_toy <- c(rnorm(50, -4, 1), rnorm(100, 0, 1), rnorm(50, 4, 1))
#' group_toy <- c(rep(1,100), rep(2,100))
#' grid <- seq(-7, 7, length.out = 50)
#' est_model <- DDPdensity(y = data_toy, group = group_toy,
#' mcmc = list(niter = 200, nburn = 100, napprox_unif = 50),
#' output = list(grid = grid))
#' summary(est_model)
#' plot(est_model)
#'
#'
#' @export

plot.BNPdens <- function(x, dimension = c(1,2), col = "#0037c4", show_points = F,
                         show_hist = F, show_clust = F, bin_size = NULL, wrap_dim = NULL,
                         xlab = "", ylab = "", band = T, conf_level = c(0.025, 0.975), ...) {
  ggplot2::theme_set(ggplot2::theme_bw())
  if(is.null(x$density)){
    with(x, {
      xlab <- ifelse(xlab == "", "group", xlab)
      ylab <- ifelse(ylab == "", "count", ylab)
      part_temp <- partition(x)
      temp_plot <- ggplot2::ggplot(data.frame(data = as.factor(part_temp$partitions[1,]))) +
                    ggplot2::geom_bar(map = ggplot2::aes(x = data, color = data, fill = data), alpha = 0.3) +
                    ggplot2::labs(x = xlab, y = ylab) +
                    ggplot2::theme(legend.position = "none")
      temp_plot
    })
  } else {
    if(isTRUE(show_clust)){show_points <- TRUE}
    if(!x$regression){
      if(!x$dep){
        if(x$univariate){

          with(x,{
            if(is.null(bin_size)) bin_size <- (range(x$data)[2] - range(x$data)[1]) / 30
            data_plot = data.frame(V1 = x$data, V2 = partition(x)$partitions[3,])

            if(!is.null(x$density)){
              if(length(dim(x$density)) == 2){
                plot_df <- data.frame(grid = x$grideval, val = colMeans(x$density))
              } else {
                plot_df <- data.frame(grid = x$grideval, val = x$density)
              }
              if(isTRUE(band) & length(dim(x$density)) == 2){
                plot_df <- data.frame(grid = x$grideval,
                                      val  = colMeans(x$density),
                                      band_low = apply(x$density, 2, function(z) quantile(z, conf_level[1])),
                                      band_up  = apply(x$density, 2, function(z) quantile(z, conf_level[2])))
              }
            }

            temp_plot <- ggplot2::ggplot(plot_df, mapping = ggplot2::aes(x = grid, y = val))

            if(show_hist){
              temp_plot <- temp_plot + ggplot2::geom_histogram(data = data_plot, ggplot2::aes(x = V1, y = ..density..),
                                                               fill = "#EFEFEF", col = "#969696", binwidth = bin_size)
            }
            if(show_clust){
              temp_plot <- temp_plot + ggplot2::geom_point(data = data_plot, ggplot2::aes(x = V1, y =0, col = as.factor(V2)))
            }
            if(isTRUE(show_points) & !isTRUE(show_clust)){
              temp_plot <- temp_plot + ggplot2::geom_point(data = data_plot, ggplot2::aes(x = V1, y =0), color = "#646464")
            }
            if(isTRUE(band)){
              temp_plot <- temp_plot + ggplot2::geom_ribbon(data = plot_df,
                                                            mapping = ggplot2::aes(x = grid, ymin = band_low, ymax = band_up),
                                                            fill = col, alpha = 0.3)
            }

            temp_plot <- temp_plot +
              ggplot2::geom_line(mapping = ggplot2::aes(x = grid, y = val), size= 1, color = col) +
              ggplot2::theme(legend.position = "none") +
              ggplot2::labs(x = xlab, y = ylab)
            temp_plot
          })

        } else {
          with(x, {
            data_plot <- data.frame(V1 = x$data[,dimension[1]], V2 = x$data[,dimension[2]], V3 = partition(x)$partitions[3,])
            if(dim(x$density)[2] > 1){
              plot_df <- as.data.frame(cbind(x$grideval, colMeans(x$density)))
            } else {
              plot_df <- as.data.frame(cbind(x$grideval, x$density))
            }

            names(plot_df) = c(paste("GR", 1:ncol(x$grideval), sep = ''), "V1")
            if(dimension[1] == dimension[2]){
              plot_df_use <- aggregate(plot_df, by = list(plot_df[[dimension[1]]]), FUN = sum)
              temp_plot <- ggplot2::ggplot(data = plot_df_use, mapping = ggplot2::aes(x = Group.1, y = V1)) +
                ggplot2::geom_line(mapping = ggplot2::aes(x = Group.1, y = V1), size= 1, color = col) +
                ggplot2::labs(x = xlab, y = ylab)
            } else {
              plot_df_use <- aggregate(plot_df, by = list(plot_df[[dimension[1]]],plot_df[[dimension[2]]]), FUN = sum)
              temp_plot <- ggplot2::ggplot(data = plot_df_use, mapping = ggplot2::aes(x = Group.1, y = Group.2, z = V1))

              if(isTRUE(show_points) & !isTRUE(show_clust)){
                temp_plot <- temp_plot + ggplot2::geom_point(data = data_plot, ggplot2::aes(x = V1, y = V2), col = "#646464")
              }

              if(isTRUE(show_clust)){
                temp_plot <- temp_plot + ggplot2::geom_point(data = data_plot, ggplot2::aes(x = V1, y = V2, col = as.factor(V3)))
              }

              temp_plot <- temp_plot +
                ggplot2::stat_contour(data = plot_df_use, mapping = ggplot2::aes(x = Group.1, y = Group.2, z = V1), bins = 10, col = col) +
                ggplot2::theme(legend.position = "none") +
                ggplot2::labs(x = xlab, y = ylab)
              temp_plot
            }
          })
        }
      } else {
        if(is.null(wrap_dim)) wrap_dim = c(length(unique(x$group)), 1)
        with(x,{
          ngr <- length(unique(x$group))
          if(isTRUE(band)){
            plot_df <- data.frame(grid = rep(x$grideval, ngr),
                                  val  = as.vector(apply(x$density, c(1,2), mean)),
                                  group = factor(rep(levels(factor(x$group)), each = length(x$grideval)),
                                                 levels = levels(factor(x$group))),
                                  band_low = as.vector(apply(x$density, c(1,2), function(z) quantile(z, conf_level[1]))),
                                  band_up  = as.vector(apply(x$density, c(1,2), function(z) quantile(z, conf_level[2]))))
          } else {
            plot_df <- data.frame(grid = rep(x$grideval, ngr),
                                  val  = as.vector(apply(x$density, c(1,2), mean)),
                                  group = factor(rep(levels(factor(x$group)), each = length(x$grideval)),
                                                 levels = levels(factor(x$group))))
          }

          temp_plot <- ggplot2::ggplot(plot_df, mapping = ggplot2::aes(x = grid, y = val)) +
            ggplot2::labs(x = xlab, y = ylab) +
            ggplot2::facet_wrap(~ factor(group), nrow = wrap_dim[1], ncol = wrap_dim[2])

          if(isTRUE(band)){
            temp_plot <- temp_plot + ggplot2::geom_ribbon(data = plot_df,
                                 mapping = ggplot2::aes(x = grid, ymin = band_low, ymax = band_up), fill = col,
                                 colour = NA, alpha = 0.3)
            }

          temp_plot <- temp_plot +
            ggplot2::geom_line(color = col) +
            ggplot2::guides(fill=FALSE, color=FALSE)
          temp_plot
        })
      }
    } else {
      with(x,{
        plot_df <- as.data.frame(x$data)
        colnames(plot_df) = paste0("V", 1:ncol(x$data))
        part <- partition(x)$partitions[3,]

        if(ncol(x$data) == 2){
          temp_plot <- ggplot2::ggplot(plot_df, mapping = ggplot2::aes(x = V1, y = V2)) +
            ggplot2::geom_point(ggplot2::aes(x = V2, y = V1, col = as.factor(part))) +
            ggplot2::guides(fill=FALSE, color=FALSE) +
            ggplot2::labs(x = xlab, y = ylab)
          temp_plot
        } else {
          temp_pl <- list()
          tcn <- colnames(plot_df)
          for(pl in 1:(min(4, ncol(x$data) - 1))){
            colnames(plot_df)[pl + 1] = "temp"
            temp_pl[[pl]] <- ggplot2::ggplot(plot_df, mapping = ggplot2::aes(x = temp, y = V2)) +
              ggplot2::geom_point(ggplot2::aes(x = temp, y = V1, col = as.factor(part))) +
              ggplot2::guides(fill=FALSE, color=FALSE) +
              ggplot2::labs(x = paste0("X", pl), y = "Y")
            colnames(plot_df)[pl + 1] = tcn[pl + 1]
          }
          ggpubr::ggarrange(plotlist = temp_pl, ncol = 2)
        }
      })
    }
  }
}



# -----------------------------------------------------------------------
# EXPORT TO CODA
# -----------------------------------------------------------------------

#' set generic
#' @name BNPdens2coda
#' @keywords internal
#' @export

BNPdens2coda <- function (object, dens) {
  UseMethod("BNPdens2coda", object = object)
}

#'
#' Export to coda interface
#'
#' @description The method \code{BNPdens2coda} converts a \code{BNPdens} object into a \code{coda} mcmc object.
#'
#' @param object a BNPdens object;
#' @param dens logical, it can be TRUE only for models estimated with \code{PYdensity}.
#' If TRUE, it converts to \code{coda} also the estimated density. Default FALSE.
#'
#' @rdname BNPdens2coda.BNPdens
#'
#' @return
#' an mcmc object
#'
#' @examples
#' data_toy <- cbind(c(rnorm(100, -3, 1), rnorm(100, 3, 1)),
#'                   c(rnorm(100, -3, 1), rnorm(100, 3, 1)))
#' grid <- expand.grid(seq(-7, 7, length.out = 50),
#'                     seq(-7, 7, length.out = 50))
#' est_model <- PYdensity(y = data_toy, mcmc = list(niter = 200, nburn = 100),
#'                        output = list(grid = grid))
#' coda_mcmc <- BNPdens2coda(est_model)
#' class(coda_mcmc)
#'
#' @export
#'

BNPdens2coda.BNPdens <- function(object, dens = FALSE){
  if(length(dim(object$density)) < 2) dens = FALSE
  if(object$dep) dens = FALSE
  if(object$regression) dens = FALSE

  if(!dens){
    if(!object$dep){
      coda::as.mcmc(apply(object$clust, 1, function(x) length(unique(x))))
    } else {
      temp <- cbind(sapply(unique(object$group),
                           function(x) apply(object$clust, 1, function(y)
                             length(unique(y[object$group == x])))),
                    object$wvals)[,as.vector(t(cbind(1:(length(unique(object$group))), (length(unique(object$group)) + 1):(2 * (length(unique(object$group)))))))]
      colnames(temp) <- as.vector(sapply(1:ncol(object$wvals), function(x) paste(c("clusters in group", "weigth for group"), x)))
      coda::as.mcmc(temp)
    }
  } else {
    temp <- apply(object$clust, 1, function(x) length(unique(x)))
    temp <- rbind(temp, t(object$density))
    coda::as.mcmc(temp)
  }
}

# -----------------------------------------------------------------------
# EVALUATE THE DENSITY
# -----------------------------------------------------------------------

#' set generic
#' @name dBNPdens
#' @keywords internal
#' @export

dBNPdens <- function (object, x) {
  UseMethod("dBNPdens", object = object)
}

#'
#' Evaluate estimated univariate densities at a given point
#'
#' @description The method \code{dBNPdens} provides an approximated evaluation of estimated univariate densities at a given point, for a \code{BNPdens} class object.
#'
#' @param object a \code{BNPdens} object (only if univariate);
#' @param x the point where to evaluate the density.
#' @rdname dBNPdens.BNPdens
#'
#' @return
#' a numeric value
#'
#' @examples
#' data_toy <- c(rnorm(100, -3, 1), rnorm(100, 3, 1))
#' grid <- seq(-7, 7, length.out = 50)
#' est_model <- PYdensity(y = data_toy, mcmc = list(niter = 200, nburn = 100),
#'                        output = list(grid = grid))
#' x <- 1.4
#' dBNPdens(est_model, x)
#'
#' @export
#'

dBNPdens.BNPdens <- function(object, x){

  if(!isTRUE(object$univariate)) stop("The model must be univariate")
  if( x < min(object$grideval) | x > max(object$grideval) ) stop("x must be between the min and the max of the grid")
  if(!is.null(object$density)){
    if(length(dim(object$density)) == 2){
      y <- colMeans(object$density)
    } else {
      y <- object$density
    }
    approx(object$grideval, y, xout = x)$y
  } else {
    stop("An estimated density is required")
  }

}

Try the BNPmix package in your browser

Any scripts or data that you put into this service are public.

BNPmix documentation built on July 16, 2022, 1:04 a.m.