# R/naivebayes.R

#' Naive Bayes Classifier
#'
#' @title Naive Bayes classifier
#' @param formula Formula with syntax: response ~ predictors | weights
#' @param data data.frame
#' @param weights optional frequency weights
#' @param kernel If TRUE, a kernel density estimator is used for numeric
#' predictors (otherwise a Gaussian model is used)
#' @param laplace.smooth Laplace smoothing parameter (pseudo-count added to
#' the count of each category of a categorical predictor)
#' @param prior optional prior class probabilities (currently ignored with a
#' warning; the priors are estimated from the data)
#' @param ... additional arguments to lower level functions
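#' @details The classifier relies on the usual naive Bayes factorization,
#' i.e., conditional independence of the predictors given the class:
#' \deqn{P(c \mid x) \propto P(c) \prod_{j} P(x_j \mid c)}
#' where each conditional distribution \eqn{P(x_j \mid c)} is modeled as
#' Gaussian or by a kernel density estimate for numeric predictors, and as
#' multinomial for categorical predictors.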
#' @aliases naivebayes
#' @export
#' @return An object of class '\code{naivebayes}' is returned. See
#'   \code{\link{naivebayes-class}} for more details about this class and
#'   its generic functions.
#' @author Klaus K. Holst
#' @examples
#' library(data.table)
#' data(iris)
#' m <- naivebayes(Species ~ Sepal.Width + Petal.Length, data = iris)
#' pr <- predict(m, newdata = iris)
#'
#' # using weights to reduce the size of the dataset
#' n <- 5e2
#' x <- rnorm(n, sd = 2) > 0
#' y <- rbinom(n, 1, lava::expit(x))
#' # full data set
#' d1 <- data.frame(y, x = as.factor(x))
#' m1 <- naivebayes(y ~ x, data = d1)
#' # reduced data set
#' d2 <- data.table(d1)[, .(.N), by = .(y, x)]
#' m2 <- naivebayes(y ~ x, data = d2, weights = d2$N)
#' all(predict(m1, d1) == predict(m2, d1))
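#'
#' # kernel density estimates (rather than Gaussian models) can be used
#' # for the numeric predictors:
#' m3 <- naivebayes(Species ~ Sepal.Width + Petal.Length,
#'   data = iris, kernel = TRUE
#' )
#' m3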
naivebayes <- function(formula, data, weights = NULL,
                       kernel = FALSE, laplace.smooth = 0,
                       prior = NULL, ...) {

  if (missing(data)) stop("Need data as data.frame or data.table")
  des <- design(formula, data = as.data.frame(data), specials = "weights")
  y <- as.factor(des$y)
  if (is.null(weights)) {
    weights <- des$weights
    if (is.null(weights)) weights <- rep(1, length(y))
  }
  predictor <- des$term.labels
  if (inherits(data, "data.table")) {
    X <- data[, predictor, with = FALSE, drop = FALSE]
  } else {
    X <- data[, predictor, drop = FALSE]
  }
  charvar <- names(Filter(is.character, X))
  ## Convert character vectors to factors to avoid losing levels
  ## when calculating conditional probabilities
  if (length(charvar) > 0) {
    if (inherits(data, "data.table")) {
      for (col in charvar) data.table::set(X, j = col, value = factor(X[[col]]))
    } else {
      for (col in charvar) X[, col] <- factor(X[, col])
    }
  }
  xtabs0 <- function(counts, x, prop = FALSE, ...) {
    res <- stats::xtabs(counts ~ x)
    if (prop) res <- res / sum(res)
    return(structure(as.numeric(res), names = names(res)))
  }
  cls <- levels(y)
  prior0 <- xtabs0(weights, y, prop = TRUE)
  if (!is.null(prior)) {
    warning("prior argument is not implemented and has no effect.")
    ## user-defined priors
    ## TODO: Assign new values and renormalize
  }
  estcond <- function(x, weights, ...) {
    if (inherits(x, "data.table")) x <- as.matrix(x[, 1])
    w <- weights / sum(weights)
    if (is.numeric(x)) {
      if (!kernel) {
        ## TODO: "smoothing" in sparse cases
        est <- c(mean = sum(x * w), sd = (sum(x^2 * w) - sum(x * w)^2)^.5)
        return(list(model = "gaussian", estimate = est))
      } else {
        ## Kernel density estimate
        ## TODO: add tuning parameters?
        return(list(
          model = "density",
          estimate = stats::density(x, weights = w)
        ))
      }
    } else {
      ## Laplace smoothing: (x + alpha)/(N + k*alpha), alpha = laplace.smooth,
      ## x: counts in the different categories;
      ## N: total count; k: number of categories
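      ## Example: counts (3, 0, 1) with laplace.smooth = 1 give
      ## probabilities (4, 1, 2)/7, so empty categories keep positive mass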
      ## idx <- rep(seq_along(x),weights);
      M <- xtabs0(weights, x) + laplace.smooth
      return(list(model = "multinomial", estimate = M / sum(M)))
    }
  }
  pcond <- lapply(cls, function(i) {
    idx <- which(y == i)
    if (inherits(data, "data.table")) {
      m0 <- as.data.frame(X[idx, predictor, with = FALSE, drop = FALSE])
    } else {
      m0 <- as.data.frame(X[idx, predictor, drop = FALSE])
    }
    return(lapply(m0, estcond, weights = weights[idx]))
  })
  res <- structure(
    list(
      prior = prior0, ## Pr(class)
      conditional = pcond, ## Pr(x|class)
      classes = cls,
      xvar = names(pcond[[1]]),
      xmodel = unlist(lapply(pcond[[1]], function(x) x$model)),
      design = des,
      call = match.call()
    ),
    class = "naivebayes"
  )
  return(res)
}

#' @export
print.naivebayes <- function(x, ...) {
  print(x$call)
  cat("\n")
  val <- x$prior
  names(val) <- paste0(seq_along(val), ": ", names(val))
  print(data.table::data.table(Prior = val))
  cat("\n")
}

#' Naive Bayes Classifier predictions
#' @title Predictions for Naive Bayes Classifier
#' @param object \code{naivebayes} object (as returned by \code{\link{naivebayes}})
#' @param newdata new data on which to make predictions
#' @param expectation Variable for which to calculate the conditional
#' expectation with respect to the class probabilities (currently ignored)
#' @param threshold Threshold parameters. The first element defines the
#' threshold on the estimated conditional probabilities, and the second
#' element the value that truncated probabilities are set to.
#' @param ... Additional arguments to lower level functions
#' @export
#' @author Klaus K. Holst
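#' @examples
#' # posterior class probabilities for the training data
#' m <- naivebayes(Species ~ Sepal.Width + Petal.Length, data = iris)
#' head(predict(m, newdata = iris))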
predict.naivebayes <- function(object, newdata, # nolint
                               expectation = NULL,
                               threshold = c(1e-3, 1e-3), ...) {
  if (missing(newdata)) stop("Need new data to make predictions")
  ## Posterior P(class|x) = P(class)P(x|class)/P(x)
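  ## Computations are carried out on the log scale for numerical stability:
  ## log P(class|x) = log P(class) + sum_j log P(x_j|class) - log P(x)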
  if (!is.null(expectation)) {
    if (inherits(expectation, "formula")) {
      expectation <- all.vars(expectation)
    }
    z <- newdata[, expectation] # nolint
    ## TODO: Not used for now
  }
  if (!all(c(object$xvar, expectation) %in% names(newdata))) {
    stop("Variables missing in data")
  }

  lposterior <- matrix(nrow = nrow(newdata), ncol = length(object$classes))
  predictor <- object$design$term.labels
  if (inherits(newdata, "data.table")) {
    X <- newdata[, predictor, with = FALSE, drop = FALSE]
  } else {
    X <- newdata[, predictor, drop = FALSE]
  }
  charvar <- names(Filter(is.character, X))
  if (length(charvar) > 0) {
    if (inherits(newdata, "data.table")) {
      for (col in charvar) data.table::set(X, j = col, value = factor(X[[col]]))
    } else {
      for (col in charvar) X[, col] <- factor(X[, col])
    }
  }
  px <- rep(0, nrow(newdata))
  for (i in seq_along(object$classes)) {
    # P(x|c) = prod_j P(x_j|c) by the conditional independence assumption
    lpcond <- rep(0, nrow(newdata))
    for (j in seq_along(predictor)) {
      x0 <- object$conditional[[i]]
      nam <- object$xvar[j]
      if (inherits(newdata, "data.table")) {
        x <- as.matrix(X[, nam, with = FALSE, drop = FALSE])[, 1]
      } else {
        x <- as.matrix(X[, nam, drop = FALSE])[, 1]
      }
      estx <- x0[[j]]
      if (is.list(estx)) {
        estx <- estx$estimate
      }
      curmodel <- object$xmodel[j]
      if (curmodel == "multinomial") {
        xs <- unique(x)
        misx <- which(!(xs %in% names(estx)))
        if (length(misx) > 0) {
          nn <- c(names(estx), xs[misx])
          estx <- c(estx, rep(NA, length(misx)))
          names(estx) <- nn
        }
        estx[estx < threshold[1] | is.na(estx)] <- threshold[2]
        estx <- estx / sum(estx)
        lpcond <- lpcond + log(estx[x])
      }
      if (curmodel == "gaussian") {
        ## TODO: threshold
        if (is.na(estx[1])) estx[1] <- 0
        if (is.na(estx[2]) || estx[2] < 1e-16) estx[2] <- 1
        lpcond <- lpcond + dnorm(x, mean = estx[1], sd = estx[2], log = TRUE)
      }
      if (curmodel %in% c("kernel", "density")) {
        estx <- predict(estx, x)
        ## TODO: threshold
        lpcond <- lpcond + log(estx)
      }
    }
    logjoint <- lpcond + log(object$prior[i]) ## log P(x,c)
    px <- px + exp(logjoint) ## P(x)
    lposterior[, i] <- logjoint
  }

  if (is.null(expectation)) {
    for (i in seq_len(ncol(lposterior))) {
      lposterior[, i] <- lposterior[, i] - log(px) ## log P(c|x)
    }
  }
  colnames(lposterior) <- object$classes
  return(exp(lposterior))
}

#' @title naivebayes class object
#'
#' @description The function [naivebayes] returns an object of type
#'   \code{naivebayes}.
#'
#' An object of class '\code{naivebayes}' is a list with at least
#' the following components:
#'
#' \describe{
#'   \item{prior}{Named numeric vector with prior probabilities,
#' i.e. marginal class probabilities Pr(class)}
#'   \item{conditional}{list with the estimated conditional distributions
#' of the features given the classes (one list element per class), Pr(x|class)}
#'   \item{classes}{Names (character vector) of the classes}
#'   \item{xvar}{Names of predictors}
#'   \item{xmodel}{Conditional model for each predictor}
#'   \item{design}{Model design object}
#'   \item{call}{The function call which instantiated the object}
#' }
#'
#' @section S3 generics:
#' The following S3 generic functions are available for an object
#' of class \code{naivebayes}:
#' \describe{
#'   \item{\code{predict}}{Predict class probabilities for new features data.}
#'   \item{\code{print}}{Basic print method.}
#'  }
#'
#' @aliases naivebayes-class
#' @seealso [naivebayes()]
#' @return objects of the S3 class '\code{naivebayes}'
#' @examples ## See example(naivebayes) for examples
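#' # a quick look at the components described above:
#' m <- naivebayes(Species ~ Sepal.Width, data = iris)
#' m$prior # marginal class probabilities, Pr(class)
#' m$xmodel # conditional model used for each predictor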
#' @docType class
#' @name naivebayes-class
NULL
