R/pprobability.R
In exams.forge: Support for Compiling Examination Tasks using the 'exams' Package

Documented in pprobability

#' @title Polynomial Probability
#' @rdname pprobability
#' @aliases polynomial_probability
#' @description Creates for each value of a discrete random variable, a polynomial and estimates the least squares and the maximum likelihood solution.
#' The following conditions stand:
#' * If `sample` is not given then the sample contains each `x` value once. 
#' * If `sample` is an integer, then it is interpreted as the sample size and a sample is generated by `rmultinom(1, sample, ddiscrete(runif(length(x))))`. 
#' * If `sample` is a vector, it is interpreted in such a way that the corresponding `x[i]` value occurs `i` times in the sample. Thus, `sum(sample)` is the sample size.
#' * If `coeff` is a `polylist` of `length(x)`, then these polynomials are taken.
#' * If `coeff` is a `matrix` with `length(x)`, columns and `power+1` rows, then the columns are interpreted as the coefficients of a polynomial.
#' * Otherwise `coeff` is interpreted as a vector from which the coefficient is sampled. The intercepts are sampled via `ddiscrete(runif(length(x)), zero=zero)`.
#' If `coeff` is not given then it is ensured that the least squares and the maximum likelihood solution exists and the estimated probabilities are between zero and one.
#' Otherwise, the results may contain `NA` or the estimated probabilities are outside the interval \eqn{[0;1]}.
#'
#' @param x numeric: values of a discrete random variable 
#' @param power integer: the degree for the polynomials (default: `1`), must be larger 0
#' @param zero logical: are zero coefficients and zero samples allowed? (default: `FALSE`)
#' @param coef matrix: for each degree coefficients to sample from (default: `seq(-1, 1, by=0.1)`)
#' @param sample integer: number of \eqn{x} values in the sample or sample size (default: `rep(1, length(x))`)
#' @param pl polylist: a list of polynomials which describes the probability for \eqn{x} (default: `NULL`)
#' @param tol numeric: tolerance to detect zero values (default: `1e-9`)
#'
#' @return A list with the components:
#' * `p`: the polynomials for the probabilities
#' * `ep`: the expected value as polynomial
#' * `x`: the values for the discrete random variable, the same as the input `x`
#' * `sample`: the sample given or generated
#' * `LS$pi`: the summands for the least squares problem 
#' * `LS$pl`: the summands for the least squares problem in LaTeX
#' * `LS$pf`: the sum of `LS$pi`
#' * `LS$df`: the derivative of `LS$pf`
#' * `LS$pest`: the estimated parameter, minimum of `LS$pf`
#' * `LS$p`: the estimated probabilities
#' * `ML$pi`: the factors for the maximum likelihood problem 
#' * `ML$pl`: the summands for the maximum likelihood problem in LaTeX
#' * `ML$pf`: the product of `ML$pi`
#' * `ML$df`: the derivative of `ML$pf`
#' * `ML$pest`: the estimated parameter, maximum of `ML$pf`
#' * `ML$p`: the estimated probabilities
#' 
#' @importFrom stats rmultinom
#' @importFrom polynom is.polylist as.polylist
#' @export
#'
#' @examples
#' # linear polynomials
#' pprobability(0:2)
#' pprobability(0:2, power=1)
#' # constant polynomials, some NAs are generated
#' pprobability(0:3, power=0)
#' # polynomials generated from a different set
#' pprobability(0:2, coef=seq(-2, 2, by=0.1))
#' pprobability(0:2, 0, coef=seq(-2, 2, by=0.1))
#' # polynomials (x, x, 1-2*x) are used
#' pprobability(0:2, 0, coef=matrix(c(0.4, 0.4, 0.3), ncol=3))
#' pprobability(0:2, 1, coef=polylist(c(0,1), c(0,1), c(1, -2)))
pprobability <- function(x, power=1, zero=FALSE, coef=round(seq(-1, 1, by=0.1), 1), sample=rep(1, length(x)), pl=NULL, tol=1e-9) {
  build_polynomials <- function(x, coef, zero, power) {
    ret <- polylist()
    if (is.polylist(coef)) {
      ret <- coef  
    }
    if (is.matrix(coef)) {
      for (i in 1:length(x)) ret[[i]] <- polynomial(as.numeric(coef[,i]))
    } 
    #
    if (length(ret)!=length(x)) {
      b <- matrix(ddiscrete(runif(length(x)), zero=zero), nrow=1)
      if (!is.matrix(coef)) coef <- matrix(coef, ncol=1)
      if (power) {
        for (i in 1:power) {
          j <- 1+(i%%ncol(coef))
          repeat {
            bi <- sample(as.numeric(coef), length(x)-1, replace=TRUE)
            bi <- c(bi, -sum(bi))
            if (zero || all(abs(bi)>tol)) break
          }
          b <- rbind(b, bi)
        } 
      }
      for (i in 1:length(x)) ret[[i]] <- polynomial(as.numeric(b[,i]))
    }
    ret
  }
  #
  stopifnot(power>=0)
  power <- as.integer(power)
  if (length(sample)==1) {
    repeat{
      xsample <- rmultinom(1, sample, ddiscrete(runif(length(x))))
      if (zero || all(xsample>0)) break
    }
    sample <- xsample
  }
  stopifnot(length(x)==length(sample))
  sample  <- as.integer(sample) 
  plgiven <- is.polylist(coef) || is.matrix(coef)
  repeat {
    # build polynomials
    p  <- build_polynomials(x, coef, zero, power)
    ep <- sum(as.polylist(lapply(1:length(x), function(i) { x[i]*p[[i]] })))
    # KQ
    kqi <- polylist()
    kqj <- rep('', length(x))
    for (i in 1:length(x)) {
      #      kqj[i] <- paste0(sample[i], '\\cdot(', x[i], "-(", toLatex(ep), '))^2')
      kqi[[i]] <- sample[i]*(x[i]-p[[i]])^2
    }
    kq   <- sum(kqi)
    dkq  <- deriv(kq)
    xkq  <- extremes(kq, "min", tol)
    if (length(xkq)<1) xkq <- NA
    if (length(xkq)>1) xkq <- xkq[which.min(predict(kq, xkq))]
    pkq  <- sapply(p, function(pj) { predict(pj, xkq)} )
    # ML
    mli  <- polylist()
    mlj <- rep('', length(x))
    for (i in 1:length(x)) {
      mli[[i]] <- p[[i]]^sample[i]
      #      mlj[j] <- paste0(sample[i], '(', toLatex(p[[i]]), ')^{', sample[i], '}')    
    }
    ml   <- prod(mli)
    dml  <- deriv(ml)
    xml  <- extremes(ml, "max", tol)
    if (length(xml)<1) xml <- NA
    if (length(xml)>1) xml <- xml[which.max(predict(ml, xml))]
    pml  <- sapply(p, function(pj) { predict(pj, xml)} )
    if (plgiven) break
    if (power==0) break
    if ((power>0) && (length(xml)==1) && all(pkq>=0, pkq<=1, pml>=0, pml<=1)) break
  }
  list(p=p, ep=ep, x=x, sample=rep(x, sample),
       LS=list(pi=kqi, pf=kq, dp=dkq, pest=xkq, p=pkq),
       ML=list(pi=mli, pf=ml, dp=dml, pest=xml, p=pml)
  )
}

#' @rdname pprobability
#' @export
# polynomial_probability <- function(...){
#  pprobability(...)}
polynomial_probability <- pprobability