#' pprobability
#'
#' Creates for each value of a discrete random variable a polynomial and estimates the least squares and the maximum likelihood solution.
#' * If `sample` is not given then the sample contains each `x` value once.
#' * If `sample` is an integer then it is interpreted as the sample size and a sample is generated by `rmultinom(1, sample, ddiscrete(runif(length(x))))`.
#' * If `sample` is a vector then it is interpreted such that the according `x[i]` value is `i` times in the sample. Thus `sum(sample)` is the sample size.
#' * If `coeff` is a `polylist` of `length(x)` then these polynomials are taken.
#' * If `coeff` is a `matrix` with `length(x)` columns and `power+1` rows then the columns are interpreted as the coefficients of a polynomial.
#' * Otherwise `coeff` is interpreted as a vector from which the coefficient are sampled. The intercepts are sampled via `ddiscrete(runif(length(x)), zero=zero)`.
#' If `coeff` is not given then it is ensured that the least squares and the maximum likelihood solution exists and the estimated proabilities are between zero and one.
#' Otherwise is the results may contain `NA` or the estimated proabilities are outside the interval \eqn{[0;1]}.
#'
#' @param x numeric: values of a discrete random variable
#' @param power integer: degree for poylnomials (default: `1`), must be larger 0
#' @param zero logical: are zero coefficients and zero sample allowed (default: `FALSE`)
#' @param coef matrix: for each degree coefficients to sample from (default: `seq(-1, 1, by=0.1)`)
#' @param sample integer: number of \eqn{x} values in the sample or sample size (default: `rep(1, length(x))`)
#' @param pl polylist: a list of polynomial which describe the probability for \eqn{x} (default: `NULL`)
#' @param tol numeric: tolerance to detect zero values (default: `1e-9`)
#'
#' @return a list the components:
#' * `p`: the polynomials for the probabilities
#' * `ep`: the expected value as polynomial
#' * `x`: the values for the discrete random variable, the same as the input `x`
#' * `sample`: the sample given or generated
#' * `LS$pi`: the summands for the least squares problem
#' * `LS$pl`: the summands for the least squares problen in LaTeX
#' * `LS$pf`: the sum of `LS$pi`
#' * `LS$df`: the derivative of `LS$pf`
#' * `LS$pest`: the estimated parameter, minimum of `LS$pf`
#' * `LS$p`: the estimated probabilities
#' * `ML$pi`: the factors for the maximum likelihood problem
#' * `ML$pl`: the summands for the maximum likelihood problem in LaTeX
#' * `ML$pf`: the product of `ML$pi`
#' * `ML$df`: the derivative of `ML$pf`
#' * `ML$pest`: the estimated parameter, maximum of `ML$pf`
#' * `ML$p`: the estimated probabilities
#'
#' @importFrom stats rmultinom
#' @importFrom polynom is.polylist as.polylist
#' @export
#'
#' @examples
#' # linear polynomials
#' pprobability(0:2)
#' pprobability(0:2, power=1)
#' # constant polynomials, some NAs are generated
#' pprobability(0:3, power=0)
#' # polynomials generated from a different set
#' pprobability(0:2, coef=seq(-2, 2, by=0.1))
#' pprobability(0:2, 0, coef=seq(-2, 2, by=0.1))
#' # polynomials (x, x, 1-2*x) are used
#' pprobability(0:2, 0, coef=matrix(c(0.4, 0.4, 0.3), ncol=3))
#' pprobability(0:2, 1, coef=polylist(c(0,1), c(0,1), c(1, -2)))
pprobability <- function(x, power=1, zero=FALSE, coef=round(seq(-1, 1, by=0.1), 1), sample=rep(1, length(x)), pl=NULL, tol=1e-9) {
build_polynomials <- function(x, coef, zero, power) {
ret <- polylist()
if (is.polylist(coef)) {
ret <- coef
}
if (is.matrix(coef)) {
for (i in 1:length(x)) ret[[i]] <- polynomial(as.numeric(coef[,i]))
}
#
if (length(ret)!=length(x)) {
b <- matrix(ddiscrete(runif(length(x)), zero=zero), nrow=1)
if (!is.matrix(coef)) coef <- matrix(coef, ncol=1)
if (power) {
for (i in 1:power) {
j <- 1+(i%%ncol(coef))
repeat {
bi <- sample(as.numeric(coef), length(x)-1, replace=TRUE)
bi <- c(bi, -sum(bi))
if (zero || all(abs(bi)>tol)) break
}
b <- rbind(b, bi)
}
}
for (i in 1:length(x)) ret[[i]] <- polynomial(as.numeric(b[,i]))
}
ret
}
#
stopifnot(power>=0)
power <- as.integer(power)
if (length(sample)==1) {
repeat{
xsample <- rmultinom(1, sample, ddiscrete(runif(length(x))))
if (zero || all(xsample>0)) break
}
sample <- xsample
}
stopifnot(length(x)==length(sample))
sample <- as.integer(sample)
plgiven <- is.polylist(coef) || is.matrix(coef)
repeat {
# build polynomials
p <- build_polynomials(x, coef, zero, power)
ep <- sum(as.polylist(lapply(1:length(x), function(i) { x[i]*p[[i]] })))
# KQ
kqi <- polylist()
kqj <- rep('', length(x))
for (i in 1:length(x)) {
# kqj[i] <- paste0(sample[i], '\\cdot(', x[i], "-(", toLatex(ep), '))^2')
kqi[[i]] <- sample[i]*(x[i]-p[[i]])^2
}
kq <- sum(kqi)
dkq <- deriv(kq)
xkq <- extremes(kq, "min", tol)
if (length(xkq)<1) xkq <- NA
if (length(xkq)>1) xkq <- xkq[which.min(predict(kq, xkq))]
pkq <- sapply(p, function(pj) { predict(pj, xkq)} )
# ML
mli <- polylist()
mlj <- rep('', length(x))
for (i in 1:length(x)) {
mli[[i]] <- p[[i]]^sample[i]
# mlj[j] <- paste0(sample[i], '(', toLatex(p[[i]]), ')^{', sample[i], '}')
}
ml <- prod(mli)
dml <- deriv(ml)
xml <- extremes(ml, "max", tol)
if (length(xml)<1) xml <- NA
if (length(xml)>1) xml <- xml[which.max(predict(ml, xml))]
pml <- sapply(p, function(pj) { predict(pj, xml)} )
if (plgiven) break
if (power==0) break
if ((power>0) && (length(xml)==1) && all(pkq>=0, pkq<=1, pml>=0, pml<=1)) break
}
list(p=p, ep=ep, x=x, sample=rep(x, sample),
LS=list(pi=kqi, pf=kq, dp=dkq, pest=xkq, p=pkq),
ML=list(pi=mli, pf=ml, dp=dml, pest=xml, p=pml)
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.