R/expectStr.R

Defines functions one_combination one_grouping vect_to_one str_to_moment combine_coef all_groups coef_denom coef_numer calculate_coef sort_str add_letter perm_categories groups one_forward

Documented in one_combination

#=========================================================================#
# functions to generate expressions for:                                  #
# 	EDGEWORTH EXPANSIONS,                                                 #
#   UNBIASED MOMENT ESTIMATES (including products and powers of moments)  #
# E(Xbar^k1*X2bar^k2*X3bar^k3*...)                                        #
# powvect: c(k1, k2, k3, ...), k's can be 0 if needed,                    #
#          e.g. E(Xbar^k1*X3bar^k3) -> c(k1, 0, k3)                       #
#=========================================================================#

# sorting the groups in a specific way;
# this function creates a next grouping from a given one
# from 4 3 3 2 2 1  get 4 3 3 3 1 1
one_forward <- function(vect) {
    m <- sum(vect)
    if (length(vect) == 1) return(vect)
    for (i in (length(vect) - 1):1) {
        if (i == 1) {
            vect[i] <- vect[i] + 1
            return(c(vect[1:i], rep(1, m - sum(vect[1:i]))))
        }
        else if (vect[i - 1] > vect[i]) {
            vect[i] <- vect[i] + 1
            return(c(vect[1:i], rep(1, m - sum(vect[1:i]))))
        }
    }
}

# creates a list of all possible groupings using our sorting rule
groups <- function(nelem) { # number of elements, sum(powvect)
    l <- list(vect <- rep(1, nelem))
    while (length(vect) > 1) {
        vect <- one_forward(vect)
        l <- c(l, list(vect))
    }
    return(l)
}

# all the combinations for letters where "a" is Xbar, "b" is X2bar and so on
# input is a vector of powXbar (# of a's), powX2bar (# of b's), powX3bar, ...
perm_categories <- function(powvect) {
    K <- length(powvect)
    nall <- sum(powvect)
    prev_vect <- paste(rep(0, nall), collapse = "")
    spaces_left <- c(nall - c(0, cumsum(powvect)[-K]))
    for (k in 1:K) {
        ind_next <- combn(1:spaces_left[k], powvect[k])
        prev_vect <- add_letter(ind_next, prev_vect, letters[k])
    }
    return(prev_vect)
}

# helper for perm_categories()
add_letter <- function(ind_next, prev_vect, letter) {
    next_vect <- matrix(nrow = length(prev_vect), ncol = ncol(ind_next))
    for (i in 1:length(prev_vect)) {
        for (j in 1:ncol(ind_next)) {
            old <- strsplit(prev_vect[i], split = "")[[1]]
            not_filled <- which(old == "0")  # 0's originally
            old[not_filled[ind_next[, j]]] <- letter
            next_vect[i, j] <- paste(old, collapse = "")
        }
    }
    return(as.character(next_vect))
}

# sort letters in a string - needed to compare strings
sort_str <- function(str) {
    return(paste(sort((strsplit(str, split = "")[[1]])), collapse = ""))
}

# calculate coefficient for one grouping
# elements sorted, K is length(powvect); the biggest letter is letters[K]
# excl is a vector (grep class) to remove letters and leave only one
calculate_coef <- function(str_vect, K, excl) {
    coef_numer(str_vect, K, excl)/coef_denom(str_vect)
}

# numerator for calculate_coef()
coef_numer <- function(str_vect, K, excl) {
    coefs <- matrix(nrow = K, ncol = length(str_vect))
    for (k in 1:K) {
        nn <- nchar(gsub(excl[k], "", str_vect))
        cn <- c(0, cumsum(nn))[- (length(nn) + 1)]  # 0 in front, remove last
        coefs[k, ] <- choose(sum(nn) - cn, nn)
    }
    return(prod(coefs))
}

# denominator for calculate_coef()
# permutations of repeated groups
# str_vect has to be sorted - strings and groups
coef_denom <- function(str_vect) {
    if (length(str_vect) == 1) return(1)
    repeats <- duplicated(str_vect)
    k <- 1
    multiples <- NULL
    for (i in 2:(length(str_vect))) {
        if (repeats[i]) k <- k + 1
        else {
            multiples <- c(multiples, k)
            k <- 1
        }
    }
    multiples <- c(multiples, k)
    return(prod(factorial(multiples)))
}

# table of all the groups and their coefficients (number of occurences)
all_groups <- function(powvect) {
    K <- length(powvect)
    m <- sum(powvect)
    groupings <- groups(m)                            # list
    perms     <- perm_categories(powvect)             # vector of strings
    excl <- paste("[^", letters[1:K], "]", sep = "")  # letters to exclude

    mat <- matrix("", ncol = m + 2)
    for (g in 1:length(groupings)) {
        grp <- groupings[[g]]
        for (p in 1:length(perms)) {
            permgroup <- substring(perms[p],          # break the string
                                   c(1, cumsum(grp[-length(grp)]) + 1),
                                   cumsum(grp))
            if (any(permgroup == "a")) next           # check order
            newrow <- sort(sapply(permgroup, sort_str))
            # sorted strings and sorted vector
            mat <- rbind(mat, c(newrow, rep("", m - length(newrow)),
                                length(newrow),
                                calculate_coef(newrow, K, excl)))
        }
    }
    all_empty <- apply(mat, 2, function(s) all(s == ""))
    mat <- unique(mat[, !all_empty])[-1, ]
    if (class(mat) != "matrix")
        mat <- matrix(mat, nrow = 1)                  # if vector
    colnames(mat) <- c(paste("group", 1:(ncol(mat) - 2)), "d", "coef")
    return(mat)
}

# takes output of all_groups()
# subscript of mu is the order (moment)
# combines same order combinations and adds coefficients
combine_coef <- function(char_mat) {
    if (all(char_mat == "")) return(char_mat)
    J <- ncol(char_mat)
    if (all(dim(char_mat) == c(1, 3)))
        return(data.frame(mu = str_to_moment(char_mat[1, 1]), k = 1, coef = 1))
    num_mat <- t(apply(char_mat[, 1:(J-2)], 1, str_to_moment))
    mu_vect <- apply(num_mat, 1, vect_to_one)
    coef_vect <- tapply(char_mat[, "coef"], mu_vect,     # add coefficients
                        function(x) sum(as.numeric(x)))
    d_vect    <- tapply(char_mat[, "d"], mu_vect, unique)
    df_d <- data.frame(mu = names(d_vect), d = d_vect)
    df_coef <- data.frame(mu = names(coef_vect), coef = coef_vect)
    return(merge(df_d, df_coef, by = "mu"))
}

# helper for combine_coef()
# order of each group, eg "aacdd" has order 13 (mu13)
str_to_moment <- function(str_vect) {
    sapply(str_vect, function(str) {
        sum(match(strsplit(str, split = "")[[1]], letters))
    })
}
# helper for combine_coef()
# representation of moments (orders) in a grouping in single string
vect_to_one <- function(vect) {
    vect <- sort(vect[vect != 0])
    return(paste(vect, collapse = ":"))
}

#-------------------------------------------------------------------------#
#              Generate an expression string for Sage/SymPy:              #
#                    SR(expr_str) or sympify(expr_str)                    #
#-------------------------------------------------------------------------#

# inputs: vector of three elements (mu, d, and coef),
#         sample size notation (choose_from)
one_grouping <- function(vect3elem, smpsize) {
    if (all(vect3elem == "")) return("")
    mu <- strsplit(vect3elem[1], split = ":")
    mutab <- rev(table(mu))
    muexpr <- paste("mu", names(mutab), "^", mutab, "*",
                    sep = "", collapse = "")
    muexpr <- substr(muexpr, start = 1, stop = nchar(muexpr) - 1)

    k <- as.numeric(vect3elem[2])
    nexpr <- smpsize
    if (k > 1) {
        for (i in 1:(k-1)) {
            nexpr <- paste(nexpr, "*(", smpsize, "-", i, ")",
                           sep = "", collapse = "")
        }
    }
    coef_n_mu <- paste(vect3elem[3], "*", nexpr, "*", muexpr, " + ",
                       sep = "", collapse = "")
    return(coef_n_mu)
}

#' Generate symbolic expression for expectation
#'
#' Generate a string with symbolic expression for expectation of powers and
#' products of non-central (raw) sample moments of an arbitrary order.
#'
#' For a zero-mean random variable \code{X} and a sample \eqn{X_1, \dots,
#' X_n}{X[1], ..., X[n]}, find \eqn{E(\bar{X}^{j_1} \overline{X^2}^{j_2}
#' \overline{X^3}^{j_3} \cdots \overline{X^m}^{j_m})}{E(X-bar^{j_1}
#' {X^2}-bar^{j[2]} \cdots {X^m}-bar^{j[m]})}, where \eqn{overline{X^k} = 1/n
#' \sum_{i = 1}^n X_i^{k}}{{X^k}-bar = sum_{i = 1}^n X[i]^k} is a \eqn{k}'th
#' non-central sample moment. The expression is given in terms of sample size
#' and true moments \eqn{\mu_k}{\mu[k]} of \eqn{X}. These expectations can
#' subsequently be used for generating unbiased central moment estimators of an
#' arbitrary order, Edgeworth expansions, and possibly solving other
#' higher-order problems.
#'
#' @param powvect vector of \eqn{j_1, \dots, j_m}{j[1], ..., j[m]} (see
#'   "Details") of exponents for non-central moments. The index of an element of
#'   this vector indicates a corresponding order, e.g. for \eqn{E(\bar{X}^5
#'   X^4)}{E(X-bar^5 X^4)}, \code{powvect = c(5, 0, 0, 1)}. Thus the vector will
#'   have \code{m} elements if \code{m} is the highest order.
#' @param smpsize symbol to be used for sample size.
#'
#' @return A string representing a symbolic expression for further processing
#'   using computer algebra (e.g. with \emph{Sage} or \emph{SymPy}), for
#'   calculating numeric values, or to be rendered with \emph{Latex}.
#'
#' @examples
#' one_combination(c(5, 0, 2, 1))
#'
#' @export
#'
one_combination <- function(powvect, smpsize = "n") {
    if (!length(powvect))                        return("1")
    if (powvect[1] == 1 & sum(powvect[-1]) == 0) return("0")
    if (!sum(powvect))                           return("1")  # all 0's

    res <- all_groups(powvect)
    combined_res <- combine_coef(res)
    combined_res[, 1] <- as.character(combined_res[, 1])
    vect <- apply(combined_res, 1, one_grouping, smpsize = smpsize)
    str <- paste(vect, collapse = "")
    return(paste(" (", substr(str, start = 1, stop = nchar(str) - 2),
                 ") / ", smpsize, "^", sum(powvect), sep = ""))
}
innager/Umoments documentation built on April 17, 2018, 6:20 p.m.