Nothing
#' Compute probability of a path from root
#'
#' Internal function to compute probability of a path. It does not
#' check the validity of the path.
#' @param object An object of class \code{sevt}.
#' @param x the path, expressed
#' as a character vector containing the sequence of the value of the variables.
#' @param log logical, if \code{TRUE} log-probability is returned.
#' @return The probability of the given path or its logarithm if \code{log=TRUE}.
#' @details Computes the probability of following a given path (\code{x}) starting from the root.
#' Can be a full path from the root to a leaf or a shorter path.
#' @keywords internal
path_probability <-
function(object, x, log = FALSE) {
check_sevt_prob(object)
vs <- sevt_varnames(object)
if (!is.null(names(x))) {
# if it's a named vector just order it
x <- x[vs]
}
# start computing the log probability with first variable
l <- log(object$prob[[vs[1]]][[1]][x[1]])
if (length(x) > 1) {
for (i in 2:length(x)) {
# get corresponding stage
s <- find_stage(object, x[1:(i - 1)])
# and add log-prob
l <- l + log(object$prob[[vs[i]]][[s]][x[i]])
}
}
# return log prob or prob as requested
if (log) {
return(l)
} else {
return(exp(l))
}
}
#' Probabilities for a staged event tree
#'
#' Compute (marginal and/or conditional) probabilities of elementary
#' events with respect
#' to the probability encoded in a staged event tree.
#' @param object an object of class \code{sevt} with probabilities.
#' @param x the vector or data.frame of observations.
#' @param conditional_on named vector, the conditioning event.
#' @param log logical, if \code{TRUE} log-probabilities are returned.
#' @param na0 logical, if \code{NA} should be converted to 0.
#' @return the probabilities to observe each observation in \code{x}, possibly
#' conditional on the event(s) in \code{conditional_on}.
#'
#' @details Computes probabilities related to a vector or a
#' data.frame of observations.
#'
#' Optionally, conditional probabilities can be obtained by specifying
#' the conditioning event in \code{conditional_on}. This can be done either
#' with a single named vector or with a data.frame object with the
#' same number of rows of \code{x}. In the former, the same conditioning
#' is used for all the computed probabilities (if \code{x} has multiple rows);
#' while with the latter different conditioning events (but on the same variables)
#' can be specified for each row of \code{x}.
#'
#' @examples
#' data(Titanic)
#' model <- full(Titanic, lambda = 1)
#' samples <- expand.grid(model$tree[c(1, 4)])
#' pr <- prob(model, samples)
#' ## probabilities sum up to one
#' sum(pr)
#' ## print observations with probabilities
#' print(cbind(samples, probability = pr))
#'
#' ## compute one probability
#' prob(model, c(Class = "1st", Survived = "Yes"))
#'
#' ## compute conditional probability
#' prob(model, c(Survived = "Yes"), conditional_on = c(Class = "1st"))
#'
#' ## compute conditional probabilities with different conditioning set
#' prob(model, data.frame(Age = rep("Adult", 8)),
#' conditional_on = expand.grid(model$tree[2:1]))
#' ## the above should be the same as
#' summary(model)$stages.info$Age
#' @export
prob <- function(object, x, conditional_on = NULL, log = FALSE, na0 = TRUE) {
check_sevt_prob(object)
if (is.null(dim(x))) {
x <- as.data.frame(t(x))
}
p1 <- 1
if (!is.null(conditional_on)){
if (is.vector(conditional_on) && !is.null(names(conditional_on))){
## check if same names
if (any(names(x) %in% names(conditional_on))){
stop("invalid argument, x and conditional_on names should be disjoint")
}
x <- cbind(x, as.data.frame(t(conditional_on)))
p1 <- prob(object, x = conditional_on, log = FALSE, na0 = na0)
}else if (is.data.frame(conditional_on)){
## check if same names
if (any(names(x) %in% names(conditional_on))){
stop("invalid argument, x and conditional_on names should be disjoint")
}
x <- cbind(x, conditional_on)
p1 <- prob(object, x = conditional_on, log = FALSE, na0 = na0)
}else{
stop("invalida argument, conditional_on must be NULL,
a named vector or a data.frame")
}
}
# get dimensions and variables
n <- nrow(x)
i <- ncol(x)
# get variables in the model
var <- names(object$tree)
# variables of the model that are in x
var1 <- var[var %in% colnames(x)]
# index of last variable that appears in x
k <- which(var %in% var1[length(var1)])
res <- vapply(
1:n,
FUN.VALUE = 1,
FUN = function(i) {
ll <- object$tree[1:k]
ll[var1] <- vapply(x[i, var1], as.character, FUN.VALUE = "aaa")
sum(apply(
expand.grid(ll),
MARGIN = 1,
FUN = function(xx) {
path_probability(object, as.character(xx), log = FALSE)
}
), na.rm = TRUE)
}
)
res <- res / p1
if (na0) res[is.na(res)] <- 0
if (log) {
return(log(res))
} else {
return(res)
}
}
#' Log-Likelihood of a staged event tree
#'
#' Compute, or extract the log-likelihood of a staged event tree.
#' @param object an fitted object of class \code{sevt}.
#' @param ... additional parameters (compatibility).
#' @return An object of class \code{\link{logLik}}.
#' @importFrom stats logLik
#' @export
#' @examples
#' data("PhDArticles")
#' mod <- indep(PhDArticles)
#' logLik(mod)
logLik.sevt <- function(object, ...) {
if (!is.null(object$ll)) {
return(object$ll)
}
check_sevt_fit(object)
vars <- names(object$tree)
prob <- expand_prob(object)
ll <- sum(vapply(
seq_along(object$tree),
FUN = function(i) {
if (any(is.nan(prob[[vars[i]]]) &
object$ctables[[vars[i]]] > 0)) {
return(-Inf)
}
ix <-
prob[[vars[i]]] > 0 & !is.na(prob[[vars[i]]]) &
!is.nan(prob[[vars[i]]])
sum(log(prob[[vars[i]]][ix]) *
object$ctables[[vars[i]]][ix])
},
FUN.VALUE = 1
))
attr(ll, "df") <-
sum(c(1, vapply(
object$stages[ vars[-1] ],
FUN = function(x) {
length(unique(x))
},
FUN.VALUE = 1
)) *
(vapply(
object$tree,
FUN = length, FUN.VALUE = 1
) - 1)) ## compute the degree of freedom
attr(ll, "nobs") <- sum(object$ctables[[vars[1]]])
class(ll) <- "logLik"
return(ll)
}
#' Confidence intervals for staged event tree parameters
#'
#' Confint method for class \code{sevt}.
#'
#' @param object an object of class \code{sevt}.
#' @param parm a specification of which parameters are to be given
#' confidence intervals, either a vector of numbers
#' or a vector of names. If missing, all parameters are considered.
#' @param level the confidence level required.
#' @param method a character string specifing which method to use:
#' wald", "waldcc", "goodman", "quesenberry-hurst" or "wilson".
#' @param ignore vector of stages which will be ignored,
#' by default the name of the unobserved stages stored in
#' \code{object$name_unobserved}.
#' @param ... additional argument(s) for compatibility
#' with \code{confint} methods.
#' @details Compute confidence intervals for staged event trees.
#' Currently five methods are available:
#' * \code{wald}, \code{waldcc}: Wald method and with continuity correction.
#' * \code{wilson}, \code{quesenberry-hurst} and \code{goodman}.
#' @return A matrix with columns giving lower and upper confidence
#' limits for each parameter. These will be labelled as
#' \code{(1-level)/2} and \code{1 - (1-level)/2} in %
#' (by default 2.5% and 97.5%).
#' @references Goodman, L. A. (1965) On Simultaneous Confidence Intervals for
#' Multinomial Proportions Technometrics, 7, 247-254.
#' @references Wald, A. Tests of statistical hypotheses concerning several
#' parameters when the number of observations is large, Trans.
#' Am. Math. Soc. 54 (1943) 426-482.
#' @references Wilson, E. B. Probable inference, the law of succession and
#' statistical inference, J.Am. Stat. Assoc. 22 (1927) 209-212.
#' @references Quesenberry, C., & Hurst, D. (1964). Large Sample Simultaneous
#' Confidence Intervals for Multinomial Proportions.
#' Technometrics, 6(2), 191-195
#' @author The function is partially inspired by code in the
#' \code{MultinomCI} function from the \pkg{DescTools} package,
#' implemented by Andri Signorelli and Pablo J. Villacorta Iglesias.
#' @examples
#' m1 <- stages_bj(full(PhDArticles), distance = "kullback", thr = 0.01)
#' confint(m1, "Prestige", level = 0.90)
#' confint(m1, "Married", method = "goodman")
#' confint(m1, c("Married", "Kids"))
#' @importFrom stats confint
#' @importFrom stats qchisq
#' @export
confint.sevt <- function (object, parm, level = 0.95,
method = c( "wald", "waldcc", "wilson", "goodman",
"quesenberry-hurst"),
ignore = object$name_unobserved,
...) {
check_sevt_fit(object)
vv <- sevt_varnames(object)
if (missing(parm)){
parm <- vv
}else if (is.numeric(parm)){
parm <- vv[parm]
}
## order parm and remove NA
parm <- vv[vv %in% parm]
## expand parm
exparm <- unlist(sapply(parm, function(v){
s <- unique(object$stages[[v]])
s <- s[!(s %in% ignore)]
if (is.null(s)) s <- "1"
apply(expand.grid(object$tree[[v]],s),1, function(x){
paste0(v,"=", x[1],"|", x[2])
})
}, simplify = FALSE), use.names = FALSE)
if (missing(method)) {
method <- "wald"
}
lambda <- object$lambda
method <- match.arg(arg = method, choices = c("wald", "waldcc", "goodman",
"wilson", "quesenberry-hurst"))
a <- (1 - level)/2
a <- c(a, 1 - a)
pct <- paste0(format(100*a,digits = 3, ), " %")
ci <- array(NA_real_, dim = c(length(exparm), 2L),
dimnames = list(exparm, pct))
for(v in parm) {
stages <- unique(object$stages[[v]])
stages <- stages[!(stages %in% ignore)]
if (is.null(stages)) stages <- "1"
k <- length(object$tree[[v]])
for(s in stages) {
p <- object$prob[[v]][[s]]
n <- attr(p, "n")
n_stage <- p * (n + k * lambda) - lambda
if (lambda > 0) p <- n_stage / n #unbiased prob estimate
if (method == "wald") {
q.chi <- qchisq(level, 1)
lci <- p - sqrt(q.chi * p * (1 - p)/n)
uci <- p + sqrt(q.chi * p * (1 - p)/n)
}else if (method == "waldcc") {
## wald test with continuity correction
q.chi <- qchisq(level, 1)
lci <- p - sqrt(q.chi * p * (1 - p)/n) - 1/(2 * n)
uci <- p + sqrt(q.chi * p * (1 - p)/n) + 1/(2 * n)
}else if (method == "wilson") {
## Wilson (1927), page 210 (or 76) (JSTOR version)
q.chi <- qchisq(level, 1)
tmp <- sqrt(q.chi^2 + 4 * n_stage * q.chi * (1 - p))
lci <- (q.chi + 2 * n_stage - tmp)/(2 * (n + q.chi))
uci <- (q.chi + 2 * n_stage + tmp)/(2 * (n + q.chi))
}else if (method == "quesenberry-hurst") {
## Goodman (1965), page 247 (JSTOR version)
## Quesenberry, C., & Hurst, D (1964)
q.chi <- qchisq(1 - level, k - 1, lower.tail = FALSE)
tmp <- sqrt(q.chi^2 + 4 * n_stage * q.chi * (1 - p))
lci <- (q.chi + 2 * n_stage - tmp)/(2 * (n + q.chi))
uci <- (q.chi + 2 * n_stage + tmp)/(2 * (n + q.chi))
}else if (method == "goodman") {
## Goodman (1965), page 248 (JSTOR version)
## upper quantile
q.chi <- qchisq( (1 - level) / k, 1, lower.tail = FALSE)
tmp <- sqrt(q.chi^2 + 4 * n_stage * q.chi * (1 - p))
lci <- (q.chi + 2 * n_stage - tmp)/(2 * (n + q.chi))
uci <- (q.chi + 2 * n_stage + tmp)/(2 * (n + q.chi))
}
if (lambda > 0){
## correct ci for biased estimator, as E. Riccomagno notes
lci <- (n * lci + lambda) / (n + k * lambda)
uci <- (n * uci + lambda) / (n + k * lambda)
}
ci[paste0(v,"=", object$tree[[v]],"|", s),1] <- pmax(0,lci)
ci[paste0(v,"=", object$tree[[v]],"|", s),2] <- pmin(1,uci)
}
}
return(ci)
}
#' Likelihood Ratio Test for staged trees models
#'
#' Function to perform likelihood ratio test between
#' two or multiple staged event tree models.
#' @param object an object of class \code{\link{sevt}}.
#' @param ... further objects of class \code{\link{sevt}}.
#' Must specify super-models of \code{object}.
#' See below for details.
#' @details If a single object of class \code{sevt} is passed as
#' argument, it computes
#' the likelihood-ratio test with respect to the
#' independence model.
#' If multiple objects are passed,
#' likelihood-ratio tests between the first
#' object and the followings are computed.
#' In the latter casem the function checks automatically if
#' the first model is nested in the additional ones,
#' via \code{\link{inclusions_stages}}, and throws
#' an error if not.
#' @return An object of class \code{anova}
#' which contains the log-likelihood,
#' degrees of freedom,
#' difference in degrees of freedom, likelihood ratio
#' statistics and corresponding p values.
#' @examples
#' data(PhDArticles)
#' order <- c("Gender", "Kids", "Married", "Articles")
#' phd.mod1 <- stages_hc(indep(PhDArticles, order))
#' phd.mod2 <- stages_hc(full(PhDArticles, order))
#'
#' ## compare two nested models
#' lr_test(phd.mod1, phd.mod2)
#'
#' ## compare a single model vs the independence model
#' lr_test(phd.mod1)
#' @importFrom stats pchisq
#' @export
lr_test <- function(object, ...) {
check_sevt_fit(object)
others <- list(...)
nmodels <- length(others) + 1
variables <- lapply(match.call()[-1L], deparse)[1L:nmodels]
if (nmodels == 1){
base <- sevt(object$tree)
base$ctables <- object$ctables
base$lambda <- object$lambda
base <- sevt_fit(base)
others <- list(object)
object <- base
variables <- c("indep", variables)
nmodels <- 2
}
if (object$lambda != 0){
warning("parameters are not fitted with maximum-likelihood")
}
others_sts <- lapply(others, function(object2){
check_sevt_fit(object2)
stopifnot(sevt_nvar(object) == sevt_nvar(object2))
stopifnot(all(sevt_varnames(object) == sevt_varnames(object2)))
if (object2$lambda != 0){
warning("parameters are not fitted with maximum-likelihood")
}
# check nested models
incl_st <- inclusions_stages(object, object2)
for(i in 1:length(incl_st)) {
if(any(incl_st[[i]][, 2] %in% c("!=", "<"))){
stop(paste(c("Not nested models specified. Check stages structures for ",
names(incl_st)[i])), call. = FALSE)
}
}
logLik(object2)
})
ls <- vapply(object$tree, length, 1)
moddesc <- paste(paste0(names(object$tree), "[", ls, "] "), collapse = "-> ")
L1 <- logLik(object)
L2 <- sapply(others_sts, as.numeric)
df1 <- attr(L1, "df")
df2 <- sapply(others_sts, attr, which = "df")
rval <- matrix(NA, ncol = 5, nrow = nmodels)
rownames(rval) <- 1:nmodels
colnames(rval) <- c("#Df", "LogLik", "Df", "Chisq", "Pr(>Chisq)")
rval[,1] <- c(df1, df2)
rval[,2] <- c(L1, L2)
rval[-1,3] <- df2 - df1
rval[-1,4] <- 2*(L2 - L1)
rval[-1,5] <- pchisq(2 * (L2 - L1), df = abs(df2 - df1), lower.tail = FALSE)
topnote <- paste("Model ", format(1:nmodels), ": ", variables,
sep = "", collapse = "\n")
structure(as.data.frame(rval), heading = c("Likelihood-ratio test \n", moddesc, topnote),
class = c("anova", "data.frame"))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.