# R/lav_tables.R

# construct 1D, 2D or pattern-based frequency tables
# YR. 10 April 2013
# Notes:
# - we do NOT make a distinction here between unordered and ordered categorical
#   variables
# - object can be a matrix (most likely with integers), a full data frame,
#   a fitted lavaan object, or a lavData object
# - 11 May 2013: added collapse=TRUE, min.std.resid options (suggested
#   by Myrsini Katsikatsou)
# - 11 June 2013: added dimension, to get one-way and two-way (three-way?)
#   tables
# - 20 Sept 2013: - allow for sample-based or model-based cell probabilities
#                   re-organize/re-name to provide a more consistent interface
#                   rows in the output can be either: cells, tables or patterns
#                 - dimension=0 equals type="pattern"
#                 - collapse=TRUE is replaced by type="table"
#                 - changed names of statistics: std.resid is now G2.average
#                 - added many more statistics; some based on the model, some
#                   on the unrestricted model
# - 8 Nov 2013:   - skip empty cells for G2, instead of adding 0.5 to obs
# - 7 Feb 2016:   - take care of conditional.x = TRUE

lavTables <- function(object,
                      # what type of table?
                      dimension    = 2L,
                      type         = "cells",
                      # if raw data, additional attributes
                      categorical  = NULL,
                      group        = NULL,
                      # which statistics / fit indices?
                      statistic    = "default",
                      G2.min       = 3.0,        # needed for G2.{p/n}large
                      X2.min       = 3.0,        # needed for X2.{p/n}large
                      # pvalues for statistics?
                      p.value      = FALSE,
                      # Bonferroni
                      # alpha.adj    = FALSE,
                      # output format
                      output       = "data.frame",
                      patternAsString = TRUE) {

    # check input
    if(! (dimension == 0L || dimension == 1L || dimension == 2L) ) {
        stop("lavaan ERROR: dimension must be 0, 1 or 2 for pattern, one-way or two-way tables")
    }
    stopifnot(type %in% c("cells", "table", "pattern"))
    if(type == "pattern") {
        dimension <- 0L
    }

    # extract or create lavdata
    lavdata <- lavData(object, ordered = categorical, group = group)

    # is 'object' a lavaan object?
    lavobject <- NULL
    if(inherits(object, "lavaan")) {
        lavobject <- object
    }

    # case 1: response patterns
    if(dimension == 0L) {
        out <- lav_tables_pattern(lavobject = lavobject, lavdata = lavdata,
                                  statistic = statistic,
                                  patternAsString = patternAsString)
        # output format
        if(output == "data.frame") {
            class(out) <- c("lavaan.data.frame", "data.frame")
        } else {
            warning("lavaan WARNING: output option `", output, "' is not available; ignored.")
        }

    # case 2: one-way/univariate
    } else if(dimension == 1L) {
        out <- lav_tables_oneway(lavobject = lavobject, lavdata = lavdata,
                                 statistic = statistic)

        # output format
        if(output == "data.frame") {
            class(out) <- c("lavaan.data.frame", "data.frame")
        } else {
            warning("lavaan WARNING: output option `", output, "' is not available; ignored.")
        }

    # case 3a: two-way/pairwise/bivariate + cells
    } else if(dimension == 2L && type == "cells") {
        out <- lav_tables_pairwise_cells(lavobject = lavobject,
                                         lavdata   = lavdata,
                                         statistic = statistic)
        # output format
        if(output == "data.frame") {
            class(out) <- c("lavaan.data.frame", "data.frame")
        } else if(output == "table") {
            out <- lav_tables_cells_format(out, lavdata = lavdata)
        } else {
            warning("lavaan WARNING: output option `", output, "' is not available; ignored.")
        }

    #  case 3b: two-way/pairwise/bivariate + collapsed table
    } else if(dimension == 2L && (type == "table" || type == "tables")) {
        out <- lav_tables_pairwise_table(lavobject = lavobject,
                                         lavdata   = lavdata,
                                         statistic = statistic,
                                         G2.min    = G2.min,
                                         X2.min    = X2.min,
                                         p.value   = p.value)
        # output format
        if(output == "data.frame") {
            class(out) <- c("lavaan.data.frame", "data.frame")
        } else if(output == "table") {
            out <- lav_tables_table_format(out, lavdata = lavdata,
                                           lavobject = lavobject)
        } else {
            warning("lavaan WARNING: output option `", output, "' is not available; ignored.")
        }
    }

    if( (is.data.frame(out) && nrow(out) == 0L) ||
        (is.list(out) && length(out) == 0L)) {
        # empty table (perhaps, no categorical variables)
        return(invisible(out))
    }

    out
}
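
# Usage sketch (not run; the model and 'myData' below are hypothetical
# placeholders, assuming a data.frame with ordered indicators y1-y4):
#
#   fit <- cfa('f =~ y1 + y2 + y3 + y4', data = myData,
#              ordered = c("y1", "y2", "y3", "y4"))
#
#   # observed vs expected proportions per pairwise cell, with Pearson X2
#   lavTables(fit, dimension = 2L, type = "cells", statistic = "X2")
#
#   # one row per pairwise table, with collapsed fit statistics
#   lavTables(fit, dimension = 2L, type = "table",
#             statistic = c("X2", "X2.average"), p.value = TRUE)
#
#   # one-way tables with model-based thresholds
#   lavTables(fit, dimension = 1L, statistic = "th")
#
#   # response-pattern frequencies (type = "pattern" forces dimension = 0L)
#   lavTables(fit, type = "pattern")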

# shortcut, always dim=2, type="cells"
#lavTablesFit <- function(object,
#                         # if raw data, additional attributes
#                         categorical  = NULL,
#                         group        = NULL,
#                         # which statistics / fit indices?
#                         statistic    = "default",
#                         G2.min       = 3.0,
#                         X2.min       = 3.0,
#                         # pvalues for statistics?
#                         p.value      = FALSE,
#                         # output format
#                         output       = "data.frame") {
#
#    lavTables(object = object, dimension = 2L, type = "table",
#              categorical = categorical, group = group,
#              statistic = statistic,
#              G2.min = G2.min, X2.min = X2.min, p.value = p.value,
#              output = output, patternAsString = FALSE)
#}

#lavTables1D <- function(object,
#                        # if raw data, additional attributes
#                        categorical  = NULL,
#                       group        = NULL,
#                       # which statistics / fit indices?
#                        statistic    = "default",
#                        # output format
#                        output = "data.frame") {
#
#   lavTables(object = object, dimension = 1L,
#              categorical = categorical, group = group,
#              statistic = statistic, p.value = FALSE,
#              output = output, patternAsString = FALSE)
#}


lav_tables_pattern <- function(lavobject = NULL, lavdata = NULL,
                               statistic = NULL, patternAsString = TRUE) {

    # this only works if we have 'categorical' variables
    cat.idx <- which(lavdata@ov$type %in% c("ordered","factor"))
    if(length(cat.idx) == 0L) {
        warning("lavaan WARNING: no categorical variables are found")
        return(data.frame(pattern=character(0L), nobs=integer(0L),
                          obs.freq=integer(0L), obs.prop=numeric(0L)))
    }
    # no support yet for mixture of endogenous ordered + numeric variables
    if(!is.null(lavobject) &&
       length(lavNames(lavobject, "ov.nox")) > length(cat.idx)) {
        warning("lavaan WARNING: some endogenous variables are not categorical")
        return(data.frame(pattern=character(0L), nobs=integer(0L),
                          obs.freq=integer(0L), obs.prop=numeric(0L)))
    }

    # default statistics
    if(!is.null(lavobject)) {
        if(length(statistic) == 1L && statistic == "default") {
            statistic <- c("G2", "X2")
        } else {
            stopifnot(statistic %in% c("G2.un", "X2.un", "G2", "X2"))
        }
    } else {
        # only data
        if(length(statistic) == 1L && statistic == "default") {
            # if data, none by default
            statistic <- character(0L)
        } else {
            stopifnot(statistic %in% c("G2.un", "X2.un"))
        }
    }

    # first, create basic table with response patterns
    for(g in 1:lavdata@ngroups) {
        pat <- lav_data_resp_patterns(lavdata@X[[g]])$pat
        obs.freq <- as.integer( rownames(pat) )
        if(patternAsString) {
            pat <- data.frame(pattern = apply(pat, 1, paste, collapse=""),
                              stringsAsFactors = FALSE)
        } else {
            pat <- as.data.frame(pat, stringsAsFactors = FALSE)
            names(pat) <- lavdata@ov.names[[g]]
        }
        #pat$id <- 1:nrow(pat)
        if(lavdata@ngroups > 1L) {
            pat$group <- rep(g, nrow(pat))
        }
        NOBS <- sum(obs.freq)
        pat$nobs <- rep(NOBS, nrow(pat))
        pat$obs.freq <- obs.freq
        rownames(pat) <- NULL
        if(g == 1L) {
            out <- pat
        } else {
            out <- rbind(out, pat)
        }
    }

    out$obs.prop <- out$obs.freq/out$nobs

    if(any(c("X2.un", "G2.un") %in% statistic)) {
        # not a good statistic... we only have uni+bivariate information
        warning("lavaan WARNING: limited information used for thresholds and correlations; but X2/G2 assumes full information")
        PI <- lav_tables_resp_pi(lavobject = lavobject, lavdata = lavdata,
                                 est = "h1")

        out$est.prop.un <- unlist(PI)
        if("G2.un" %in% statistic) {
            out$G2.un <- lav_tables_stat_G2(out$obs.prop, out$est.prop.un,
                                             out$nobs)
        }
        if("X2.un" %in% statistic) {
            out$X2.un <- lav_tables_stat_X2(out$obs.prop, out$est.prop.un,
                                             out$nobs)
        }
    }
    if(any(c("X2", "G2") %in% statistic)) {
        if(lavobject@Options$estimator %in% c("FML")) {
            # ok, nothing to say
        } else if(lavobject@Options$estimator %in%
                      c("WLS","DWLS","PML","ULS")) {
            warning("lavaan WARNING: estimator ", lavobject@Options$estimator,
                    " is not using full information while est.prop is using full information")
        } else {
            stop("lavaan ERROR: estimator ", lavobject@Options$estimator,
                 " is not supported.")
        }

        PI <- lav_tables_resp_pi(lavobject = lavobject, lavdata = lavdata,
                                 est = "h0")

        out$est.prop <- unlist(PI)
        if("G2" %in% statistic) {
            out$G2 <- lav_tables_stat_G2(out$obs.prop, out$est.prop,
                                         out$nobs)
        }
        if("X2" %in% statistic) {
            out$X2 <- lav_tables_stat_X2(out$obs.prop, out$est.prop,
                                         out$nobs)
        }
    }

    # remove nobs?
    # out$nobs <- NULL

    out
}

# pairwise tables, rows = table cells
lav_tables_pairwise_cells <- function(lavobject = NULL, lavdata = NULL,
                                      statistic = character(0L)) {

    # this only works if we have at least two 'categorical' variables
    cat.idx <- which(lavdata@ov$type %in% c("ordered","factor"))
    if(length(cat.idx) == 0L) {
        warning("lavaan WARNING: no categorical variables are found")
        return(data.frame(id=integer(0L), lhs=character(0L), rhs=character(0L),
                          nobs=integer(0L), row=integer(0L), col=integer(0L),
                          obs.freq=integer(0L), obs.prop=numeric(0L)))
    }
    if(length(cat.idx) == 1L) {
        warning("lavaan WARNING: at least two categorical variables are needed")
        return(data.frame(id=integer(0L), lhs=character(0L), rhs=character(0L),
                          nobs=integer(0L), row=integer(0L), col=integer(0L),
                          obs.freq=integer(0L), obs.prop=numeric(0L)))
    }

    # default statistics
    if(!is.null(lavobject)) {
        if(length(statistic) == 1L && statistic == "default") {
            statistic <- c("X2")
        } else {
            stopifnot(statistic %in% c("cor", "th", "X2","G2",
                                       "cor.un", "th.un", "X2.un","G2.un"))
        }
    } else {
        if(length(statistic) == 1L && statistic == "default") {
            # if data, none by default
            statistic <- character(0L)
        } else {
            stopifnot(statistic %in% c("cor.un", "th.un", "X2.un","G2.un"))
        }
    }

    # initial table, observed cell frequencies
    out <- lav_tables_pairwise_freq_cell(lavdata = lavdata,
                                         as.data.frame. = TRUE)
    out$obs.prop <- out$obs.freq/out$nobs

    if(any(c("cor.un", "th.un", "X2.un", "G2.un") %in% statistic)) {
        PI <- lav_tables_pairwise_sample_pi(lavobject = lavobject,
                                            lavdata   = lavdata)
        out$est.prop.un <- unlist(PI)
        if("G2.un" %in% statistic) {
            out$G2.un <- lav_tables_stat_G2(out$obs.prop, out$est.prop.un,
                                             out$nobs)
        }
        if("X2.un" %in% statistic) {
            out$X2.un <- lav_tables_stat_X2(out$obs.prop, out$est.prop.un,
                                             out$nobs)
        }

        if("cor.un" %in% statistic) {
            COR <- attr(PI, "COR")
            cor.all <- unlist(lapply(COR, function(x)
                                              x[lower.tri(x, diag=FALSE)]))
            out$cor.un <- cor.all[out$id]
        }
    }
    if(any(c("cor", "th", "X2", "G2") %in% statistic)) {
        PI <- lav_tables_pairwise_model_pi(lavobject = lavobject)
        out$est.prop <- unlist(PI)
        if("G2" %in% statistic) {
            out$G2 <- lav_tables_stat_G2(out$obs.prop, out$est.prop,
                                         out$nobs)
        }
        if("X2" %in% statistic) {
            out$X2 <- lav_tables_stat_X2(out$obs.prop, out$est.prop,
                                         out$nobs)
        }
        if("cor" %in% statistic) {
            COR <- attr(PI, "COR")
            cor.all <- unlist(lapply(COR, function(x)
                                              x[lower.tri(x, diag=FALSE)]))
            out$cor <- cor.all[out$id]
        }
    }

    out
}

# G2 statistic
lav_tables_stat_G2 <- function(obs.prop = NULL, est.prop = NULL, nobs = NULL) {
    # not defined if obs.prop is (close to) zero
    zero.idx <- which(obs.prop < .Machine$double.eps)
    if(length(zero.idx)) {
        obs.prop[zero.idx] <- as.numeric(NA)
    }
    # the usual G2 formula
    G2 <- 2*nobs*(obs.prop*log(obs.prop/est.prop))
    G2
}

# X2 (Pearson chi-squared) statistic
lav_tables_stat_X2 <- function(obs.prop = NULL, est.prop = NULL, nobs = NULL) {
    res.prop <- obs.prop-est.prop
    X2 <- nobs*(res.prop*res.prop)/est.prop
    X2
}
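
# Both helpers return per-cell contributions; the familiar table-level
# statistics are their sums over all cells of one table:
#   G2 = 2*N*sum(p.obs * log(p.obs/p.est))
#   X2 =   N*sum((p.obs - p.est)^2 / p.est)
# which is how lav_tables_pairwise_table() aggregates them (tapply + sum).
# Minimal sketch with made-up proportions (not run):
#
#   p.obs <- c(0.20, 0.30, 0.50); p.est <- c(0.25, 0.25, 0.50)
#   sum(lav_tables_stat_G2(p.obs, p.est, nobs = 100), na.rm = TRUE)
#   sum(lav_tables_stat_X2(p.obs, p.est, nobs = 100), na.rm = TRUE)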

# pairwise tables, rows = tables
lav_tables_pairwise_table <- function(lavobject = NULL, lavdata = NULL,
                                      statistic = character(0L),
                                      G2.min = 3.0,
                                      X2.min = 3.0,
                                      p.value = FALSE) {

    # default statistics
    if(!is.null(lavobject)) {
        if(length(statistic) == 1L && statistic == "default") {
            statistic <- c("X2", "X2.average")
        } else {
            stopifnot(statistic %in% c("X2","G2","X2.un","G2.un",
                                       "cor", "cor.un",
                                       "RMSEA.un", "RMSEA",
                                       "G2.average",
                                       "G2.nlarge",
                                       "G2.plarge",
                                       "X2.average",
                                       "X2.nlarge",
                                       "X2.plarge"))
        }
    } else {
        if(length(statistic) == 1L && statistic == "default") {
            # if data, none by default
            statistic <- character(0L)
        } else {
            stopifnot(statistic %in% c("cor.un", "X2.un","G2.un",
                                       "RMSEA.un"))
        }
    }

    # identify 'categorical' variables
    #cat.idx <- which(lavdata@ov$type %in% c("ordered","factor"))

    # pairwise tables
    #pairwise.tables <- utils::combn(vartable$name[cat.idx], m=2L)
    #pairwise.tables <- rbind(seq_len(ncol(pairwise.tables)),
    #                         pairwise.tables)
    #ntables <- ncol(pairwise.tables)

    # initial table, observed cell frequencies
    #out <- as.data.frame(t(pairwise.tables))
    #names(out) <- c("id", "lhs", "rhs")

    # collapse approach
    stat.cell <- character(0)
    if(any(c("G2","G2.average","G2.plarge","G2.nlarge") %in% statistic)) {
        stat.cell <- c(stat.cell, "G2")
    }
    if(any(c("X2","X2.average","X2.plarge","X2.nlarge") %in% statistic)) {
        stat.cell <- c(stat.cell, "X2")
    }
    if("G2" %in% statistic || "RMSEA" %in% statistic) {
        stat.cell <- c(stat.cell, "G2")
    }
    if("X2.un" %in% statistic) {
        stat.cell <- c(stat.cell, "X2.un")
    }
    if("G2.un" %in% statistic || "RMSEA.un" %in% statistic) {
        stat.cell <- c(stat.cell, "G2.un")
    }
    if("cor.un" %in% statistic) {
        stat.cell <- c(stat.cell, "cor.un")
    }
    if("cor" %in% statistic) {
        stat.cell <- c(stat.cell, "cor")
    }

    # get table with table cells
    out.cell <- lav_tables_pairwise_cells(lavobject = lavobject,
                                          lavdata   = lavdata,
                                          statistic = stat.cell)
    # only 1 row per table
    row.idx <- which(!duplicated(out.cell$id))
    if(is.null(out.cell$group)) {
        out <- out.cell[row.idx,c("lhs","rhs","nobs"),drop=FALSE]
    } else {
        out <- out.cell[row.idx,c("lhs","rhs","group", "nobs"),drop=FALSE]
    }

    # df: for an I x J table, df = I*J - I - J
    # (I*J - 1 free cells, minus (I-1) + (J-1) thresholds and 1 correlation)
    if(length(statistic) > 0L) {
        nrow <- tapply(out.cell$row, INDEX=out.cell$id, FUN=max)
        ncol <- tapply(out.cell$col, INDEX=out.cell$id, FUN=max)
        out$df <- nrow*ncol - nrow - ncol
    }

    # cor
    if("cor" %in% statistic) {
        out$cor <- out.cell[row.idx, "cor"]
    }

    # cor.un
    if("cor.un" %in% statistic) {
        out$cor.un <- out.cell[row.idx, "cor.un"]
    }

    # X2
    if("X2" %in% statistic) {
        out$X2 <- tapply(out.cell$X2, INDEX=out.cell$id, FUN=sum,
                         na.rm=TRUE)
        if(p.value) {
            out$X2.pval <- pchisq(out$X2, df=out$df, lower.tail=FALSE)
        }
    }
    if("X2.un" %in% statistic) {
        out$X2.un <- tapply(out.cell$X2.un, INDEX=out.cell$id, FUN=sum,
                             na.rm=TRUE)
        if(p.value) {
            out$X2.un.pval <- pchisq(out$X2.un, df=out$df, lower.tail=FALSE)
        }
    }

    # G2
    if("G2" %in% statistic) {
        out$G2 <- tapply(out.cell$G2, INDEX=out.cell$id, FUN=sum,
                         na.rm=TRUE)
        if(p.value) {
            out$G2.pval <- pchisq(out$G2, df=out$df, lower.tail=FALSE)
        }
    }
    if("G2.un" %in% statistic) {
        out$G2.un <- tapply(out.cell$G2.un, INDEX=out.cell$id, FUN=sum,
                             na.rm=TRUE)
        if(p.value) {
            out$G2.un.pval <- pchisq(out$G2.un, df=out$df, lower.tail=FALSE)
        }
    }

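    # RMSEA per table: following Joreskog (2005), a bivariate RMSEA is
    # derived from the table-level G2 as sqrt(max(0, (G2 - df)/(N*df))).
    # The optional p-value below appears to test close fit (RMSEA0 = 0.1)
    # against a noncentral chi-square with ncp = N*df*0.1^2.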
    if("RMSEA" %in% statistic) {
        G2 <- tapply(out.cell$G2, INDEX=out.cell$id, FUN=sum, na.rm=TRUE)
        # note: there seems to be a mistake in Appendix 1 eqs 43/44 of Joreskog
        # SSI paper (2005) 'SEM with ordinal variables using LISREL'
        # 2*N*d should be N*d
        out$RMSEA <- sqrt( pmax(0, (G2 - out$df)/ (out$nobs*out$df) ) )
        if(p.value) {
            # note: MUST use 1 - pchisq (instead of lower.tail = FALSE)
            # because for ncp > 80, routine only computes lower tail
            out$RMSEA.pval <- 1.0 - pchisq(G2,
                                           ncp = 0.1*0.1*out$nobs*out$df,
                                           df=out$df, lower.tail = TRUE)
        }
    }
    if("RMSEA.un" %in% statistic) {
        G2 <- tapply(out.cell$G2.un, INDEX=out.cell$id, FUN=sum,
                     na.rm=TRUE)
        # note: there seems to be a mistake in Appendix 1 eqs 43/44 of Joreskog
        # SSI paper (2005) 'SEM with ordinal variables using LISREL'
        # 2*N*d should be N*d
        out$RMSEA.un <- sqrt( pmax(0, (G2 - out$df)/ (out$nobs*out$df) ) )
        if(p.value) {
            # note: MUST use 1 - pchisq (instead of lower.tail = FALSE)
            # because for ncp > 80, routine only computes lower tail
            out$RMSEA.un.pval <- 1.0 - pchisq(G2,
                                               ncp = 0.1*0.1*out$nobs*out$df,
                                               df=out$df, lower.tail = TRUE)
        }
    }

    if("G2.average" %in% statistic) {
        out$G2.average <- tapply(out.cell$G2, INDEX=out.cell$id, FUN=mean,
                                 na.rm=TRUE)
    }

    if("G2.nlarge" %in% statistic) {
         out$G2.min <- rep(G2.min, length(out$lhs))
         out$G2.nlarge <- tapply(out.cell$G2, INDEX=out.cell$id,
                                 FUN=function(x) sum(x > G2.min, na.rm=TRUE) )
    }

    if("G2.plarge" %in% statistic) {
         out$G2.min <- rep(G2.min, length(out$lhs))
         out$G2.plarge <- tapply(out.cell$G2, INDEX=out.cell$id,
                                 FUN=function(x) sum(x > G2.min, na.rm=TRUE)/length(x) )
    }

    if("X2.average" %in% statistic) {
        out$X2.average <- tapply(out.cell$X2, INDEX=out.cell$id, FUN=mean,
                                 na.rm=TRUE)

    }

    if("X2.nlarge" %in% statistic) {
         out$X2.min <- rep(X2.min, length(out$lhs))
         out$X2.nlarge <- tapply(out.cell$X2, INDEX=out.cell$id,
                                 FUN=function(x) sum(x > X2.min, na.rm=TRUE) )
    }

    if("X2.plarge" %in% statistic) {
         out$X2.min <- rep(X2.min, length(out$lhs))
         out$X2.plarge <- tapply(out.cell$X2, INDEX=out.cell$id,
                                 FUN=function(x) sum(x > X2.min, na.rm=TRUE)/length(x) )
    }

    out
}


lav_tables_oneway <- function(lavobject = NULL, lavdata = NULL,
                              statistic = NULL) {

    # shortcuts
    vartable <- lavdata@ov
    X        <- lavdata@X

    # identify 'categorical' variables
    cat.idx <- which(vartable$type %in% c("ordered","factor"))
    ncat <- length(cat.idx)

    # do we have any categorical variables?
    if(length(cat.idx) == 0L) {
        warning("lavaan WARNING: no categorical variables are found")
        return(data.frame(id=integer(0L), lhs=character(0L), rhs=character(0L),
                          nobs=integer(0L),
                          obs.freq=integer(0L), obs.prop=numeric(0L),
                          est.prop=numeric(0L), X2=numeric(0L)))
    } else {
        labels <- strsplit(vartable$lnam[cat.idx], "\\|")
    }

    # ok, we have an overview of all categorical variables in the data
    ngroups <- length(X)

    # for each group, for each categorical variable, collect information
    TABLES <- vector("list", length=ngroups)
    for(g in 1:ngroups) {
        TABLES[[g]] <- lapply(seq_len(ncat),
            FUN=function(x) {
                idx <- cat.idx[x]
                nrow <- vartable$nlev[idx]
                ncell<- nrow
                nvar <- length(lavdata@ov.names[[g]])
                id <- (g-1)*nvar + x

                # compute observed frequencies
                FREQ <- tabulate(X[[g]][,idx], nbins = ncell)

                list(   id = rep.int(id, ncell),
                       lhs = rep.int(vartable$name[idx], ncell),
                       # op = rep.int("freq", ncell),
                       rhs = labels[[x]],
                     group = rep.int(g, ncell),
                      nobs = rep.int(sum(FREQ), ncell),
                      obs.freq = FREQ,
                      obs.prop = FREQ/sum(FREQ)
                    )
            })
    }

    for(g in 1:ngroups) {
        TABLE <- TABLES[[g]]
        TABLE <- lapply(TABLE, as.data.frame, stringsAsFactors=FALSE)
        if(g == 1L) {
            out <- do.call(rbind, TABLE)
        } else {
            out <- rbind(out, do.call(rbind, TABLE))
        }
    }
    if(g == 1) {
        # remove group column
        out$group <- NULL
    }

    # default statistics
    if(!is.null(lavobject)) {
        if(length(statistic) == 1L && statistic == "default") {
            statistic <- c("X2")
        } else {
            stopifnot(statistic %in% c("th.un",
                                       "th", "G2", "X2"))
        }

        # sample based
        # note, there is no G2.un or X2.un: always saturated!
        if("th.un" %in% statistic) {
            # sample based
            th <- unlist(lapply(1:lavdata@ngroups, function(x) {
                      if(lavobject@Model@conditional.x) {
                          TH <- lavobject@SampleStats@res.th[[x]][
                                lavobject@SampleStats@th.idx[[x]] > 0 ]
                      } else {
                          TH <- lavobject@SampleStats@th[[x]][
                                lavobject@SampleStats@th.idx[[x]] > 0 ]
                      }
                      TH.IDX <- lavobject@SampleStats@th.idx[[x]][
                                lavobject@SampleStats@th.idx[[x]] > 0 ]
                  unname(unlist(tapply(TH,
                                INDEX=TH.IDX,
                                function(y) c(y,Inf)))) }))
            # overwrite obs.prop
            # NOTE: if we have exogenous variables, the raw observed
            #       proportions do NOT correspond to the (residual)
            #       thresholds; hence obs.prop is overwritten here
            out$obs.prop <- unname(unlist(tapply(th, INDEX=out$id,
                               FUN=function(x) (pnorm(c(x,Inf)) -
                                  pnorm(c(-Inf,x)))[-(length(x)+1)]  )))

            out$th.un <- th
        }

        # model based
        if(any(c("th","G2","X2") %in% statistic)) {

            # model based
            th.h0 <- unlist(lapply(1:lavdata@ngroups, function(x) {
                         if(lavobject@Model@conditional.x) {
                             TH <- lavobject@implied$res.th[[x]][
                                    lavobject@SampleStats@th.idx[[x]] > 0 ]
                         } else {
                             TH <- lavobject@implied$th[[x]][
                                    lavobject@SampleStats@th.idx[[x]] > 0 ]
                         }
                      TH.IDX <- lavobject@SampleStats@th.idx[[x]][
                                lavobject@SampleStats@th.idx[[x]] > 0 ]
                    unname(unlist(tapply(TH, INDEX=TH.IDX,
                                  function(y) c(y,Inf)))) }))

            est.prop <- unname(unlist(tapply(th.h0, INDEX=out$id,
                            FUN=function(x) (pnorm(c(x,Inf)) -
                                pnorm(c(-Inf,x)))[-(length(x)+1)]  )))
            out$est.prop <- est.prop

            if("th" %in% statistic) {
                out$th <- th.h0
            }
            if("G2" %in% statistic) {
                out$G2 <- lav_tables_stat_G2(out$obs.prop, out$est.prop,
                                             out$nobs)
            }
            if("X2" %in% statistic) {
                out$X2 <- lav_tables_stat_X2(out$obs.prop, out$est.prop,
                                             out$nobs)
            }
        }
    } else {
        if(length(statistic) == 1L && statistic == "default") {
            # if data, none by default
            statistic <- character(0L)
        } else {
            stopifnot(statistic %in% c("th.un"))
        }

        if("th.un" %in% statistic) {
            out$th.un <- unlist(tapply(out$obs.prop, INDEX=out$id,
                                 FUN=function(x) qnorm(cumsum(x))))
        }
    }

    out
}
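
# The one-way proportions and thresholds are linked through the standard
# normal cdf: with thresholds tau_1 < ... < tau_{K-1} (tau_0 = -Inf,
# tau_K = +Inf), prop_k = pnorm(tau_k) - pnorm(tau_{k-1}), and conversely
# tau_k = qnorm(cumsum(prop))[k]; this is what the tapply() calls above
# compute. Minimal sketch (not run):
#
#   tau  <- c(-0.5, 0.8)                    # hypothetical thresholds
#   prop <- diff(pnorm(c(-Inf, tau, Inf)))  # category proportions
#   qnorm(cumsum(prop))[1:2]                # recovers tau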

# compute pairwise (two-way) frequency tables
lav_tables_pairwise_freq_cell <- function(lavdata = NULL,
                                          as.data.frame. = TRUE) {

    # shortcuts
    vartable <- as.data.frame(lavdata@ov, stringsAsFactors = FALSE)
    X        <- lavdata@X
    ov.names <- lavdata@ov.names
    ngroups  <- lavdata@ngroups

    # identify 'categorical' variables
    cat.idx <- which(vartable$type %in% c("ordered","factor"))

    # do we have any categorical variables?
    if(length(cat.idx) == 0L) {
        stop("lavaan ERROR: no categorical variables are found")
    } else if(length(cat.idx) == 1L) {
        stop("lavaan ERROR: at least two categorical variables are needed")
    }

    # pairwise tables
    pairwise.tables <- utils::combn(vartable$name[cat.idx], m=2L)
    pairwise.tables <- rbind(pairwise.tables, seq_len(ncol(pairwise.tables)))
    ntables <- ncol(pairwise.tables)

    # for each group, for each pairwise table, collect information
    TABLES <- vector("list", length=ngroups)
    for(g in 1:ngroups) {
        TABLES[[g]] <- apply(pairwise.tables, MARGIN=2,
            FUN=function(x) {
                idx1 <- which(vartable$name == x[1])
                idx2 <- which(vartable$name == x[2])
                id <- (g-1)*ntables + as.numeric(x[3])
                nrow <- vartable$nlev[idx1]
                ncol <- vartable$nlev[idx2]
                ncell <- nrow*ncol

                # compute two-way observed frequencies
                Y1 <- X[[g]][,idx1]
                Y2 <- X[[g]][,idx2]
                # FREQ <- table(Y1, Y2) # we lose missings; useNA is ugly
                FREQ <- lav_bvord_freq(Y1, Y2)

                list(   id = rep.int(id, ncell),
                       lhs = rep.int(x[1], ncell),
                       # op = rep.int("table", ncell),
                       rhs = rep.int(x[2], ncell),
                     group = rep.int(g, ncell),
                      nobs = rep.int(sum(FREQ), ncell),
                       row = rep.int(seq_len(nrow), times=ncol),
                       col = rep(seq_len(ncol), each=nrow),
                      obs.freq = lav_matrix_vec(FREQ) # col by col!
                    )
            })
    }

    if(as.data.frame.) {
        for(g in 1:ngroups) {
            TABLE <- TABLES[[g]]
            TABLE <- lapply(TABLE, as.data.frame, stringsAsFactors=FALSE)
            if(g == 1) {
                out <- do.call(rbind, TABLE)
            } else {
                out <- rbind(out, do.call(rbind, TABLE))
            }
        }
        if(g == 1) {
            # remove group column
            out$group <- NULL
        }
    } else {
        if(ngroups == 1L) {
            out <- TABLES[[1]]
        } else {
            out <- TABLES
        }
    }

    out
}
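
# Note on ordering: obs.freq stores each two-way table column by column
# (lav_matrix_vec), and the tables themselves are generated pair by pair
# (first i, then j), per group; the lav_tables_pairwise_*_pi() functions
# below fill their PI vectors in the same order, so est.prop can simply be
# unlist()-ed next to obs.prop in lav_tables_pairwise_cells().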


# low-level function to compute model-based expected proportions per cell;
# lavobject must be a fitted lavaan object with categorical endogenous
# variables
lav_tables_pairwise_model_pi <- function(lavobject = NULL) {

    stopifnot(lavobject@Model@categorical)

    # shortcuts
    lavmodel  <- lavobject@Model
    implied   <- lavobject@implied
    ngroups   <- lavobject@Data@ngroups
    ov.types  <- lavobject@Data@ov$type
    th.idx    <- lavobject@Model@th.idx
    Sigma.hat <- if(lavmodel@conditional.x) implied$res.cov else implied$cov
    TH        <- if(lavmodel@conditional.x) implied$res.th  else implied$th

    PI <- vector("list", length=ngroups)
    for(g in 1:ngroups) {
        Sigmahat <- Sigma.hat[[g]]
        cors <- Sigmahat[lower.tri(Sigmahat)]
        if(any(abs(cors) > 1)) {
            warning("lavaan WARNING: some model-implied correlations are larger than 1.0")
        }
        nvar <- nrow(Sigmahat)

        # shortcut for all ordered - tablewise
        if(all(ov.types == "ordered") && !is.null(lavobject@Cache[[g]]$LONG)) {
            #FREQ.OBS <- c(FREQ.OBS, lavobject@Cache[[g]]$bifreq)
            LONG2 <- LongVecTH.Rho(no.x               = nvar,
                                   all.thres          = TH[[g]],
                                   index.var.of.thres = th.idx[[g]],
                                   rho.xixj           = cors)
            # get expected probability per table, per pair
            PI[[g]] <- pairwiseExpProbVec(ind.vec = lavobject@Cache[[g]]$LONG,
                                          th.rho.vec=LONG2)
        } else {
            PI.group <- integer(0)
            # order! first i, then j, lav_matrix_vec(table)!
            for(i in seq_len(nvar-1L)) {
                for(j in (i+1L):nvar) {
                    if(ov.types[i] == "ordered" && ov.types[j] == "ordered") {
                        PI.table <- lav_bvord_noexo_pi(rho   = Sigmahat[i,j],
                                          th.y1 = TH[[g]][ th.idx[[g]] == i ],
                                          th.y2 = TH[[g]][ th.idx[[g]] == j ])
                        PI.group <- c(PI.group, lav_matrix_vec(PI.table))
                    }
                }
            }
            PI[[g]] <- PI.group
        }

    } # g

    # add  COR/TH/TH.IDX
    attr(PI, "COR") <- Sigma.hat
    attr(PI, "TH")  <- TH
    attr(PI, "TH.IDX") <- th.idx

    PI
}
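
# For a pair of ordinal variables, the expected probability of cell (r, c)
# under the bivariate normal (probit) model is the rectangle probability
#   pi_rc = Phi2(tau_r, tau_c; rho) - Phi2(tau_{r-1}, tau_c; rho)
#         - Phi2(tau_r, tau_{c-1}; rho) + Phi2(tau_{r-1}, tau_{c-1}; rho),
# where Phi2 is the bivariate standard normal cdf, tau the thresholds and
# rho the (polychoric) correlation; lav_bvord_noexo_pi() (and, for the
# all-ordered shortcut, pairwiseExpProbVec()) returns these probabilities,
# model-based above and sample-based below.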

# low-level function to compute expected proportions per cell
# using sample-based correlations + thresholds
#
# object can be either lavData or lavaan class
lav_tables_pairwise_sample_pi <- function(lavobject = NULL, lavdata = NULL) {

    # get COR, TH and th.idx
    if(!is.null(lavobject)) {
        if(lavobject@Model@conditional.x) {
            COR    <- lavobject@SampleStats@res.cov
            TH     <- lavobject@SampleStats@res.th
        } else {
            COR    <- lavobject@SampleStats@cov
            TH     <- lavobject@SampleStats@th
        }
        TH.IDX <- lavobject@SampleStats@th.idx
    } else if(!is.null(lavdata)) {
        fit.un <- lavCor(object = lavdata, se = "none", output = "fit")
        if(fit.un@Model@conditional.x) {
            COR    <- fit.un@SampleStats@res.cov
            TH     <- fit.un@SampleStats@res.th
        } else {
            COR    <- fit.un@SampleStats@cov
            TH     <- fit.un@SampleStats@th
        }
        TH.IDX <- fit.un@SampleStats@th.idx
    } else {
        stop("lavaan ERROR: both lavobject and lavdata are NULL")
    }

    lav_tables_pairwise_sample_pi_cor(COR = COR, TH = TH,
                                      TH.IDX = TH.IDX)
}

# low-level function to compute expected proportions per cell
lav_tables_pairwise_sample_pi_cor <- function(COR = NULL, TH = NULL,
                                              TH.IDX = NULL) {

    ngroups <- length(COR)

    PI <- vector("list", length=ngroups)
    for(g in 1:ngroups) {
        Sigmahat <- COR[[g]]
        cors <- Sigmahat[lower.tri(Sigmahat)]
        if(any(abs(cors) > 1)) {
            warning("lavaan WARNING: some sample-based correlations are larger than 1.0")
        }
        nvar <- nrow(Sigmahat)
        th.idx <- TH.IDX[[g]]

        # reconstruct ov.types
        ov.types <- rep("numeric", nvar)
        ord.idx <- unique(th.idx[th.idx > 0])
        ov.types[ord.idx] <- "ordered"

        PI.group <- integer(0)
        # order! first i, then j, lav_matrix_vec(table)!
        for(i in seq_len(nvar-1L)) {
            for(j in (i+1L):nvar) {
                if(ov.types[i] == "ordered" && ov.types[j] == "ordered") {
                    PI.table <- lav_bvord_noexo_pi(rho   = Sigmahat[i,j],
                                      th.y1 = TH[[g]][ th.idx == i ],
                                      th.y2 = TH[[g]][ th.idx == j ])
                    PI.group <- c(PI.group, lav_matrix_vec(PI.table))
                }
            }
        }
        PI[[g]] <- PI.group
    } # g

    # add  COR/TH/TH.IDX
    attr(PI, "COR") <- COR
    attr(PI, "TH")  <- TH
    attr(PI, "TH.IDX") <- TH.IDX

    PI

}

# low-level function to compute expected proportions per PATTERN
# using sample-based correlations + thresholds
#
# object can be either lavData or lavaan class
#
# only valid if estimator = FML, POM or NOR
#
lav_tables_resp_pi <- function(lavobject = NULL, lavdata = NULL,
                               est = "h0") {

    # shortcuts
    if(!is.null(lavobject)) {
        lavmodel <- lavobject@Model
        implied  <- lavobject@implied
    }
    ngroups <- lavdata@ngroups

    # h0 or unrestricted?
    if(est == "h0") {
        Sigma.hat <- if(lavmodel@conditional.x) implied$res.cov else implied$cov
        TH        <- if(lavmodel@conditional.x) implied$res.th  else implied$th
        TH.IDX    <- lavobject@SampleStats@th.idx
    } else {
        if(is.null(lavobject)) {
            fit.un <- lavCor(object = lavdata, se = "none", output = "fit")
            Sigma.hat  <- if(fit.un@Model@conditional.x) fit.un@implied$res.cov else fit.un@implied$cov
            TH         <- if(fit.un@Model@conditional.x) fit.un@implied$res.th  else fit.un@implied$th
            TH.IDX     <- fit.un@SampleStats@th.idx
        } else {
            if(lavobject@Model@conditional.x) {
                Sigma.hat <- lavobject@SampleStats@res.cov
                TH        <- lavobject@SampleStats@res.th
            } else {
                Sigma.hat <- lavobject@SampleStats@cov
                TH        <- lavobject@SampleStats@th
            }
            TH.IDX    <- lavobject@SampleStats@th.idx
        }
    }

    PI <- vector("list", length=ngroups)
    for(g in 1:ngroups) {
        Sigmahat <- Sigma.hat[[g]]
        cors <- Sigmahat[lower.tri(Sigmahat)]
        if(any(abs(cors) > 1)) {
            warning("lavaan WARNING: some model-implied correlations are larger than 1.0")
        }
        nvar <- nrow(Sigmahat)
        th.idx <- TH.IDX[[g]]
        MEAN <- rep(0, nvar)

        # reconstruct ov.types
        ov.types <- rep("numeric", nvar)
        ord.idx <- unique(th.idx[th.idx > 0])
        ov.types[ord.idx] <- "ordered"

        if(all(ov.types == "ordered")) {
            # get the response patterns (from the cache if available)
            if(!is.null(lavdata@Rp[[g]]$pat)) {
                PAT <- lavdata@Rp[[g]]$pat
            } else {
                PAT <- lav_data_resp_patterns( lavdata@X[[g]] )$pat
            }
            npatterns <- nrow(PAT)
            freq <- as.numeric( rownames(PAT) )
            PI.group <- numeric(npatterns)
            TH.VAR <- lapply(1:nvar,
                function(x) c(-Inf, TH[[g]][th.idx==x], +Inf))
            # FIXME!!! ok to set diagonal to 1.0?
            diag(Sigmahat) <- 1.0
            for(r in 1:npatterns) {
                # compute probability for each pattern
                lower <- sapply(1:nvar, function(x) TH.VAR[[x]][ PAT[r,x]      ])
                upper <- sapply(1:nvar, function(x) TH.VAR[[x]][ PAT[r,x] + 1L ])
                # handle missing values
                na.idx <- which(is.na(PAT[r,]))
                if(length(na.idx) > 0L) {
                    lower <- lower[-na.idx]
                    upper <- upper[-na.idx]
                    MEAN.r <- MEAN[-na.idx]
                    Sigmahat.r <- Sigmahat[-na.idx, -na.idx, drop=FALSE]
                } else {
                    MEAN.r <- MEAN
                    Sigmahat.r <- Sigmahat
                }
                PI.group[r] <- sadmvn(lower, upper, mean=MEAN.r,
                                      varcov=Sigmahat.r)
            }
        } else { # case-wise
            PI.group <- rep(as.numeric(NA), lavdata@nobs[[g]])
            warning("lavaan WARNING: casewise PI not implemented")
        }

        PI[[g]] <- PI.group
    } # g

    PI
}
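
# Note: sadmvn() above evaluates the multivariate normal rectangle
# probability P(lower < Y < upper) for each response pattern (zero means,
# unit variances); it is not defined in this file and is assumed to come
# from the mnormt package.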

lav_tables_table_format <- function(out, lavdata = lavdata,
                                    lavobject = lavobject) {

    # determine column we need
    NAMES <- names(out)
    stat.idx <- which(NAMES %in% c("cor",      "cor.un",
                                   "G2",        "G2.un",
                                   "X2",        "X2.un",
                                   "RMSEA",  "RMSEA.un",
                                   "G2.average", "G2.plarge", "G2.nlarge",
                                   "X2.average", "X2.plarge", "X2.nlarge"))
    if(length(stat.idx) == 0) {
        if(!is.null(out$obs.freq)) {
            stat.idx <- which(NAMES == "obs.freq")
        } else if(!is.null(out$nobs)) {
            stat.idx <- which(NAMES == "nobs")
        }
        UNI <- NULL
    } else if(length(stat.idx) > 1) {
        stop("lavaan ERROR: more than one statistic for table output: ",
              paste(NAMES[stat.idx], collapse=" "))
    } else {
        # univariate version of same statistic
        if(NAMES[stat.idx] == "G2.average") {
            UNI <- lavTables(lavobject, dimension = 1L, statistic="G2")
        } else if(NAMES[stat.idx] == "X2.average") {
            UNI <- lavTables(lavobject, dimension = 1L, statistic="X2")
        } else {
            UNI <- NULL
        }
    }

    OUT <- vector("list", length=lavdata@ngroups)
    for(g in 1:lavdata@ngroups) {
        if(lavdata@ngroups == 1L) { # no group column
            STAT <- out[[stat.idx]]
        } else {
            STAT <- out[[stat.idx]][ out$group == g ]
        }
        RN <- lavdata@ov.names[[g]]
        OUT[[g]] <- getCov(STAT, diagonal = FALSE, lower = FALSE, names = RN)
        # change diagonal elements: replace by univariate stat
        # if possible
        diag(OUT[[g]]) <- as.numeric(NA)
        if(!is.null(UNI)) {
            if(!is.null(UNI$group)) {
                idx <- which( UNI$group == g )
            } else {
                idx <- 1:length(UNI$lhs)
            }
            if(NAMES[stat.idx] == "G2.average") {
                diag(OUT[[g]]) <- tapply(UNI$G2[idx], INDEX=UNI$id[idx],
                                         FUN=mean)
            } else if(NAMES[stat.idx] == "X2.average") {
                diag(OUT[[g]]) <- tapply(UNI$X2[idx], INDEX=UNI$id[idx],
                                         FUN=mean)
            }
        } else if(NAMES[stat.idx] %in% c("cor", "cor.un")) {
            diag(OUT[[g]]) <- 1
        }
        class(OUT[[g]]) <- c("lavaan.matrix.symmetric", "matrix")
    }
    if(lavdata@ngroups > 1L) {
        names(OUT) <- lavdata@group.label
        out <- OUT
    } else {
        out <- OUT[[1]]
    }

    out
}

lav_tables_cells_format <- function(out, lavdata = lavdata,
                                    drop.list.single.group = FALSE) {

    OUT <- vector("list", length=lavdata@ngroups)
    if(is.null(out$group)) {
        out$group <- rep(1L, length(out$lhs))
    }
    # do we have a statistic?
    # determine column we need
    NAMES <- names(out)
    stat.idx <- which(NAMES %in% c("cor",     "cor.un",
                                   "G2",       "G2.un",
                                   "X2",       "X2.un",
                                   "RMSEA", "RMSEA.un",
                                   "G2.average", "G2.plarge", "G2.nlarge",
                                   "X2.average", "X2.plarge", "X2.nlarge"))
    if(length(stat.idx) == 0) {
        statistic <- "obs.freq"
    } else if(length(stat.idx) > 1) {
         stop("lavaan ERROR: more than one statistic for table output: ",
              paste(NAMES[stat.idx], collapse=" "))
    } else {
        statistic <- NAMES[stat.idx]
    }

    for(g in 1:lavdata@ngroups) {
        case.idx <- which( out$group == g )
        ID.group <- unique( out$id[ out$group == g] )
        TMP <- lapply(ID.group, function(x) {
                  Tx <- out[out$id == x,]
                  M <- matrix(Tx[,statistic],
                              max(Tx$row), max(Tx$col))
                  rownames(M) <- unique(Tx$row)
                  colnames(M) <- unique(Tx$col)
                  class(M) <- c("lavaan.matrix", "matrix")
                  M })
        names(TMP) <- unique(paste(out$lhs[case.idx], out$rhs[case.idx],
                                   sep="_"))
        OUT[[g]] <- TMP
    }

    if(lavdata@ngroups == 1L && drop.list.single.group) {
        OUT <- OUT[[1]]
    } else {
        if(length(lavdata@group.label) > 0L) {
            names(OUT) <- unlist(lavdata@group.label)
        }
    }

    OUT
}
