R/stremo.R
In stremo: Functions to help the process of learning structural equation modelling

#require(MASS)
#require(lavaan)
#require(numDeriv)

## Mirror the lower or upper diagonal of a square matrix
mirror.tri <- function(mat, upper.on.lower = FALSE) {
    if (upper.on.lower) {
        mat[lower.tri(mat)] <- t(mat)[lower.tri(mat)]
    } else {
        mat[upper.tri(mat)] <- t(mat)[upper.tri(mat)]
    }
    mat
}

## Factor analysis using principal components as the extraction method
factanal.prcomp <- function (covmat, nfactors = NULL) {
    prc <- princomp(covmat = covmat, cor = TRUE)
    eig <- prc$sdev^2
    if (is.null(nfactors)) nfactors <- sum(eig >= 1) 
    loads <- prc$loadings[, 1:nfactors]
    loads[, 1:nfactors] %*% diag(prc$sdev[1:nfactors])
}

## Get a covariance matrix from a correlation matrix and std deviations
cor2cov <- function(cormat, sds) {
    outer(sds, sds, "*") * cormat
}

## Check if a square matrix is Hermitian
is.hermitian <- function(covmat) {
    t.covmat <- t(covmat)
    if (all(dim(covmat) == dim(t.covmat)) && all(covmat == t(covmat))) {
        return(TRUE)
    } else {
        return(FALSE)
    }
}

## Check if all covariances are within bounds
is.within.bounds <- function(covmat) {
    if (!is.hermitian(covmat)) stop("Matrix is not Hermitian")
    sds <- sqrt(diag(covmat))
    max.cov <- outer(sds, sds, "*")
    differences <- round(max.cov - covmat, 3)
    res <- all(differences >= 0)
    if (!res) {
      outside.bounds <- which(differences < 0, arr.ind = TRUE)
      varnames <- rownames(differences)
      cat("Out of bounds covariances:\n\n")
      if (!is.null(varnames)) {
          for (i in 1:(NROW(outside.bounds)/2)) {
              rn <- varnames[outside.bounds[i, 1]]
              cn <- varnames[outside.bounds[i, 2]]
              cat(sprintf("%s and %s: max - %f, obs - %f", rn,
                          cn, max.cov[rn, cn], covmat[rn, cn]), "\n") 
          }
      } else {
          for (i in 1:(NROW(outside.bounds)/2)) {
              rn <- outside.bounds[i, 1]
              cn <- outside.bounds[i, 2]
              cat(sprintf("row %s and column %s: max - %f, obs - %f", rn,
                          cn, max.cov[rn, cn], covmat[rn, cn]), "\n") 
          }
      }
      cat("\n")
    }
    res
}

## Check if a covariance matrix is positive-definite
## We could try to invert it, but instead we opted to do a bunch of
## testing for learning purposes.
is.pd <- function(covmat) {
    ## Check if matrix is Hermitian (lower and upper diag are equal)
    is.ht <- is.hermitian(covmat)
    if (!is.ht) {
        warning("Matrix isn't Hermitian")
        return(FALSE)
    }
    ## Check if all covariances are within bounds
    is.wd <- is.within.bounds(covmat)
    if (!is.wd) {
        warning("Covariances aren't within bounds")
        return(FALSE)
    }
    ## Check if the matrix is invertible (this whole funcion could be
    ## replaced by a call to chol
    is.invertible <- try(solve(covmat), silent = TRUE)
    if (class(is.invertible) == "try-error") {
        warning("Matrix isn't invertible")
        return(FALSE)
    }
    ## Check if all eigenvalues are positive
    eigenvalues <- eigen(covmat)$values
    if (!all(eigenvalues > 0)) {
        warning("Matrix has negative eigenvalues")
        return(FALSE)
    }
    ## Check if the determinant is higher than 0
    determ <- prod(eigenvalues)
    if (determ <= 0) {
        warning("The determinant is equal or lower than 0")
        return(FALSE)
    }
    TRUE
}


## Creates a multivariate normal distribution from a covariance matrix.
rmnorm <- function(covmat, means = 0, sds = NULL, n) {
    if (is.null(sds)) 
        sds <- sqrt(diag(covmat))
    pattern <- factanal.prcomp(covmat = covmat, nfactors = NROW(covmat))
    nvar <- NROW(covmat)
    X <- sapply(seq_len(nvar), function(x) {rnorm(n)})
    X <- t(X)
    Z <- pattern %*% X
    Z <- t(Z)
    Z <- sweep(Z, 2, sds, "*")
    Z <- sweep(Z, 2, means, "+")
    Z
}

## Identify the type of a lavaanified model
model.type <- function(model) {
    if(all(model$op != "=~")) 
        type <- "pa"
    if(any(model$op == "=~") & any(model$op == "~")) 
        type <- "sem"
    if(any(model$op == "=~") & all(model$op != "~")) 
        type <- "cfa"
    type
}

## Build but dont fit a model using lavaan to use as a starting point.
lvnfy <- function(model, data, n = NULL, ...) {
    if (is.hermitian(data)) {
        out <- sem(model, sample.cov = data, sample.nobs = n, do.fit
                      = FALSE, ...)
    } else {
        out <- sem(model, data = data, do.fit = FALSE, ...)
    }
    lv.list <- inspect(out, "list")
    lv.list$ustart <-out@Fit@start
    lv.list <- as.list(lv.list[c(1:4,7:8)])
    vars <- variables(lv.list)
    var.names <- rownames(vars)
    latents <- var.names[!vars[,"lat"]]
    regressed <- !vars[,"lat"] & (vars[,"endo"] | vars[,"exo"])
    vars <- cbind(vars, "latenterise" = regressed)
    attr(lv.list, "vars") <- vars
    lv.list
}

## Get latent variable names
latents <- function(model) {
    vars <- attr(model, "vars")
    rownames(vars)[vars[,"lat"]]
}

## Get observed variables (even if they are not indicators)
observed <- function(model) {
    vars <- attr(model, "vars")
    rownames(vars)[!vars[,"lat"]]
}

## Get indicators and separate them by type
manifests <- function(model) {
    vars <- attr(model, "vars")
    rownames(vars)[vars[,"ind"]]
}

## Get endogenous variables
endogenous <- function(model) {
    vars <- attr(model, "vars")
    rownames(vars)[vars[,"endo"]]
}

## Get endogenous variables
exogenous <- function(model) {
    vars <- attr(model, "vars")
    rownames(vars)[vars[,"exo"]]
}

## Identity (help done)
II <- function(model) {
    II <- BETA(model)
    II[] <- 0
    diag(II) <- 1
    II
}

## Beta matrix (regression coefficients between endogenous variables)
BETA <- function(model) {
    vars <- attr(model, "vars")
    var.names <- rownames(vars)
    ## latents
    latents <- vars[,"lat"]
    obs.not.indicators <- !vars[,"lat"] & (vars[,"exo"] | vars[,"endo"])
    sides <- var.names[latents | obs.not.indicators]
    regressions <- lapply(model, `[`, model$op == "~" | (model$op ==
                                                         "=~" &
                                                         model$rhs %in%
                                                         var.names[latents]))
    sides <- unique(var.names[latents | obs.not.indicators])
    beta.mat <- matrix(0, length(sides), length(sides))
    dimnames(beta.mat) <- list(sides, sides)
    free.pars.pos <- NULL
    index.matrix <- beta.mat
    index.matrix[] <- seq_along(index.matrix)
    free.pars <- regressions$free[regressions$free > 0L]
    if (length(regressions) > 0) {
        for (i in seq_len(length(regressions$lhs))) {
            lhs <- regressions$lhs[i]
            rhs <- regressions$rhs[i]
            op <- regressions$op[i]
            start.value <- regressions$ustart[i] 
            is.free <- regressions$free[i] > 0L
            if (op == "~") {
                beta.mat[lhs, rhs] <- start.value
                if (is.free) {
                    free.pars.pos <- c(free.pars.pos, index.matrix[lhs,
                                       rhs])
                }
            } else{
                beta.mat[rhs, lhs] <- start.value
                if (is.free) {
                    free.pars.pos <- c(free.pars.pos, index.matrix[rhs,
                                       lhs])
                }
            }
        }
    }
    attr(beta.mat, "free.pars") <- free.pars 
    attr(beta.mat, "free.pars.pos") <- free.pars.pos
    beta.mat
}

## Psi matrix (variances and covariances of observed variables).
PSI <- function(model) {
    vars <- attr(model, "vars")
    var.names <- rownames(vars)
    latents <- vars[,"lat"]
    obs.not.indicators <- !vars[,"lat"] & (vars[,"exo"] | vars[,"endo"])
    sides <- var.names[latents | obs.not.indicators]
    psi.mat <- matrix(0, length(sides), length(sides))
    dimnames(psi.mat) <- list(sides, sides)
    var.covar <- lapply(model, `[`, model$op == "~~" & model$lhs %in%
                        sides & model$rhs %in% sides)
    #free.pars.pos <- numeric(length(var.covar$lhs) * 2 - NCOL(psi.mat))
    #free.pars <- numeric(length(free.pars.pos))
    free.pars.pos <- free.pars <- NULL
    index.matrix <- psi.mat
    index.matrix[] <- seq_along(index.matrix)
    if (length(var.covar) == 0) stop("Looks like you forgot to explicitly
                                     assign at least the disturbances in the model.")
    for (i in seq_len(length(var.covar$lhs))) {
        lhs <- var.covar$lhs[i]
        rhs <- var.covar$rhs[i]
        start.value <- var.covar$ustart[i]
        pos <- var.covar$free[i]
        is.free <- pos > 0L
        if (lhs == rhs) {
            psi.mat[lhs, rhs] <- start.value
            if (is.free) {
                free.pars.pos <- c(free.pars.pos, index.matrix[lhs, rhs])
                free.pars <- c(free.pars, pos)
            }
        } else {
            psi.mat[lhs, rhs] <- psi.mat[rhs, lhs] <- start.value
            if (is.free) {
                free.pars.pos <- c(free.pars.pos, index.matrix[lhs, rhs],
                                   index.matrix[rhs, lhs])
                free.pars <- c(free.pars, rep(pos, 2))
            }
        }
    }
    if (length(psi.mat) == 0L) {
        return(0)
    }
    attr(psi.mat, "free.pars") <- free.pars
    attr(psi.mat, "free.pars.pos") <- free.pars.pos
    psi.mat
}

## Coefficients between latent endogenous and observed
LAMBDA.Y <- function(model) {
    vars <- attr(model, "vars")
    var.names <- rownames(vars)
    observed <- !vars[,"lat"] 
    latents <- vars[,"lat"] | (!vars[,"lat"] & (vars[,"exo"] |
                                                vars[,"endo"]))
    loadings.y <- lapply(model, `[`, model$op == "=~" & !model$rhs %in%
                         var.names[vars[,"lat"]])
    lambda.y.mat <- matrix(0, ncol = sum(latents), nrow =
                           sum(observed))
    colnames(lambda.y.mat) <- var.names[latents]
    rownames(lambda.y.mat) <- var.names[observed]
    free.pars.pos <- NULL
    free.pars <- loadings.y$free[loadings.y$free > 0L]
    index.matrix <- lambda.y.mat
    index.matrix[] <- seq_along(index.matrix)
    latenterised <- var.names[!vars[,"lat"] & (vars[,"exo"] |
                                                vars[,"endo"])]
    for (i in latenterised) {
        lambda.y.mat[i, i] <- 1
    }
    for (i in seq_len(length(loadings.y$lhs))) {
        lhs <- loadings.y$lhs[i]
        rhs <- loadings.y$rhs[i]
        start.value <- loadings.y$ustart[i]
        is.free <- loadings.y$free[i] > 0L
        lambda.y.mat[rhs, lhs] <-
            loadings.y$ustart[i]
        if (is.free) {
            free.pars.pos <- c(free.pars.pos, index.matrix[rhs, lhs])
        }
    }
    if (length(lambda.y.mat) == 0L) {
        return(II(model))
        free.pars.pos <- NULL
        free.pars <- NULL
    }
    attr(lambda.y.mat, "free.pars") <- free.pars
    attr(lambda.y.mat, "free.pars.pos") <- free.pars.pos
    lambda.y.mat
}

## Variance/covariance of measurement errors of observed endogenous
THETA.EPSILON <- function(model) {
    vars <- attr(model, "vars")
    var.names <- rownames(vars)
    observed <- !vars[,"lat"]
    indicators <- !vars[,"lat"] & (!vars[,"exo"] &
                                                !vars[,"endo"])
    te <- matrix(0, sum(observed), sum(observed))
    obs.names <- var.names[observed]
    ind.names <- var.names[indicators]
    dimnames(te) <- list(obs.names, obs.names)
    var.covar <- lapply(model, `[`, model$op == "~~" & model$lhs
                        %in% ind.names)
    free.pars.pos <- free.pars <- NULL
    index.matrix <- te
    index.matrix[] <- seq_along(index.matrix)
    if (length(te) == 0L) {
        return(matrix(0))
    }
    for (i in seq_len(length(var.covar$lhs))) {
        lhs <- var.covar$lhs[i]
        rhs <- var.covar$rhs[i]
        start.value <- var.covar$ustart[i]
        pos <- var.covar$free[i]
        is.free <- pos > 0L
        if (lhs == rhs) {
            te[lhs, rhs] <- start.value
            if (is.free) {
                free.pars.pos <- c(free.pars.pos, index.matrix[lhs,
                                   rhs])
                free.pars <- c(free.pars, pos)
            }
        } else {
            te[lhs, rhs] <- te[rhs, lhs] <- start.value
            if (is.free) {
                free.pars.pos <- c(free.pars.pos, index.matrix[lhs,
                                   rhs], index.matrix[rhs, lhs])
                free.pars <- c(free.pars, rep(pos, 2))
            }
        }
    }
    attr(te, "free.pars") <- free.pars
    attr(te, "free.pars.pos") <- free.pars.pos
    te
}

## Get the number of degrees of freedom of a given model
model.df <- function(model) {
    ## t-rule
    ## consider equality constraints eventually
    nexo <- length(exogenous(model))
    nvar <- length(observed(model))
    npar <- max(model$free)
    model <- lapply(model, `[`, model$op == "~~" & model$free == 0 & model$lhs == model$rhs)
    nfix <- length(unique(c(model$lhs, model$rhs)))
    ndf <- nvar*(nvar + 1)/2 - npar - nfix*(nfix + 1)/2 
    list(free = npar, 
         fixed = nfix,
         total.df = ndf + npar,
         model.df = ndf)
}

## Function to extract the starting values of the free parameters of a
## model
jumpstart <- function(model) {
    model$ustart[model$free > 0L]
}

## Function to be passed to the optimizer to minimize the log likelihood
iterator <- function(pars, model, sigma, matrep, fun = "fml") {
    model$ustart[model$free > 0L] <- pars
    sigma.hat <- sem.matrices(model, matrep)
    do.call(fun, list(sigma, sigma.hat))
}

## This function takes the 4 basic matrices and returns a covariance
## matrix based on the model parameters.  It can be used to generate a
## 'population' cov matrix for testing purposes or to generate sigmas
## for optimization purposes for instance.

sem.matrices <- function(model, matrep) {
    if (!is.null(model$est)) {
        model$ustart <- model$est
    }
    if (any(is.na(model$ustart))) stop("Please give starting values for
                                       all coefficients in the model.")
    matrep$BE[attr(matrep$BE, "free.pars.pos")] <-
        model$ustart[match(attr(matrep$BE, "free.pars"), model$free)]
    matrep$PS[attr(matrep$PS, "free.pars.pos")] <-
        model$ustart[match(attr(matrep$PS, "free.pars"), model$free)]
    matrep$LY[attr(matrep$LY, "free.pars.pos")] <-
        model$ustart[match(attr(matrep$LY, "free.pars"), model$free)]
    matrep$TE[attr(matrep$TE, "free.pars.pos")] <-
        model$ustart[match(attr(matrep$TE, "free.pars"), model$free)]
    matrep$LY %*% solve(matrep$Inv - matrep$BE) %*% matrep$PS %*%
    solve(t(matrep$Inv - matrep$BE)) %*% t(matrep$LY) + matrep$TE
}

## Maximum-likelihood fitting function.
fml <- function(sigma, sigma.hat) {
    sigma.hat.inv <- try(chol2inv(chol(sigma.hat)), silent = TRUE)
    if (class(sigma.hat.inv) == "try-error") return(Inf)
    res <- (log(det(sigma.hat)) + 
    sum(sigma * sigma.hat.inv) -
    log(det(sigma)) -
    NROW(sigma.hat))
    res
}

## Generalised least squares.
fgls <- function(sigma, sigma.hat) {
    0.5 * sum(diag((solve(sigma) %*% (sigma - sigma.hat))^2))
}

## Function that calls the optimiser and builds a response
#fit.nlm <- function(model, sigma, n, fun = "fml") {
    #model <- lvnfy(model, data = sigma, n = n)
    #pars <- model$ustart[model$free > 0L]
    #SCALE <- ifelse(abs(pars) > 1.00, abs(1/pars), 1)
    #matrep <- matrix.representation(model)
    #rnames <- rownames(sigma.hat(matrep))
    #sigma <- sigma[rnames, rnames]
    #res <- nlm(p = pars, 
                  #f = iterator, 
                  #hessian = TRUE,
                  #check.analyticals = TRUE,
                  #model = model,
                  #fun = fun, 
                  #fscale = SCALE,
                  #sigma = sigma, 
                  #matrep = matrep)
    #hess <- res$hessian
    #std.errors <- se.sem(hess, n)
    #if (!std.errors$trust) warning("Could not compute standard errors.")
    #model$est <- model$ustart
    #model$est[model$free > 0L] <- res$estimate
    #model$se <- rep(0, length(model$id))
    #model$se[model$free > 0L] <- std.errors$values
    #model$z <- model$est / model$se
    #model$z <- ifelse(model$z == Inf, NA, model$z)
    #model$p <- round(do.call("pval", list(model$z)), 3)
    #res$model <- data.frame(model, stringsAsFactors = FALSE)
    #res$chisquared <- n * res$minimum
    #res$df <- model.df(model)$model.df
    #res$n <- n
    #res$pval <- 1 - pchisq(res$chisquared, res$df) 
    #res$estimator <- fun
    #res$sigma.hat <- sem.matrices(model, matrep)
    #res$sigma <- sigma
    #class(res) <- c("sem", "list")
    #invisible(res)
#}

#fit.optim <- function(model, sigma, n, fun = "fml") {
    #model <- lvnfy(model, data = sigma, n = n)
    #pars <- model$ustart[model$free > 0L]
    #SCALE <- ifelse(abs(pars) > 1.00, abs(1.00/pars), 1.00)
    #matrep <- matrix.representation(model)
    #rnames <- rownames(sigma.hat(matrep))
    #sigma <- sigma[rnames, rnames]
    #res <- optim(par = pars, 
                  #fn = iterator, 
                  #gr = gradit,
                  #hessian = TRUE,
                  #model = model,
                  #fun = fun, 
                  #sigma = sigma, 
                  #matrep = matrep,
                  #method = "L-BFGS-B")
    #hess <- res$hessian
    #std.errors <- se.sem(hess, n)
    #model$est <- model$ustart
    #model$est[model$free > 0L] <- res$par
    #model$se <- rep(0, length(model$id))
    #model$se[model$free > 0L] <- std.errors
    #model$z <- model$est / model$se
    #model$z <- ifelse(model$z == Inf, NA, model$z)
    #model$p <- round(do.call("pval", list(model$z)), 3)
    #res$model <- data.frame(model, stringsAsFactors = FALSE)
    #res$chisquared <- n * res$value
    #res$df <- model.df(model)$model.df
    #res$n <- n
    #res$pval <- 1 - pchisq(res$chisquared, res$df) 
    #res$estimator <- fun
    #res$sigma.hat <- sem.matrices(model, matrep)
    #res$sigma <- sigma
    #class(res) <- c("sem", "list")
    #res
#}

#fit.optimx <- function(model, sigma, n, fun = "fml") {
    #model <- lvnfy(model, data = sigma, n = n)
    #pars <- model$ustart[model$free > 0L]
    #SCALE <- ifelse(pars > 1.00, abs(pars/1.00), 1.00)
    #matrep <- matrix.representation(model)
    #rnames <- rownames(sigma.hat(matrep))
    #sigma <- sigma[rnames, rnames]
    #res <- optimx(par = pars, 
                  #fn = iterator, 
                  #gr = gradit,
                  #hessian = TRUE,
                  #model = model,
                  #fun = fun, 
                  #sigma = sigma, 
                  #matrep = matrep,
                  #control = list(factr = 1e-14, all.methods = TRUE),
                  #method = "L-BFGS-B")
    #hess <- res$hessian
    #std.errors <- se.sem(hess, n)
    #model$est <- model$ustart
    #model$est[model$free > 0L] <- res$par
    #model$se <- rep(0, length(model$id))
    #model$se[model$free > 0L] <- std.errors
    #model$z <- model$est / model$se
    #model$z <- ifelse(model$z == Inf, NA, model$z)
    #model$p <- round(do.call("pval", list(model$z)), 3)
    #res$model <- data.frame(model, stringsAsFactors = FALSE)
    #res$chisquared <- n * res$value
    #res$df <- model.df(model)$model.df
    #res$n <- n
    #res$pval <- 1 - pchisq(res$chisquared, res$df) 
    #res$estimator <- fun
    #res$sigma.hat <- sem.matrices(model, matrep)
    #res$sigma <- sigma
    #class(res) <- c("sem", "list")
    #res
#}

## Using stats::nlminb to try and find the best set of parameter
## estimates for a given model. We tried a bunch of other functions, but
## nlminb seems to be the most reliable.
fit.nlminb <- function(model, sigma, n, fun = "fml") {

    ## Setting things up
    model <- lvnfy(model, data = sigma, n = n)
    pars <- model$ustart[model$free > 0L]

    ## Setting the scale (got the idea from lavaan)
    SCALE <- ifelse(abs(pars) > 1.00, abs(1.00/pars), 1)
    matrep <- matrix.representation(model)
    rnames <- rownames(sigma.hat(matrep))

    ## Making sure sigma follows sigma.hat
    sigma <- sigma[rnames, rnames]

    ## Optimizer (finding a set of parameter estimates)
    res <- nlminb(start = pars, 
                  objective = iterator, 
                  gradient = first.derivative.param,
                  ## Using numDeriv::grad works as well
                  #gradient = gradit,
                  model = model,
                  fun = fun, 
                  scale = SCALE,
                  sigma = sigma, 
                  matrep = matrep,
                  control = list(iter.max = 10000, eval.max = 20000))

    ## Calculating the Hessian (standard errors come from this since
    ## this is the inverted of the vcov). There is no reason to expect
    ## this to be pd if there is multiple minima or if the best set of
    ## parameters wasnt found.
    hess <- hessian(func = hessmin, 
                    x = res$par, 
                    sigma = sigma,
                    matrep = matrep,
                    model = model, 
                    fun1 = fun)

    ## Gradients (indicate if the optimizer reach the minima). Here we
    ## used numDeriv::grad, which is a bit slower than
    ## lavaan::first.derivative.param.
    gradd <- grad(func = gradmin, 
                  x = res$par,
                  sigma = sigma,
                  matrep = matrep,
                  model = model,
                  fun = fun)

    ## Checking if any of the eigenvalues are positive.
    ## vcov and standard errors come from the inverse of this matrix. If
    ## it is not invertible, there is no way to calculate the confidence
    ## intervals.
    if (any(eigen(hess)$values < 0L)) {
        stop("Hessian matrix is not positive-definite. \ 
The optimizer did not converge to a unique \
solution. The model is probably empirically \
underidentified.")
    }

    ## Any gradient not close to 0 might indicate that the system did
    ## not reach the minima.
    #if (any(gradd > 1)) { 
        #warning("Some first order derivative are not \
#close to 0. The system probably did \
#not reach the minima. Results might \
#not be thurstworthy.")
    #}

    ## More housecleaning and setting up
    freeid <- model$id[model$free > 0L]
    names(gradd) <- paste(model$lhs[freeid], model$op[freeid],
                         model$rhs[freeid], sep = "")
    colnames(hess) <- rownames(hess) <- names(gradd)

    ## Calculating standard errors
    std.errors <- se.sem(hess, n)
    if (!std.errors$trust) warning("Could not compute standard errors.")

    ## Building the response
    model$est <- model$ustart
    model$est[model$free > 0L] <- res$par
    model$se <- rep(0, length(model$id))
    model$se[model$free > 0L] <- std.errors$values

    ## Calculating z-values
    model$z <- model$est / model$se
    model$z <- ifelse(model$z == Inf, NA, model$z)

    ## Significance of the parameter estimates
    model$p <- round(do.call("pval", list(model$z)), 3)

    out <- model[-1]
    swap <- which(out$op == "~")
    var1 <- out$lhs
    var2 <- out$rhs
    var1[swap] <- out$rhs[swap]
    var2[swap] <- out$lhs[swap]
    out$lhs <- var1
    out$rhs <- var2
    out$op[out$op == "~"] <- "-->"
    out$op[out$op == "~~"] <- "<->"
    out$op[out$op == "=~"] <- "-->"
    out$free <- ifelse(out$free > 0, "free", "fixed")
    out$sig <- rep("*", length(out$lhs))
    out$sig[out$p >= 0.05] <- "ns"
    out$sig[is.na(out$p)] <- NA

    res$model <- data.frame(out, stringsAsFactors = FALSE)
    colnames(res$model) <- c("var1", "op", "var2", "type", "start", "est", "se",
                             "z", "p", "sig")

    ## Calculating MLX2
    res$chisquared <- n * res$objective

    ## Number of degrees of freedom
    res$df <- model.df(model)$model.df

    ## Number of observations
    res$n <- n

    ## Significance of the MLX2
    res$pval <- 1 - pchisq(res$chisquared, res$df) 

    ## Which function was minimised?
    res$estimator <- fun

    ## Calculating the sigma hat with the best set of estimates
    res$sigma.hat <- sem.matrices(model, matrep)

    ## Setting up stuff
    res$sigma <- sigma
    res$grad <- res$gradd
    res$hessian <- hess
    class(res) <- c("sem", "list")
    invisible(res)
}

gradit <- function(pars, model, matrep = matrep, sigma, fun) {
    grad(func = gradmin, x = pars, sigma = sigma,
         model = model, matrep = matrep, fun = fun)
}

gradmin <- function(pars, model, matrep = matrep, sigma, fun) {
    model$ustart[model$free > 0L] <- pars
    sigma.hat <- sem.matrices(model, matrep)
    do.call(fun, list(sigma, sigma.hat))
}

hessmin <- function(pars, model, matrep = matrep, sigma, fun1) {
    model$ustart[model$free > 0L] <- pars
    sigma.hat <- sem.matrices(model, matrep)
    do.call(fun1, list(sigma, sigma.hat))
}

hessit <- function(pars, model, sigma, matrep, fun1) {
    hessian(func = hessmin, x = pars, sigma = sigma,
        model = model, matrep = matrep, fun1 = fun1)
}

se.sem <- function(hess, n) {
    ## From SAS:
    ## http://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug
    ## _tcalis_sect072.htm
    hess.inv <- try(chol2inv(chol(hess)))
    if (class(hess.inv) == "try-error") {
        return(list(trust = FALSE, values = rep(NA, NROW(hess))))
    } else {
        return(list(trust = TRUE, values =
                    sqrt(diag(2/(n-1)*hess.inv))))
    }
}

## P-values of parameter estimates
pval <- function(z) {
    2 * (1 - pnorm(abs(z)))
}

## Residuals
residuals.sem <- function(object, ...) {
    round(object$sigma - object$sigma.hat, 4)
}

## Most of this function was taken from lavaan.
RMSEA <- function(fitted.model) {
    rmsea <- sqrt(max(c((fitted.model$chisquared/fitted.model$n)/fitted.model$df - 1/fitted.model$n, 0)))
    lower.lambda <- function(lambda) {
        pchisq(fitted.model$chisquared, df=fitted.model$df, ncp=lambda) - 0.95
    }
    if (fitted.model$df < 1 || lower.lambda(0) < 0) {
        ci.lower <- 0  
    } else {
        lambda.l <- try(uniroot(f=lower.lambda, lower=0, upper=fitted.model$chisquared)$root)
        if(inherits(lambda.l, "try-error")) { lambda.l <- NA }
        ci.lower <- sqrt( lambda.l/(fitted.model$n*fitted.model$df) )
    }    
    upper.lambda <- function(lambda) {
        pchisq(fitted.model$chisquared, df=fitted.model$df, ncp=lambda) - 0.05
    }
    N.RMSEA <- max(fitted.model$n, 10000L)
    if (fitted.model$df < 1 || upper.lambda(N.RMSEA) > 0) {
        ci.upper <- 0
    } else {
        lambda.u <- try(uniroot(f=upper.lambda, lower=0, upper=N.RMSEA)$root)
        if(inherits(lambda.u, "try-error")) { lambda.u <- NA }
        ci.upper <- sqrt( lambda.u/(fitted.model$n*fitted.model$df) )
    }
    p <- 1 - pchisq(fitted.model$chisquared, df=fitted.model$df, ncp=(fitted.model$n*fitted.model$df*0.05^2))
    list(RMSEA = rmsea, ci = c(lower = ci.lower, upper = ci.upper), p = p)
}

## Borrowed from lavaan
first.derivative.param <- function(x, model, matrep, sigma, fun) {
    npar <- length(x)
    h <- 10e-6
    dx <- numeric(npar)

    for(i in 1:npar) {
        x.left <- x.left2 <- x.right <- x.right2 <- x
        x.left[i]  <- x[i] - h; x.left2[i]  <- x[i] - 2*h
        x.right[i] <- x[i] + h; x.right2[i] <- x[i] + 2*h
        fx.left   <- iterator(x.left, model = model, matrep = matrep,
                              sigma = sigma, fun = fun)
        fx.left2  <- iterator(x.left2, model = model, matrep = matrep,
                              sigma = sigma, fun = fun)
        fx.right  <- iterator(x.right, model = model, matrep = matrep,
                              sigma = sigma, fun = fun)
        fx.right2 <- iterator(x.right2, model = model, matrep = matrep,
                              sigma = sigma, fun = fun)
        dx[i] <- (fx.left2 - 8*fx.left + 8*fx.right - fx.right2)/(12*h)
    }
    dx
}

summary.sem <- function(object, ...) {
    space <- "\n\n"
    if (object$convergence != 0) {
        txt.con <- "The optimizer did not converge after %s iterations. Results are not trustworthy."
        cat(space, sprintf(txt.con, object$iterations), space)
    } else {
        txt.con <- "The optimizer successfully converged after %s iterations"

        cat(sprintf(txt.con, object$iterations), space)

        cat("MODEL FIT", space)

        cat(sprintf("Method %30s", toupper(object$estimator)), "\n")
        cat(sprintf("Model chi-square %20.3f", object$chisquared), "\n")
        cat(sprintf("P-value %29.3f", object$pval), "\n")
        cat(sprintf("Degrees of freedom %18i", object$df), "\n")
        cat(sprintf("Number of observations %14i", object$n), space)

        cat("PARAMETER ESTIMATES\n\n")

        #cat(sprintf("%30s %10s %10s %10s", "est", "std.error", "z",
                    #"p-value"), "\n")
        #for (i in object$model$id) {
            #cat(sprintf("%-5s %2s %5s %15.3f %10.3f %10.3f %10.3f", object$model$lhs[i],
                        #object$model$op[i], object$model$rhs[i], object$model$est[i],
                        #object$model$se[i], object$model$z[i], object$model$p[i]),
                #"\n")
            #ops <- unique(object$model$op[i:(i+1)])
            #if (length(ops) > 1) {
                #cat("\n")
            #}
        #}
        print(object$model, digits = 3)
    }
}

variables <- function(model) {
    allvars <- unique(c(model$lhs, model$rhs))
    latent <- unique(model$lhs[model$op == "=~"])
    indicators <- unique(c(model$rhs[model$op == "=~" & !model$rhs %in%
                latent]))
    regressions <- lapply(model, `[`, model$op == "~" | (model$op == "=~"
                & model$rhs %in%
                latent))
    exogenous <- unique(regressions$rhs[!regressions$rhs %in%
            regressions$lhs])
    endogenous <- unique(regressions$lhs)
    latent.allvars <- allvars %in% latent
    indicators.allvars <- allvars %in% indicators
    exogenous.allvars <- allvars %in% exogenous
    endogenous.allvars <- allvars %in% endogenous
    #if (!all(exogenous.allvars) & ) exogenous.allvars <- latent.allvars
    #latent.allvars
    out <- cbind(latent.allvars, indicators.allvars, exogenous.allvars,
        endogenous.allvars)
    rownames(out) <- allvars
    colnames(out) <- c("lat", "ind", "exo", "endo")
    out
}

matrix.representation <- function(model) {
    BE <- BETA(model)
    PS <- PSI(model)
    LY <- LAMBDA.Y(model)
    TE <- THETA.EPSILON(model)
    Inv <- II(model)
    list(BE = BE, PS = PS, LY = LY, TE = TE, Inv = Inv)
}

sigma.hat <- function(matrep) {
    matrep$LY %*% solve(matrep$Inv - matrep$BE) %*% matrep$PS %*%
    solve(t(matrep$Inv - matrep$BE)) %*% t(matrep$LY) + matrep$TE
}

boot.lavaan <- function(fitted.model, n) {
    sigma.hat <- fitted.values(fitted.model)$cov
    nobs <- fitted.model@Sample@nobs[[1]]
    mat <- mvrnorm(Sigma = sigma.hat, n = nobs, mu = rep(0,
        NCOL(sigma.hat)), empirical = TRUE)
    est.coefs <- coef(fitted.model)
    X2 <- numeric(n)
    coefs <- matrix(0, nrow = length(est.coefs), ncol = n)
    for (i in 1:n) {
        success <- FALSE
        while (!success) {
            Z <- mat[sample(1:NROW(mat), replace = TRUE), ]
            new.fit <- update(fitted.model, sample.cov =
                                              cov(Z), warn = FALSE)
            if (sum(new.fit@Fit@se > 0) == length(est.coefs)) {
                success <- TRUE
                X2[i] <- new.fit@Fit@test[[1]]$stat
                coefs[, i] <- coef(new.fit)
            }
        }
    }
    p.X2 <- sum(X2 > fitted.model@Fit@test[[1]]$stat)/n
    se <- apply(coefs, 1, sd)
    z <- est.coefs/se
    p.est <- pval(z)
    invisible(list(X2 = X2, p.X2 = p.X2, est = est.coefs, se = se, z = z, p.est =
                   p.est, coefs = coefs))
}
Any scripts or data that you put into this service are public.
stremo documentation built on Jan. 15, 2017, 7:33 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
stremo
Functions to help the process of learning structural equation modelling

R/stremo.R
In stremo: Functions to help the process of learning structural equation modelling

Try the stremo package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

stremo Functions to help the process of learning structural equation modelling

R/stremo.R In stremo: Functions to help the process of learning structural equation modelling

Try the stremo package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

stremo
Functions to help the process of learning structural equation modelling

R/stremo.R
In stremo: Functions to help the process of learning structural equation modelling