R/uSEM.R
In pompom: Person-Oriented Method and Perturbation on the Model

Documented in model_summary parse_beta uSEM

# prepare stacked time-series with the given lag.order, in the long-format
prepare.data <- function(time.series, lag.order = 1)
{
  time.series <- data.frame(time.series)
  n.obs <- nrow(time.series)
  var.number <- length(time.series[1,])
  time.series.lag1 <- time.series[1:(n.obs-1),]
  time.series.stacked <- cbind(time.series.lag1, time.series[2:n.obs,])
  # names(time.series.stacked) <- paste("y", 1:(2*var.number), sep = "")
  names(time.series.stacked) <- c(paste(names(time.series), ".lag.obs", sep = ""),
                                  paste(names(time.series), ".obs", sep = ""))
  # print(names(time.series.stacked))
  return (time.series.stacked)
}
### Function1: find.path.to.free.up
# this is a recursive function that find the path that satisfies two conditions:
# 1) the reversed direction was not included in the model before, to avoid bidirectional paths
# 2) has the largest modification indices of all who satisfy 1).
# 3) the modification indice of this path is greaterthan 3.841
# return value: the path to free up in the format of one row of modification indices, which
# is an output of function "modindices" in lavaan

find.path.to.free.up <- function(beta.mi, model.syntax, var.number){

  if (nrow(beta.mi) > 0)
  {
    # this is the path with largest mi
    largest.path <- beta.mi[which(beta.mi$mi == max(beta.mi$mi)), ]

    if (length(largest.path$lhs) > 1)
    {
      print("two paths with equal improvement to model!")
      #If there are two largest and equal improvement of modification indices,
      # we always pick the first one in order, so that we produce stable model estimates every time.
      largest.path <- largest.path[1,]
      mi <- largest.path$mi
    }
    else(
      mi <- largest.path$mi
    )

    # a good enough improvement of chisq
    if (mi > 3.841)
    {
      path.to.add <- paste(largest.path$lhs, "~", largest.path$rhs, "\n ",sep = "")

      # to test if either direction is already included in the following if statement
      path.to.add.reverse <- paste(largest.path$rhs, "~", largest.path$lhs, "\n ",sep = "")

      lhs.number <- as.numeric(substr(largest.path$lhs,4,nchar(largest.path$lhs)))
      rhs.number <- as.numeric(substr(largest.path$rhs,4,nchar(largest.path$rhs)))

      # this path was already in the model, in the reversed direction;
      # then the mi of that path in the current iteration is fixed to 0
      if (!identical(grep(path.to.add.reverse, model.syntax), integer(0)) )
      {
        # print("found reverse path in the model!")

        # make the largest value in the modification indices equal to zero, so that it can search other values
        beta.mi[which(beta.mi$lhs == largest.path$lhs & beta.mi$rhs == largest.path$rhs), ]$mi <- 0

        # pass the modification indices back into the function, run it recursively
        return (find.path.to.free.up(beta.mi, model.syntax, var.number))
      }
      else
      {
        return(largest.path)
      }
    }
    else{ # no good improvement
      return (NULL)
    }
  }
  else{return (NULL)}
}



#' Fit a multivariate time series with uSEM (unified Structural Equation Model).
#' @usage
#' uSEM(var.number,
#'      data,
#'      lag.order = 1,
#'      verbose = FALSE,
#'      trim = FALSE)
#'
#' @param var.number number of variables in the time series
#' @param data time series data, must be in long format
#' @param lag.order lag order of the model to be fit, default value is 1. Note: Higher order (greater than 1) might not run.
#' @param verbose print intermediate model fit (iterations), default value is FALSE
#' @param trim to trim the insignificant betas (just one step, not iterative), default value is FALSE
#'
#' @return model fit object generated by lavaan
#'
#' @details The purpose of uSEM is to quantify the temporal relations (both contemporaneous and lag-1) between
#' variables. Model specification and estimation can be found in the references.
#'
#'
#'
#' @references Kim, J., Zhu, W., Chang, L., Bentler, P. M., & Ernst, T. (2007). Unified Structural Equation Modeling Approach for the Analysis of Multisubject, Multivariate Functional MRI Data. Human Brain Mapping, 93, 85–93. doi:10.1002/hbm.20259
#' @references  Gates, K. M., & Molenaar, P. C. M. (2012). Group search algorithm recovers effective connectivity maps for individuals in homogeneous and heterogeneous samples. NeuroImage 63(1), 310-319. doi: 10.1016/j.neuroimage.2012.06.026
#' @references  Gates, K. M., Molenaar, P. C. M., Hillary, F. G., Ram, N., & Rovine, M. J. (2010). Automatic search for fMRI connectivity mapping: An alternative to Granger causality testing using formal equivalences among SEM path modeling, VAR, and unified SEM. NeuroImage, 50(3), 1118–1125.  doi: 10.1016/j.neuroimage.2009.12.117
#'
#'
#' @examples
#' \dontshow{
#' model.fit <- uSEM(var.number = 3,
#'                  data = simts_3node,
#'                  lag.order = 1,
#'                  verbose = FALSE,
#'                  trim = FALSE)
#'model.fit
#' }
#' \donttest{
#'model.fit <- uSEM(var.number = 3,
#'                  data = simts_3node,
#'                  lag.order = 1,
#'                  verbose = FALSE,
#'                  trim = FALSE)
#'model.fit
#' }
#'
#'@export
#'
uSEM <- function(var.number, data, lag.order = 1, verbose = FALSE, trim = FALSE) {

  upper.beta.regression.syntax <- ""
  lower.beta.regression.syntax <- ""

  measurement.syntax <- ""
  variance.syntax <- ""

  chisq.list <- NULL
  path.list <- NULL

  data <- prepare.data(data, lag.order)
  # var.names <- names(data)

  # to write syntax for beta matrix (LISREL convention) or aka regression syntax (lavaan convention)
  # "~0*" made the modification indices open up about beta elements, but still
  # you need to eliminate those who are not in phi or alpha matrix.

  # By uSEM definition, the upper half of beta matrix is fixed to 0, because eta(t-1) is not esitmated here.
  # if lag.order > 1, then this is the uppper block with all lagged latent variables (e.g. upper 2/3 if order is 2)
  for (i in 1:(lag.order*var.number))
  {
    for (j in 1:((lag.order+1)*var.number))
    {
      upper.beta.regression.syntax <- paste(upper.beta.regression.syntax,
                                            "eta", i, "~0*","eta", j, "\n ",sep = "")
    }
  }

  # lower half of beta matrix. for phi matrix (lag-1 temporal relations), open up diagnal line, AR = T
  # (this is also to avoid bidirectional paths in contemporaneous relations suggesetd by modification indices)
  # if lag.order > 1, then this is the lower block with the contemporaneous latent variables
  for (i in (1+(lag.order*var.number)):((lag.order+1)*var.number))
  {
    lower.beta.regression.syntax <- paste(lower.beta.regression.syntax,
                                          "eta", i, "~eta",i-var.number, "\n ",sep = "")
  }
  regression.syntax <- paste(upper.beta.regression.syntax, lower.beta.regression.syntax, sep = "")

  # summary for beta matrix set up:
  # so in uSEM, the beta matrix is fixed at 0 at the upper half (for lag-1),
  # and the diagonal line of southwest block (phi matrix) is freed up
  # everything else is left to be fitted in the automatic search process later

  # to write syntax for measurement syntax, fix factor loading to 1 and measurement error to 0
  # for (i in 1:(var.number*(lag.order+1)))
  # {
  #   measurement.syntax <- paste(measurement.syntax,
  #                               "eta", i, "=~1 * y", i, "\n ",sep = "")
  # }

  for (i in 1:(var.number*(lag.order+1)))
  {
    measurement.syntax <- paste(measurement.syntax,
                                "eta", i, "=~1 * ", names(data)[i], "\n ",sep = "")
  }



  # to write syntax for variance.syntax, part 1, upper left (northwest block) being symmetric
  for (i in 1:(lag.order*var.number)){
    for (j in i:(lag.order*var.number))
    {
      variance.syntax <- paste(variance.syntax, "eta", i, "~~", "eta", j,  "\n ",sep = "")
    }
  }
  # to write syntax for variance.syntax, part 2, lower right (southeast block) being diagonal
  for (i in (lag.order * var.number + 1) : ((lag.order+1) * var.number))
  {
    variance.syntax <- paste(variance.syntax, "eta", i, "~~", "eta", i,  "\n ",sep = "")
  }

  # assemble regression syntex, measurement syntax and variance syntax
  # in LISREL, these 3 parts are all you need to specify as well
  empty.model <- paste(upper.beta.regression.syntax, measurement.syntax, variance.syntax, sep = "")

  model.syntax <- paste(regression.syntax, measurement.syntax, variance.syntax, sep = "")


  # iteration of searching modification indices
  # fit.criteria <- 1 # initial value
  iteration <- 0
  largest.path <- "initial model" # arbitrary initial value of modification indices

  while(!is.null(largest.path))
  {
    if (iteration > 0 & iteration < var.number * (var.number-1) * 2)
      # var.number * (var.number-1) * 2 is the number of empty cells in lag-1 and contemporaneous matrix
    {
      path.to.add<- paste(largest.path$lhs, "~", largest.path$rhs, "\n ",sep = "")
      model.syntax <- paste(model.syntax, path.to.add, sep = "")
    }
    model.fit <- tryCatch(lavaan(model.syntax,
                           data=data,
                           model.type      = "sem",
                           missing         = "fiml",
                           estimator       = "ml"),
                    error=function(e) e)

    error.message <- tryCatch(inspect(model.fit, "fit"),
                              error=function(e) e)
    check.npd            <- any(grepl("error", class(model.fit)) == TRUE) #  non positive definite
    check.not.identified <- sum(lavInspect(model.fit,"se")$beta, na.rm = TRUE) == 0
    singular            <- tryCatch(modindices(model.fit), error = function(e) e)
    check.singular      <- any(grepl("singular", singular) == TRUE)
    check.error         <- any(grepl("error", class(singular)) == TRUE)
    converge            <- lavInspect(model.fit, "converged")

    if (check.npd | check.not.identified | check.singular | check.error | !converge)
    {
      return (model.fit)
    }

    if (any(grepl("error", class(error.message)) == TRUE) )
    {
      return (model.fit)
    }
    else
    {

      mi <- modindices(model.fit)

      beta.mi <- mi[which(mi$op == "~"),]
      # only keep the lower block in the modification indices
      beta.mi$lhs.number <-  as.numeric(gsub("eta","", beta.mi$lhs))
      beta.mi <- beta.mi[beta.mi$lhs.number > lag.order * var.number,]
      # beta.mi.ordered <-  beta.mi[order(-beta.mi$mi),]
      beta.mi.ordered <-  beta.mi[sort.list(-beta.mi$mi),]

      largest.path <- find.path.to.free.up(beta.mi = beta.mi.ordered,
                                             model.syntax = model.syntax,
                                             var.number = var.number)
      iteration <- iteration + 1
      if (verbose & nrow(beta.mi.ordered) > 0)
      {
        print(paste("Iteration: ", iteration))
        print("Modification indices of beta: ")
        print(beta.mi.ordered[1:min(5, nrow(beta.mi.ordered)), ])
        print(paste("largest improvement:", largest.path$lhs,"~", largest.path$rhs))
      }
    }
  }

  # if we decide to trim the data
  if (trim)
  {
    beta.estimates <- parse.beta.as.estimates (var.number,
                                               model.fit)

    # you don't have to fit the model again, check if there is any trimming first

    # assemble the syntax in lavaan style
    beta.syntax <- paste(beta.estimates$lhs, "~", beta.estimates$rhs, "\n", sep = "")
    final.model <- empty.model
    for (element in beta.syntax)
    {final.model <- paste(final.model, element)}
    # fit the model by this syntax
    model.fit <- tryCatch(lavaan::lavaan(final.model,
                                   data=data,
                                   model.type      = "sem",
                                   missing         = "fiml",
                                   estimator       = "ml"  ),
                    error=function(e) e)
  }
  # if trim is TRUE, then return the trimmed model.fit; if trim is FALSE, then return the iterated result
  return (model.fit)
}


# parse.beta.as.estimates <- function(var.number, model.fit, sigonly = F)
parse.beta.as.estimates <- function(var.number, model.fit)
{
  all.estimates <- parameterEstimates(model.fit)
  # beta.est <- all.estimates[which(all.estimates$op == "~" & all.estimates$pvalue < 0.05 ),]
  beta.est <- all.estimates[which(all.estimates$op == "~" ),]
  beta.est <- beta.est[which(!is.na(beta.est$pvalue)),]
  return(beta.est)
}


# parse.beta.as.matrix <- function(var.number, model.fit, lag.order, sigonly = F)
parse.beta.as.matrix <- function(var.number, model.fit, lag.order)
{
  all.estimates <- parameterEstimates(model.fit)
  beta.est <- all.estimates[which(all.estimates$op == "~" ),]
  beta.est <- beta.est[which(!is.na(beta.est$pvalue)),]

  # se, beta.est$se to get distribution of beta, can write another function to parse the beta estimates

  beta.matrix <- matrix(rep(0, (var.number*(lag.order + 1))^2), nrow = (var.number*(lag.order+1)), ncol = (var.number*(lag.order+1)), byrow = TRUE)
  lhs.number <- as.numeric(gsub("eta","", beta.est$lhs))
  rhs.number <- as.numeric(gsub("eta","", beta.est$rhs))

  for(i in 1:length(beta.est[,1]))
  {
    beta.matrix[lhs.number[i], rhs.number[i]] <- beta.est$est[i]
  }
  return (beta.matrix)
}

#' Parse the beta from model fit object
#'
#' @usage parse_beta(var.number, model.fit, lag.order, matrix = F)
#'
#' @param var.number number of variables in the time series
#' @param model.fit model fit object generated by lavaan
#' @param lag.order lag order of the model to be fit
#' @param matrix output beta in matrix format or estimates format, default value is FALSE (as estimates)
#'
#' @return beta
#'
#'
#' @examples
#' \dontshow{
#' data(usemmodelfit)
#'beta.matrix <- parse_beta(var.number = 3,
#'                          model.fit = usemmodelfit,
#'                          lag.order = 1,
#'                          matrix = TRUE)
#'beta.matrix
#' }
#' \donttest{
#' data(usemmodelfit)
#'beta.matrix <- parse_beta(var.number = 3,
#'                          model.fit = usemmodelfit,
#'                          lag.order = 1,
#'                          matrix = TRUE)
#'beta.matrix
#' }
#'
#' @export
#'
parse_beta <- function(var.number, model.fit, lag.order, matrix = F)
{
  if (matrix)
  {
    return.obj <- list(est = parse.beta.as.matrix(var.number, model.fit, lag.order),
                       se = parse.beta.se(var.number, model.fit, lag.order))
  }
  else{
    return.obj <- parse.beta.as.estimates(var.number, model.fit)
  }
  return(return.obj)
}

# the following function is not referenced in teh two R files
# This function is to parse the lavaan output to a matrix format for SE of beta estimates.
# parse.beta.se <- function(var.number, model.fit, lag.order, sigonly = F)
parse.beta.se <- function(var.number, model.fit, lag.order)
{
  all.estimates <- parameterEstimates(model.fit)
  beta.est <- all.estimates[which(all.estimates$op == "~" ),]
  beta.est <- beta.est[which(!is.na(beta.est$pvalue)),]

  beta.se.matrix <- matrix(rep(0, (var.number*(lag.order + 1))^2), nrow = (var.number*(lag.order+1)), ncol = (var.number*(lag.order+1)), byrow = TRUE)
  lhs.number <- as.numeric(gsub("eta","", beta.est$lhs))
  rhs.number <- as.numeric(gsub("eta","", beta.est$rhs))

  for(i in 1:length(beta.est[,1]))
  {
      beta.se.matrix[lhs.number[i], rhs.number[i]] <- beta.est$se[i]
  }
  return (beta.se.matrix)
}


### Function4: parse.psi
# This function is tp parse the lavaan output to a matrix format for Psi matrix
parse.psi <- function(var.number, model.fit, lag.order) # pass the modelfit (lavaan)
{
  all.estimates <- parameterEstimates(model.fit)
  psi.est <- all.estimates[which(all.estimates$op == "~~" ),]
  psi.est <- psi.est[which(!is.na(psi.est$pvalue)),]

  psi.matrix <- matrix(rep(0, (var.number*(lag.order + 1))^2), nrow = (var.number*(lag.order+1)), ncol = (var.number*(lag.order+1)), byrow = TRUE)

  lhs.number <- as.numeric(gsub("eta","", psi.est$lhs))
  rhs.number <- as.numeric(gsub("eta","", psi.est$rhs))

  for(i in 1:length(psi.est[,1]))
  {
    psi.matrix[lhs.number[i], rhs.number[i]] <- psi.est$est[i]
    psi.matrix[ rhs.number[i], lhs.number[i]] <- psi.est$est[i]
  }
  return (psi.matrix)
}

#' Provide model summary.
#'
#' @usage model_summary(model.fit, var.number, lag.order)
#'
#' @param model.fit model fit object generated by lavaan
#' @param var.number number of variables in the time-series
#' @param lag.order lag order of model
#'
#' @return beta matrix estimates
#' @return matrix of standard error of beta
#' @return matrix of psi estimates
#' @return fit statistics CFI
#' @return fit statistics TLI
#' @return fit statistics RMSEA
#' @return fit statistics SRMR
#'
#'@details Model fit criteria: 3 out of 4 rule,
#'meaning 3 out of 4 critea should be satisfied,
#'including CFI and TLI should be greater than 0.95,
#'RMSEA and SRMR should be less than 0.08.
#'
#'@examples
#' \dontshow{
#' mdl <- model_summary(model.fit = usemmodelfit,
#'                      var.number = 3,
#'                      lag.order = 1)
#' mdl$beta
#' mdl$beta.se
#' mdl$psi
#' mdl$cfi
#' mdl$tli
#' mdl$rmsea
#' mdl$srmr
#' }
#' \donttest{
#' mdl <- model_summary(model.fit = usemmodelfit,
#'                      var.number = 3,
#'                      lag.order = 1)
#' mdl$beta
#' mdl$beta.se
#' mdl$psi
#' mdl$cfi
#' mdl$tli
#' mdl$rmsea
#' mdl$srmr
#' }
#'
#'@export
#'
#'
model_summary <- function(model.fit, var.number, lag.order) # pass the modelfit (lavaan)
{

  # check if model fit successfully
  check.npd            <- any(grepl("error", class(model.fit)) == TRUE) # non positive definite
  check.not.identified <- sum(lavInspect(model.fit,"se")$beta, na.rm = TRUE) == 0
  singular            <- tryCatch(modindices(model.fit), error = function(e) e)
  check.singular      <- any(grepl("singular", singular) == TRUE)
  check.error         <- any(grepl("error", class(singular)) == TRUE)
  converge            <- lavInspect(model.fit, "converged")

  if (check.npd | check.not.identified | check.singular | check.error | !converge)
  {
    if (!converge){
      print( "not converged!")
    } else{
      print( "model did not fit!")
    }
  }else{
    # no trimming, because some cases trimming didn't work
    mi <- modindices(model.fit)
    # mi[mi$op == "~",] # why does MI include the upper right block of beta????
    # fit.meausres <- fitMeasures(model.fit)

    # summary(fit)
    fit.stat <- fitMeasures(model.fit)
    estimates <- parameterEstimates(model.fit)
    # beta.estimates <- estimates[estimates$op == "~", ]
    psi.estimates <- estimates[estimates$op == "~~", ]
    psi.estimates <- psi.estimates[which(!is.na(psi.estimates$pvalue)),]

    beta.matrix <- parse_beta(var.number,model.fit, lag.order, matrix = T)
    beta.estimates<- parse_beta(var.number,model.fit, lag.order, matrix = F)
    beta.se <- parse.beta.se(var.number,model.fit, lag.order)
    # beta.matrix <- beta.matrix[(var.number+1):(var.number*2), 1:(var.number*2)]

    psi.matrix <- parse.psi(var.number,model.fit, lag.order)
    result <- list(beta = beta.matrix$est,
                   beta.se = beta.matrix$se,
                   psi = psi.matrix,
                   chisq = fit.stat["chisq"],
                   cfi = fit.stat["cfi"],
                   tli = fit.stat["tli"],
                   rmsea = fit.stat["rmsea"],
                   srmr = fit.stat["srmr"])
    return(result)
  }
}