R/processData.R

Defines functions processData

Documented in processData

#' Process data.
#'@keywords internal
processData <- function(data = data, 
                        sample.cov = sample.cov,
                        sample.mean = sample.mean, 
                        sample.nobs = sample.nobs, 
                        ordered = ordered,
                        missing = missing,
                        se = se,
                        pt = pt){
  
  # if the user supplied a covariance matrix.
  if (is.null(data)){
    
    if (is.null(sample.mean) ){
      sample.mean <- rep(0L, ncol(sample.cov))
      names(sample.mean) <- colnames(sample.cov)
    }
    
    if (!is.null(ordered)){
      stop(paste("miive: if categorical.vars are declared raw data is required."))
    }
    
    sample.sscp        <- buildSSCP(sample.cov, sample.mean, sample.nobs)
    sample.polychoric  <- NULL
    asymptotic.cov     <- NULL
    asymptotic.cov.sat <- NULL
    var.nobs           <- NULL
    var.categorical    <- NULL
    var.missing        <- NULL
    var.exogenous      <- NULL
  
  # the user supplied raw data
  } else {
    
    asymptotic.cov     <- NULL
    asymptotic.cov.sat <- NULL
    
    # Data-level characteristics
    sample.nobs     <- nrow(data)
    
    # Variable level characteristics
    var.categorical <- vapply(data, is.factor, c(is.factor=FALSE))
    var.nobs        <- nrow(data) - colSums(is.na(data))
    var.missing     <- sapply(var.nobs, function(x) {
      ifelse(x==sample.nobs, FALSE, TRUE)
    })
 
    # Exogenous variables in the dataset
    var.exogenous <- colnames(data) %in%
      pt[ pt$exo == 1L & pt$op  == "~~" & pt$lhs == pt$rhs, "rhs" ]
    
    names(var.exogenous) <- colnames(data)
    
    #-------------------------------------------------------# 
    # Ordered factors present in data
    #-------------------------------------------------------# 
    
    if( any(var.categorical) | !is.null(ordered) ){ 
      
      # there are factors in the user-upplied data
      if (any(var.categorical)) { 
        
        ## if there are undeclared factors throw an error
        if (length(setdiff(colnames(data)[var.categorical],ordered)) > 0){
          und.factors <- setdiff(colnames(data)[var.categorical],ordered)
          stop(paste0(
            "miive: the following undeclared factors,",
            "were found in data: ", 
            paste(und.factors,collapse = ", "), 
            ". Use the ordered argument to specify",
            "categorical variables."
          ))
        }
      }
      
      # For now we don't do anything about ov.exogenous variables.
      ov.names.x <- NULL
      
      if (any(var.categorical) & missing != "listwise") { 
        stop(paste0(
          "miive: missing = ", missing, 
          " not supported in the presence of",
          " categorical variables."
        ))
      }
      
      
      fit <- lavaan::lavCor(
        data, 
        output = "fit", 
        missing = "listwise",
        estimator = "two.step",
        se = "standard",
        ov.names.x = ov.names.x,
        ordered = setdiff(ordered, ov.names.x)
      )
      
      sample.sscp <- NULL
      sample.nobs <- nrow(data)
 
      # Polychoric correlation matrix. 
      sample.polychoric <- unclass(lavaan::inspect(fit, "cor.ov"))
      
      # is polycor faster
      # corS<-matrix(NA,12,12)
      # for (i in 1:12){    
      #   for (j in i:12) {
      #     if(i==j){
      #       corS[i,j] <- 1
      #     } else {
      #       corS[i,j] <- polycor::polychor(data[,i],data[,j],ML=F)
      #       corS[j,i] <- corS[i,j]
      #     }
      #   }
      # }
      
      # Asymptotic covariance matrix of polychoric correlations. 
      asymptotic.cov  <- unclass(lavaan::inspect(fit, "vcov"))
        
      ordered.varnames <- apply(
        t(utils::combn(colnames(sample.polychoric), 2)), 1, function(x){
          paste0(x[1], "~~", x[2])
      })
        
        # Reorder asymptotic covariance matrix.
      asymptotic.cov  <- asymptotic.cov[
        ordered.varnames, 
        ordered.varnames
      ]
     
    } else { # there are no categorical variables
      
      sample.polychoric <- NULL
      
    }
    
    continuous.vars <- colnames(data)[!var.categorical]
    
    if (length(continuous.vars) > 1){
      
      # Are there any missing observations
      # any.miss <- any(var.nobs[continuous.vars] < sample.nobs)
      
      if (missing == "twostage"){ # begin missing data
        
        # var.cov <- outer(
        #   continuous.vars, continuous.vars,
        #   function(x, y) {
        #     paste(x, "~~", y)
        #   }
        # )
        # 
        # saturated.model <- c(
        #   var.cov[lower.tri(var.cov, diag = TRUE)],
        #   paste(continuous.vars, "~ 1")
        # )
        # 
        # saturated.fit <- lavaan::lavaan(
        #   model = saturated.model,
        #   data = data[,continuous.vars],
        #   meanstructure = TRUE,
        #   conditional.x = FALSE,
        #   fixed.x = FALSE,
        #   missing = "FIML",
        #   estimator = "ML",
        #   se = "robust.huber.white",
        #   information = "observed"
        # )
        # 
        # sample.cov  <- unclass(lavaan::inspect(saturated.fit, "cov.ov"))
        # sample.mean <- unclass(lavaan::lavInspect(saturated.fit, "mean.ov"))
        # sample.nobs <- lavaan::lavInspect(saturated.fit, "nobs")
        # sample.sscp <- buildSSCP(sample.cov, sample.mean, sample.nobs)
        # asymptotic.cov.sat <- unclass(lavaan::vcov(saturated.fit))
        
        saturated.fit <- rsem::rsem(data[,continuous.vars], 
                                    moment = TRUE, 
                                    varphi = 0.0, 
                                    st = 'i', 
                                    max.it = 1000)
        
        sample.mean <- saturated.fit$sem$mu
        sample.cov  <- saturated.fit$sem$sigma
        sample.nobs <- nrow(data)
        sample.sscp <- buildSSCP(sample.cov, sample.mean, sample.nobs)
        asymptotic.cov.sat <- saturated.fit$sem$gamma
        
        cov.str <- outer(continuous.vars, continuous.vars,function(x, y){paste0(x,"~~",y)})
        cov.nms <- t(cov.str)[lower.tri(cov.str, diag = TRUE)]
        mu.nms  <- paste0(continuous.vars,"~1")

        colnames(asymptotic.cov.sat) <- rownames(asymptotic.cov.sat) <- c(mu.nms,cov.nms)

        asymptotic.cov.sat <- asymptotic.cov.sat[c(cov.nms,mu.nms),c(cov.nms,mu.nms)]
        
      } else { # end missing data
        
        sample.cov  <- stats::cov(data[,continuous.vars])*
          (nrow(data[,continuous.vars])-1) / 
          nrow(data[,continuous.vars])
        
        sample.mean <- colMeans(data[,continuous.vars])
        sample.nobs <- nrow(data)
        sample.sscp <- buildSSCP(sample.cov, sample.mean, sample.nobs)
      
      }
    } 
  
  }
 
  # Prepare return list.
  g <- list(
    sample.cov  = sample.cov,
    sample.mean = sample.mean,
    sample.nobs = sample.nobs,
    sample.polychoric = sample.polychoric,
    sample.sscp = sample.sscp,
    asymptotic.cov = asymptotic.cov,
    asymptotic.cov.sat = asymptotic.cov.sat,
    var.nobs = var.nobs,
    var.categorical = var.categorical,
    var.missing = var.missing,
    var.exogenous = var.exogenous
  )

}
zackfisher/MIIVsem documentation built on June 26, 2017, 8:22 p.m.