R/tsfa.R


#  Notes
#   An FAmodel has parameters but not factors. It can
#     optionally have stats (about the estimation).
#   An fFAmodel extends an FAmodel by adding factors.
#   A TSFmodel extends an fFAmodel by adding a time dimension.
###################################################

#            Factor Analysis 

###################################################

FAmodel <- function(obj, ...)UseMethod("FAmodel")
FAmodel.FAmodel <- function(obj, ...) obj #extractor

FAmodel.default <- function(obj,  Omega=NULL, Phi=NULL, LB=NULL, LB.std=NULL,
   stats=NULL,  ...)
  {#  obj should be the loadings (hat)B for  x =  B f + e  
   if(!is.matrix(obj))
     stop("FAmodel.default requires a loadings matrix (factor loadings) as the first argument.")
   # could do more checking, but only the loadings are necessary (e.g. for simulation)
   #if(ncol(obj)!= nrow(Omega))
   #    stop("dimensions of obj (loadings) and  Omega do not agree.")
   classed(list(loadings=obj, Omega=Omega, Phi=Phi, LB=LB,  LB.std=LB.std, 
	   stats=stats), "FAmodel") # constructor
  }
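
# Example (an illustrative sketch, not from the package tests): build an
# FAmodel directly from a hypothetical 4-indicator, 1-factor loadings
# matrix; only the loadings are required.
#   B  <- matrix(c(0.9, 0.8, 0.7, 0.6), 4, 1,
#                dimnames=list(paste("Series", 1:4), "Factor 1"))
#   fa <- FAmodel(B, Omega=diag(0.2, 4))
#   loadings(fa)  # extracts fa$loadings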


factors <- function(x)UseMethod("factors")
# use predict with data to get factors
factors.fFAmodel <- function(x) x$f

nfactors <- function(x) UseMethod("nfactors")
nfactors.FAmodel <- function(x) {ncol(loadings(x))}

factorNames <- function(x) UseMethod("factorNames")
factorNames.FAmodel     <- function(x) dimnames(loadings(x))[[2]]

coef.FAmodel <- function(object, ...) {c(loadings(object), diag(object$Omega))}

explained <- function(object, ...)UseMethod("explained")
explained.FAmodel <- function (object, f=factors(object),
                  names=dimnames(loadings(object))[[1]], ...) {
  # portion of data explained by factors
  r <- t(loadings(object) %*% t(f))
  dimnames(r) <- list(NULL, names)
  r
  }

LedermannBound  <- function(M) {
   if (is.matrix(M)) M <- ncol(M)
   if (1 != length(M)) stop("M must be an integer number of indicator variables.")
      
   ## solve (M^2-M) - (2M+1)k + k^2 = 0 for k
   #r <- polyroot(c(M^2-M, -(2*M+1), 1))
   #if (any(abs(Im(r)) > 10*.Machine$double.eps))
   #         warning("LedermannBound has complex solution")
   #r <- Re(r)
   #r[(0 <=r) & (r <= M)]
   if(M <3) return(0)
   r <- M + 0.5 - (0.5 * sqrt(8*M + 1))
   #fuzz in next is just to assure rounding error does not give a non-integer
   # when the result should be an integer. (Especially important if floor might
   # otherwise give the next lower integer.)
   if (1e-10 > abs(r - round(r))) round(r) else r
   }
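
# Worked example: for M = 6 indicators the closed form gives
# 6 + 0.5 - 0.5*sqrt(8*6 + 1) = 6.5 - 3.5 = 3, so
#   LedermannBound(6)         # 3: at most 3 factors are identified
#   floor(LedermannBound(4))  # 1 (the bound itself is about 1.63)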

# FAfitStats could be moved here, but needs to be reorganized to use
#   cov rather than data (as in estFAmodel)

# could add summaryStats 

summary.FAmodel <- function(object, ...)
 {classed(list(k=nfactors(object), M=nrow(loadings(object)),
      Snames=dimnames(loadings(object))[[1]], Fnames=factorNames(object),
      Omega=!is.null(object$Omega), 
      Phi=!is.null(object$Phi), 
      LB=!is.null(object$LB), 
      estConverged=object$stats$estConverged, 
      rotationConverged=object$stats$rotationConverged, 
      orthogonal=object$stats$orthogonal
      ), "summary.TSFmodel")
 }

print.summary.FAmodel <- function (x, ...)
  {cat(x$k, " factors: ",     x$Fnames, "\n")
   cat(x$M, " indicators: ",  x$Snames, "\n")
   cat("Omega ", (if(x$Omega) "is" else "is not"), " specified.\n")
   cat("Phi   ", (if(x$Phi) "is" else "is not"), " specified.\n")
   cat("LB    ", (if(x$LB) "is" else "is not"), " specified.\n")
   if(!is.null(x$estConverged)) cat("loadings estimation ",
           (if(x$estConverged) "converged.\n" else "did not converge.\n"))
   if(!is.null(x$rotationConverged)) cat("rotation ",
           (if(x$rotationConverged) "converged.\n" else "did not converge.\n"))
   if(!is.null(x$orthogonal)) cat("rotation is ",
           (if(x$orthogonal) "orthogonal.\n" else "oblique.\n"))
  }


predict.FAmodel <- function(object, data = NULL, factorNames.=factorNames(object), ...){
      # prediction of factors with data
      if (is.null(data)) stop("data must be supplied.")
      r <- data %*% t(object$LB) #hatf
      dimnames(r) <- list(NULL, factorNames.)
      r
      }
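
# Sketch of use (assuming fa is an FAmodel with LB set, e.g. from
# estFAmodel, and x is a T x M data matrix); the scores are
# hat{f} = x %*% t(LB):
#   hatf <- predict(fa, data=x)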

permusign <- function(B, Btarget, Phi=NULL) {

# Selects the permutation and signs of the columns of the factor loadings B
# that most closely resemble the Btarget matrix.
# The Phi matrix (cov of factors) may also need to be reordered for the permutation.
  
  ############################## local functions
  permute <- function(x){
     # with thanks to Bill Venables
     if (length(x) <= 1) as.matrix(x) else{
  	 M <- NULL
  	 for (i in seq(length(x))) M <- rbind(M, cbind(x[i], Recall(x[-i])))
  	 M
  	 }
     }
  signsw <- function(Bprop, Bnew, Btarget){
     # Compare the distance of Bprop from Btarget, and also of Bprop with
     # column signs switched. If the best of these is closer to Btarget than
     # Bnew is, return the column signs (1 or -1); otherwise return NULL.
     signs <- rep(1, ncol(Bprop))
     d1 <- colSums((Btarget - Bprop)^2)
     d2 <- colSums((Btarget + Bprop)^2) # all col signs reversed
     if ( sum(pmin(d1, d2)) < sum((Btarget - Bprop %*% diag(signs))^2))
     	signs <- 2 * (d1 < d2) - 1
     # the fuzz (1e-12) seems to be necessary to avoid rounding error causing
     # TRUE when things should be equal, with the result that random
     # permutations occur.
     if (sum((Btarget - Bnew)^2) > 1e-12 +
         sum((Btarget - Bprop %*% diag(signs))^2) ) signs else NULL
     }

  ############################## end local functions

  P    <- permute(seq(ncol(B))) # each row is a permutation of the column indices
  Bnew   <- B
  PhiNew <- Phi
  if( ! is.null(Phi)) {
    for (j in seq(nrow(P))) {
      Bprop <- B[,P[j,]]
      signs <- signsw(Bprop, Bnew, Btarget)
      if(!is.null(signs)){
  	 #cat(j, ":", P[j,], signs)
  	 Bnew	<-  Bprop %*% diag(signs)
  	 PhiNew <-  (Phi[P[j,],P[j,]]) * outer(signs,signs)
  	 }
      }
    }
  list(loadings=Bnew,Phi=PhiNew)
}
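
# Illustrative (hypothetical) use: recover a target from an estimate whose
# columns are permuted and sign-flipped; Phi must be non-NULL for the
# permutation search to run.
#   Bt <- matrix(c(1,1,0, 0,0,1), 3, 2)      # target
#   Be <- -Bt[, 2:1]                         # columns permuted, signs flipped
#   permusign(Be, Bt, Phi=diag(2))$loadings  # Bt is recovered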


estFAmodel <- function(Sigma, p, n.obs=NA,
                est="factanal", 
		estArgs=list(scores="none", control=list(opt=list(maxit=10000))),
		rotation=if(p==1) "none" else "quartimin", rotationArgs=NULL,
		GPFargs=list(Tmat=diag(p), normalize=TRUE, eps=1e-5, maxit=1000),
		BpermuteTarget=NULL,
                factorNames=paste("Factor", seq(p)),
                indicatorNames=NULL) {
  if (1e10 < max(diag(Sigma))/ min(diag(Sigma)) ) warning(
	 "Data variances are very different. Consider rescaling some indicators.")
 
  stds <- sqrt(diag(Sigma))
   if(p < 0)  stop("p (number of factors) must not be negative.")
  else if(p == 0) {
      # zero factors
      uniquenesses <- diag(1, nrow(Sigma))
      Omega  <- diag(Sigma)
      if(est != "factanal") 
        stop("Currently Omega  is only correct for factanal estimation.")
      loadings.std <- loadings <- Phi  <- LB <- LB.std <- NULL
      estConverged <- rotationConverged <- orthogonal <- TRUE
      }
  else {
      z <- do.call(est, c(list(covmat = Sigma, 
                 factors=p, n.obs=n.obs, rotation="none"), estArgs))

      estConverged <- z$converged
      
      uniquenesses <- z$uniquenesses
       
      # for debugging compare:  hatOmega - Omega, hatOmega, Omega

      # z$loadings is orth solution
      if (rotation == "none") {
         loadings.std <- z$loadings              
	 Phi  <- NULL
	 rotationConverged <- TRUE
	 orthogonal <- TRUE
 	 }
      else {	 
  	 rotB <- do.call(rotation, c(list(z$loadings), GPFargs, rotationArgs))
    	 loadings.std <-  rotB$loadings
	 rotationConverged <- rotB$convergence
	 orthogonal <- rotB$orthogonal
     
    	 # Make sure columns are ordered properly and have the correct signs.
    	 Phi <- rotB$Phi
    	 if (! is.null(BpermuteTarget)) {
    	    z  <- permusign(diag(stds) %*% loadings.std, BpermuteTarget, Phi=Phi)
    	    loadings.std <- diag(1/stds) %*% z$loadings
    	    Phi  <- z$Phi
    	    }
	 }
      loadings <- diag(stds) %*% loadings.std
      dimnames(loadings) <- list(indicatorNames, factorNames)

      ### Compute Bartlett-predicted factor scores 
      Sigma.std <- if(is.null(Phi)) loadings.std %*% t(loadings.std) + diag(uniquenesses) else
                        loadings.std %*% Phi %*% t(loadings.std) + diag(uniquenesses)
      SinvB <- solve(Sigma.std, loadings.std) 
      LB.std   <- solve(crossprod(loadings.std, SinvB), t(SinvB))

      LB <- LB.std %*% diag(1/stds)
      dimnames(LB) <- list(factorNames, indicatorNames)
      }
  FAmodel(loadings, Omega=diag(stds * uniquenesses * stds), 
      Phi=Phi, LB=LB, LB.std=LB.std,  
      stats=list(estConverged=estConverged, rotationConverged=rotationConverged,
	orthogonal=orthogonal, uniquenesses=uniquenesses, call=match.call()))
  }
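
# Sketch of use (S is assumed to be a sample covariance matrix of M = 6
# indicators; p = 2 respects floor(LedermannBound(6)) = 3; the default
# quartimin rotation comes from the GPArotation package):
#   fa <- estFAmodel(S, 2, n.obs=100, indicatorNames=paste("y", 1:6))
#   loadings(fa)
#   fa$stats$estConverged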

###################################################

#        Time Series Factor Analysis 

###################################################


DstandardizedLoadings <- function(x)UseMethod("DstandardizedLoadings")
DstandardizedLoadings.TSFmodel <- function(x){
    r <- diag(1/sqrt(diag(cov(diff(x$data))))) %*% loadings(x)
    dimnames(r) <- dimnames(loadings(x))
    r
    }


# standardizedLoadings <- function(x)UseMethod("standardizedLoadings")
# standardizedLoadings.TSFestModel <- function(x){
#   To transform everything back to
#   the undifferenced scales and then standardize, then somehow a
#   pseudo-Phi and pseudo-Omega (called \Gamma_t and \Psi_t in the TSFA
#   paper) must be computed first.    
#     r <- diag(1/sqrt(diag(cov(x$data)))) %*% loadings(x)
#     dimnames(r) <- dimnames(loadings(x))
#     r
#     }

TSFmodel <- function(obj, ...)UseMethod("TSFmodel")
TSFmodel.TSFmodel <- function(obj, ...) obj #extractor

TSFmodel.default <- function(obj, f=NULL, Omega=NULL, Phi=NULL, LB=NULL,
        positive.data=FALSE, names=NULL, ...)
  {#  arg break.points=NULL is not yet supported
   #  obj should be the loadings (hat)B in
   #     x =  B f + e   # NB no mean is added to x
   #   vector processes x, f, and e have time series matrices of data, so
   #   the calculation is e.g. t(B %*% t(f))

   if(is.null(f)) stop(" f must be specified.")

   if(!is.matrix(obj))
     stop("TSFmodel.default requires a loadings matrix (factor loadings) as the first argument.")

   if(ncol(obj)!=nseries(f))  stop("dimensions of obj (B) and  f do not agree.")
   if(is.null(names)) names <- paste("Series", seq(nrow(obj)))
   classed(list(loadings=obj, f=f, Omega=Omega, Phi=Phi, LB=LB, 
	   positive.data=positive.data,
           names=names,  #break.points=break.points, 
	   dots=list(...)), c("TSFmodel", "fFAmodel", "FAmodel")) # constructor
  }

TSFmodel.FAmodel <- function(obj, f=NULL, positive.data=FALSE, names=NULL, ...)
  {if(is.null(f)) stop(" f must be specified.")

   if(ncol(loadings(obj))!=nseries(f))  
       stop("dimensions of obj loadings and  f do not agree.")
   if(is.null(names)) names <- paste("Series", seq(nrow(obj$loadings)))
   classed(append(obj, list(f=f, positive.data=positive.data, names=names,...)),
          c("TSFmodel", "fFAmodel", "FAmodel")) # constructor
  }

simulate.TSFmodel <- function(model, f=factors(model), Cov=model$Omega, sd=NULL, noise=NULL,  
	rng=NULL, noise.model=NULL, ...)
   {#  (... further arguments, currently disregarded)
    # tframe and Tobs are taken from factors (f) 
    
    if ( is.null(Cov) & is.null(sd) & is.null(noise) & is.null(noise.model))
      stop("One of Cov, sd, noise, or noise.model, must be specified.")

    p <- nrow(loadings(model))
    noise <- dse::makeTSnoise(Tobs(f), p, 1, noise=noise, rng=rng,
                        Cov=Cov, sd=sd, noise.model=noise.model)

    #use the calculation in explained but discard the class setting
    x <- unclass(explained(model, f=f)) + noise$w  
    if (model$positive.data && any( x < 0 )) {
        warning("negative simulated data values set to zero.")
        x[ x < 0 ] <- 0
        }
    attr(x, "noise") <- noise
    attr(x, "TSFmodel") <- model
    tframed(x, tf=tframe(f), names=seriesNames(model))
}
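
# Sketch of use (tm is assumed to be a TSFmodel with Omega set; otherwise
# one of Cov, sd, noise, or noise.model must be supplied explicitly):
#   x  <- simulate(tm)          # noise covariance taken from tm$Omega
#   x2 <- simulate(tm, sd=0.1)  # iid noise with sd 0.1 on each series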


factors.TSFmodel <- function(x) classed(x$f, c("TSFfactors", class(x$f)))
#factors.TSFestModel <- function(x) {
#   r <- factors(TSFmodel(x))
#   trueM <- attr(x$data, "TSFmodel")
#   if(!is.null(trueM)) attr(r, "true") <- factors(trueM)
#   r
#   }


factors.EstEval  <- function(x)
   {N <- length(x$result)
    r <- vector(mode="list", length=N)
    for (i in 1:N) r[[i]] <- factors(x$result[[i]])
    classed(list(result=r, truth=factors(x$truth)),
            c("factorsEstEval", "EstEval"))
  }

#diff.TSFmodel  <- function (x, ...){
#  x$f <- diff(x$f)
#  x 
#  }

# was diff.TSFestModel
diff.TSFmodel  <- function (x, ...){
  x$f <- diff(x$f)
  x$data <- diff(x$data)
  trueM <- attr(x$data, "TSFmodel")
  if(!is.null(trueM)){
     trueM$f <- diff(factors(trueM))
     attr(x$data, "TSFmodel") <- trueM
     }
  x 
  }

diff.TSFexplained  <- function (x, ...){
  tf <- diff(tframe(x))
  r  <- tframed(diff(unclass(x)), tf=tf)
  d  <- attr(x, "data")
  if(!is.null(d)) attr(r, "data") <- tframed(diff(d), tf=tf)
  classed(r, c("TSFexplained", class(r))) 
  }

diff.TSFfactors  <- function (x, ...){
  tf <- diff(tframe(x))
  r <- tframed(diff(unclass(x)), tf=tf)
  truef <- attr(x, "true")
  if(!is.null(truef)) attr(r, "true") <- tframed(diff(unclass(truef)), tf=tf)
  classed(r, c("TSFfactors", class(r))) 
  }

diff.factorsEstEval  <- function (x, ...){
  N <- length(x$result)
    r <- vector(mode="list", length=N)
    for (i in 1:N) r[[i]] <- diff(x$result[[i]])
    classed(list(result=r, truth=diff(x$truth)),
            c("factorsEstEval", "EstEval"))
  }

nfactors.EstEval <- function(x) {nfactors(x$truth)}
nfactors.TSFfactors <- function(x) {nseries(x)}

seriesNames.TSFmodel <- function(x) {x$names}

factorNames.EstEval     <- function(x) factorNames(x$truth)
factorNames.TSFfactors     <- function(x) seriesNames(x)


tframe.TSFmodel <- function(x) tframe(x$f)

tfplot.TSFmodel <- function(x,..., tf=tfspan(x , ...),   
      start=tfstart(tf), end=tfend(tf), 
      series=seq(nfactors(x)),
      Title="Model factors", 
      lty = 1:5, lwd = 1, pch = NULL, col = 1:6, cex = NULL,
      xlab=NULL, ylab=factorNames(x), xlim = NULL, ylim = NULL, 
      graphs.per.page=5, par=NULL, reset.screen=TRUE) {

   tfplot(factors(x),...,  tf=tf, start=start, end=end, 
	series=series, Title=Title, 
        lty=lty, lwd=lwd, pch=pch, col=col, cex=cex,
        xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim,
	graphs.per.page=graphs.per.page, 
	par=par, reset.screen=reset.screen)
   }

#tfplot.TSFestModel <- function(x,...)  tfplot(factors(x), ...)

tfplot.TSFfactors <- function(x,..., tf=tfspan(x , ...),  
      start=tfstart(tf), end=tfend(tf), 
      series=seq(nfactors(x)),
      Title="Estimated factors (dashed) and true (solid)", 
      lty = c("dashed", "solid"), lwd = 1, pch = NULL, col = 1:6, cex = NULL,
      xlab=NULL, ylab=factorNames(x), xlim = NULL, ylim = NULL, 
      graphs.per.page=5, par=NULL, reset.screen=TRUE) {

   tfplot(unclass(x), attr(x, "true"),  ..., 
        tf=tf, start=start, end=end, 
	series=series, Title=Title, 
        lty=lty, lwd=lwd, pch=pch, col=col, cex=cex,
        xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim,
	graphs.per.page=graphs.per.page, 
	par=par, reset.screen=reset.screen)
   }


tfplot.TSFexplained <- function(x,..., tf=tfspan(x, ...), start=tfstart(tf), end=tfend(tf), 
      series=seq(nseries(x)),
      Title="Explained (dashed) and actual data (solid)", 
      lty = c("dashed", "solid"), lwd = 1, pch = NULL, col = 1:6, cex = NULL,
      xlab=NULL, 
      ylab=seriesNames(x), 
      xlim = NULL, ylim = NULL,
      graphs.per.page=5, par=NULL, reset.screen=TRUE) {
    tfplot( unclass(x), attr(x, "data"), 
                Title=Title,
                tf=tf, start=start, end=end, series=series,  
                lty=lty, lwd=lwd, pch=pch, col=col, cex=cex,
                xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim,
		graphs.per.page=graphs.per.page, 
		par=par, reset.screen=reset.screen)
  }


FAfitStats <- function(object, ...)UseMethod("FAfitStats")

FAfitStats.default <- function(object, diff.=TRUE, 
                N=(nrow(object) - diff.), 
		control=list(lower = 0.0001, opt=list(maxit=1000)), ...) {
    if (!is.matrix(object)) stop("FAfitStats.default expects a matrix object.")
    corDX  <- cor(if (diff.) diff(object) else object)
    #covDX  <- cov(if (diff.) diff(object) else object)
    nvar   <- ncol(object)
    maxfact <- floor(LedermannBound(nvar))
    OmegaTot <- matrix(NA,nvar, maxfact+1)
    
    # Fit statistics could be calculated with either corDX or covDX. factanal
    # transforms a covariance matrix to a correlation matrix: the loadings and
    # uniquenesses it returns are for the standardized solution (correlation
    # matrix) whether it is given a cov or cor matrix. This means the
    # loadings and uniquenesses must be converted back to the unstandardized
    # scale if the fit statistics are to be calculated with the cov.
    # Commented code below does the calculation using covariances.

    # zero factors
    fitStats   <- FAmodelFitStats(NULL, NULL, diag(corDX), corDX, N)
    OmegaTot[,1] <- diag(corDX)
    nm <- list(names(fitStats), c(0, seq(maxfact), "saturated"))
    
    for (j in 1:maxfact) { # loop over number of factors
     	# estimate parameters
    	FA <- factanal(factors = j, covmat = corDX, n.obs = N,
    			scores = "none", rotation = "none", control=control)
    	#FA <- factanal(factors = j, covmat = covDX, n.obs = N,
    	#		scores = "none", rotation = "none", control=control)
     	OmegaTot[,j+1] <- FA$uniquenesses
        
        # compute fit statistics

 	fitStats <- cbind(fitStats, 
            FAmodelFitStats(FA$loadings, diag(1,j,j), FA$uniquenesses, corDX, N))
	#D <- diag(sqrt(diag(covDX)))
 	#fitStats <- cbind(fitStats, 
	#   FAmodelFitStats(D %*% FA$loadings, diag(1,j,j),
	#                               diag(covDX) * FA$uniquenesses, covDX, N))
        }

    Hey <- apply(control$lower == OmegaTot, 2, any)
    if(any(Hey)) warning("Heywood cases: ", 
                   paste((0:maxfact)[Hey], collapse=" "), " factor model(s)")

    # saturated model
    M <- nvar
    #k <- maxfact
    #nparc <- M * k + M - (k *(k-1))/2 # no. of param's corrected for rotation
    # above is for largest actual model, but saturated model may correspond
    # to a non-integer Ledermann bound
    nparc <- M *(M+1)/2 
    fitStats <- cbind(fitStats, c(
        0,0,NA,0,         # chisq=0, df=0, pval=na, delta=0
        NA,1,1,1,         # RMSEA=na, RNI=1, CFI=1, MCI=1
        1,1,0,            # GFI=1, AGFI=1, AIC=0
        (1 + log(N)) * nparc,  # CAIC
        log(N) * nparc,        # SIC
        (2 * nparc)/N,         # CAK
        2 * nparc/(N-M-1) ))    # CK


    dimnames(fitStats) <- nm
    
    # differences between consecutive models

    seqfitStats <- NULL
    for (j in 1:(maxfact+1))  seqfitStats <- cbind(seqfitStats, 
        c( fitStats["chisq",j] - fitStats["chisq",j+1],
	   fitStats["df",   j] - fitStats["df",   j+1],
	   pchisq(fitStats["chisq",j] - fitStats["chisq",j+1], 
	          fitStats["df",   j] - fitStats["df",   j+1], lower.tail=FALSE)))
    
    nm <- dimnames(fitStats)[[2]]
    dimnames(seqfitStats) <- list(c("chisq", "df", "pval"), 
                                  paste(nm[-length(nm)], "vs", nm[-1]))
        
    list(fitStats=fitStats, seqfitStats=seqfitStats)  #, OmegaTot= OmegaTot)
    }
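
# Sketch of typical use on a T x M time series matrix y (undifferenced; by
# default statistics are computed from cor(diff(y))): models with 0 up to
# floor(LedermannBound(M)) factors plus the saturated model are compared.
#   fs <- FAfitStats(y)
#   fs$fitStats["RMSEA", ]  # e.g. compare models by RMSEA
#   fs$seqfitStats          # chi-square difference tests, consecutive models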

FAfitStats.TSFmodel <- function(object, diff.=TRUE,
                             N=(nrow(object$data) - diff.), ...) {
    # This uses unstandardized B and Omega (and covDX rather than corDX).
    # This should be the same as standardized for MLE, but not for other
    #  estimation methods. Standardized may be better conditioned numerically,
    #  so might be preferred for ML, but it does not seem to make much
    #  difference in simple tests. Might consider using both.
    
    X <- if(diff.) diff(object$data) else object$data
    FAmodelFitStats(loadings(object), object$Phi, diag(object$Omega),
                   cov(X), N)
    } 


FAmodelFitStats <- function(B, Phi, omega, S, N) {
  tr <- function(A) {sum(diag(A))} # local function, trace of a matrix

  # Fit statistics of FA model, based on standard likelihood.
  # No consistency checks are made.
  #
  # See, e.g., Wansbeek, T., & Meijer, E. (2000). Measurement error and latent
  #   variables in econometrics. Amsterdam: North-Holland. (W&M below)
  #
  # B     = loadings
  # Phi   = cov. matrix of factors
  # omega = vector of error variances
  # S     = sample covariance matrix, or correlation matrix if B and omega are
  #         standardized. (see the notes in FAfitStats.default)
  # N     = sample size (Many authors prefer sample size - 1.)
  # k     = number of factors (may be zero)

  # numbers of variables and factors

  M <- nrow(S)
  k <- if (is.null(B)) 0 else ncol(B)

  # Saturated model: all elements of cov. matrix are free parameters.

  const <- log(det(S)) + M

  # Null model: independence model (W&M, p. 305).

  Sigma0 <- diag(c(diag(S)))
  chisq0 <- N * (log(det(Sigma0)) + tr(solve(Sigma0) %*% S) - const)
  df0    <- 0.5 * (M^2 - M)
  delta0 <- max(0, chisq0 - df0)

  # Target model, i.e., the one from which B, Phi, and omega are obtained.

  # Model-implied covariance matrix
  if (k == 0)  Sigma <- diag(c(omega))
  else if (is.null(Phi)) Sigma <- B %*% t(B) + diag(c(omega))
  else                   Sigma <- B %*% Phi %*% t(B) + diag(c(omega))


  # Chi-square statistic, its degrees of freedom, and its p-value (W&M, p. 298).
  # Note: the df takes the rotational freedom into account (cf. W&M, p. 169).
  chisq <- N * (log(det(Sigma)) + tr(solve(Sigma) %*% S) - const)
  df    <- 0.5 * ( (M - k)^2  -  (M + k) )
  pval  <- pchisq(chisq, df, lower.tail=FALSE)

  # Estimate of noncentrality parameter (W&M, p. 307).
  delta <- max(0, chisq - df)

  # Comparative fit index (W&M, p. 307).
  if(chisq0 <= df0)       CFI <- 0 # Null model fits very well: extremely unlikely.
  else if(chisq <= df)    CFI <- 1 # Target model fits very well.
  else if(delta0 < delta) CFI <- 0 # Null model fits better than target model: also extremely unlikely.
  else        CFI <- 1 - delta/delta0  # The most common situation: null model fits very badly,
                             # target model fits better, but not perfectly.

  # Root mean square error of approximation (W&M, p. 309).
  RMSEA <- if (df > 0)  sqrt(delta/(N * df)) else  Inf


  # Dozens of other fit indexes are possible. See the output of LISREL, EQS,
  # Amos, Mplus, and Mx, and the paper by Ogasawara (SEM, ca. 2001).
  # Most are very bad. Here are some possibilities, from W&M (chap. 10),
  # Hu & Bentler (1995), and Bollen (1989, pp. 256-281):

   # NFI  <- 1 - chisq/chisq0 # Normed fit index
   # TLI  <- (chisq0/df0 - chisq/df)/(chisq0/df0 - 1) # Tucker-Lewis Index,
  	       # also called Nonnormed fit index
   # BL86 <- 1 - (chisq/df)/(chisq0/df0) # Bollen (1986)
   # BL89 <- (chisq0 - chisq)/(chisq0 - df) # Bollen (1989)
   RNI  <- 1 - delta/delta0 # Relative noncentrality index
   MCI  <- exp(-0.5 * (chisq - df)/N) # McDonald's centrality index
  
  # # Some indexes by Joreskog & Sorbom (1981, 1986):
  
   T1	<- solve(Sigma) %*% S  # Temporary matrix
   T2	<- T1 - diag(1,M,M)    # Temporary matrix
   GFI  <- 1 - tr(T2 %*% T2)/tr(T1 %*% T1) # Goodness of fit index
   AGFI <- 1 - ((M * (M+1))/(2 * df)) * (1 - GFI) # Adjusted GFI
   #RMR <- sqrt((sum( (   c(S - Sigma))^2 ) +
   #	         sum( (diag(S - Sigma))^2 ))/(M *(M+1))) #Root mean-square residual

  # # Hoelter's (1983) Critical N; note that the significance level
  # # alpha must be given. E.g.:
  # # alpha <- 0.05
  # # Apparently, this is the definition of Hoelter:
  # CN <- (qnorm(alpha/2, lower.tail=FALSE) + sqrt(2*df-1))^2/(2*chisq/N) + 1
  # # but I've also seen the following, which makes more sense:
  # # CN <- qchisq(alpha, df, lower.tail=FALSE)/(chisq/N) + 1
  # # although both are not very useful.

  # # Some information criteria; accounting for rotational freedom
   nparc <- M * k + M - (k *(k-1))/2 # no. of param's corrected for rotation
   AIC   <- chisq - 2 * df # or chisq + 2 * nparc # Akaike's info. crit.
   CAIC  <- chisq + (1 + log(N)) * nparc # Consistent AIC
   SIC   <- chisq + log(N) * nparc # Schwarz's Bayesian info. crit.
   CAK   <- (chisq + 2 * nparc)/N  # Cudeck & Browne's rescaled AIC
   CK	 <- chisq/N + 2 * nparc/(N-M-1) # Cudeck & Browne's cross-val. index

  r <- c(chisq, df, pval, delta, RMSEA, RNI, CFI,
         MCI, GFI, AGFI, AIC, CAIC, SIC, CAK, CK) #NFI, TLI, BL86, BL89

  names(r) <- c("chisq", "df", "pval", "delta", "RMSEA", "RNI", "CFI",
         "MCI", "GFI", "AGFI", "AIC", "CAIC", "SIC", "CAK", "CK")
  r
}
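
# Direct use is possible, but FAfitStats is the usual entry point. E.g., the
# independence (zero-factor) model statistics for a correlation matrix R and
# sample size N, as used in FAfitStats.default above:
#   FAmodelFitStats(NULL, NULL, diag(R), R, N)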

summary.TSFmodel <- function(object, ...)
 {fitStats <- FAfitStats(object)
  est  <- TSFmodel(object)
   
  barx     <- colMeans(object$data)
  barx.est <- colMeans(explained(est))
  hatk     <- colMeans(factors(est))

  barDx     <-  colMeans(diff(object$data ))
  barDx.est <-  colMeans(diff(explained(est)))
  hatDk     <-  colMeans(diff(factors(est)))

  true <- attr(object$data, "TSFmodel")
  if (is.null(true)) 
      B.true <- hatk.true <- hatDk.true <- NULL 
  else  {
      B.true     <- true$loadings
      hatk.true  <- colMeans(true$f)
      hatDk.true <- colMeans(diff(factors(true)))
      }
  
  classed(list(
      N=Tobs(factors(object)), S=start(factors(object)), E=end(factors(object)),
      Snames=seriesNames(object),Fnames=factorNames(est),
      fitStats=fitStats, B.estimate=est$loadings,   B.true=B.true,
      #stdB.estimate=standardizedLoadings(object), 
      DstdB.estimate=DstandardizedLoadings(object),
      barDx=barDx, barDx.est=barDx.est,   barx=barx,  barx.est=barx.est,
      hatDk=hatDk, hatDk.true=hatDk.true, hatk=hatk,  hatk.true=hatk.true),
	  "summary.TSFmodel")
  }


   #positive.data=object$positive.data
   #cat("positive.data ", (if(x$positive.data) "is" else "is not"), " specified.\n")

print.summary.TSFmodel <- function (x, ...)
  {cat("factors have ", x$N, " observations from:", x$S, " to ", x$E, "\n")
   cat("     Estimated loadings:\n"); print(x$loadings.estimate)
   cat("\n     Standardized (using differenced data covariance):\n")
   print(x$DstdB.estimate)
   #cat("\n     Standardized (using undifferenced data covariance):\n")
   #print(x$stdB.estimate) 
   if (!is.null(x$B.true))
     {cat("\n   true loadings:\n"); print(x$B.true)
      cat("\n   loadings estimation error:\n"); print(x$B.estimate - x$B.true)
     }


   z <- rbind(x$barx.est,x$barx,  x$barx.est - x$barx)
   dimnames(z) <- list(c("explained","actual","error"), x$Snames)
   cat("\n                 Mean of data:\n"); print(z)

   z <- rbind(x$barDx.est,x$barDx,  x$barDx.est - x$barDx)
   dimnames(z) <- list(c("explained","actual","error"), x$Snames)
   cat("\n		  Mean of differenced data:\n"); print(z) 

   if (!is.null(x$hatk.true))
     {z <- rbind(x$hatk, x$hatk.true,  x$hatk - x$hatk.true)
      dimnames(z) <- list(c("estimated","true","error"), x$Fnames)
     }
   else
     {z <- x$hatk
      names(z) <- x$Fnames
     }
   cat("\n     Mean of factors:\n"); print(z)
   
   if (!is.null(x$hatDk.true))
     {z <- rbind(x$hatDk, x$hatDk.true,  x$hatDk - x$hatDk.true)
      dimnames(z) <- list(c("estimated","true","error"), x$Fnames)
     }
   else 
     {z <- x$hatDk
      names(z) <- x$Fnames
     }
   cat("\n    Mean of differenced factors:\n"); print(z)

   cat("\n   Fit statistics:\n")
   print(x$fitStats)
   invisible(x)
  }



distribution.factorsEstEval <- function (obj, ..., bandwidth = "nrd0",
        cumulate=TRUE, graphs.per.page = 5, Title=NULL)
  {# if cumulate is TRUE then a distribution is plotted; otherwise,
   # a time series graph of the true values with 1 SD bands
    truth <- obj$truth
    r <- array(NA, c(length(obj$result), dim(truth)))
    otherobj <- list(...)
    obr <- list()
    for (ob in otherobj)
      {if (! testEqual(truth, ob$truth))
                    warning("object true values do not correspond.")
       rx <- r
       for (i in 1:length(ob$result)) rx[i,,] <- ob$result[[i]] - truth
       obr <- append(obr, list(rx))
      }
    for (i in 1:length(obj$result)) r[i,,] <- obj$result[[i]] - truth
    xlab <- "factor "
    old.par <- par(no.readonly = TRUE)
    on.exit(par(old.par)) 
    if (cumulate){
       par(mfcol = c(min(graphs.per.page, ncol(truth)), 1))
       for (i in 1:ncol(truth))
         {rd <- density(c(r[,,i]), bw = bandwidth)
          rdy <- rd$y
	  for (rx in obr)
	         rdy <- cbind(rdy, density(c(rx[,,i]), bw = bandwidth)$y)
	  matplot(rd$x, rdy, type = "l",
	     ylab = "density", xlab = paste(xlab, i), main = "")
          if(!is.null(Title) && (i==1) && (is.null(options()$PlotTitles)
                || options()$PlotTitles)) title(main = Title)
	 }
       }
    else
      {rd <- apply(r,c(2,3), FUN="var")^0.5
       tfplot(truth, truth + rd, truth - rd,
                   Title=Title, graphs.per.page = graphs.per.page)
      }
    invisible()
}

checkResiduals.TSFmodel <- function (obj, data=obj$data, diff.=TRUE, ...) {
	res <- if (diff.) diff(explained(obj)) - diff(data)
	           else explained(obj) - data
	seriesNames(res) <- seriesNames(data)
	cat("residual covariance matrix\n")
	cv <- cov(res)
	print(cv)
	cat("\nsum of trace cov: ", sum(diag(cv)), "\n")
	cat("sum of abs (off-diag of cov): ", sum(abs(cv - diag(cv))), "\n")
	checkResiduals(res, ...)
}


summaryStats <- function(object, ...) UseMethod("summaryStats")

summaryStats.TSFmodelEstEval <- function(object, ...) {

  N <- length(object$result)
  if (N <2 ) stop("This requires more than one replication.")

  meanhatf <- sdhatf <- meanhatDf <- sdhatDf <- meanhatPCf <- 
              sdhatPCf <- meanhatB <- sdhatB <- 
	      estConverged <- rotationConverged <- 0

  for (m in object$result) { 
      meanhatf  <- meanhatf   + factors(m)
      sdhatf	<- sdhatf     + factors(m)^2
      meanhatDf <- meanhatDf  + diff(factors(m))
      sdhatDf   <- sdhatDf    + diff(factors(m))^2
      meanhatPCf<- meanhatPCf + percentChange(factors(m))
      sdhatPCf  <- sdhatPCf   + percentChange(factors(m))^2
      meanhatB  <- meanhatB   + TSFmodel(m)$loadings
      sdhatB	<- sdhatB     + TSFmodel(m)$loadings^2
      if (!is.null(TSFmodel(m)$dots$estConverged) && !TSFmodel(m)$dots$estConverged)
          estConverged <- estConverged + 1
      if (!is.null(TSFmodel(m)$dots$rotationConverged) && !TSFmodel(m)$dots$rotationConverged)
          rotationConverged <- rotationConverged + 1
      }
 
  true <- factors(object$truth)
  tf <- tframe(true)
  dtf <- tframe(diff(true))
  
  meanhatf   <- meanhatf   /N
  meanhatDf  <- meanhatDf  /N
  meanhatPCf <- meanhatPCf /N
  meanhatB   <- meanhatB   /N
 
  sdhatf   <- sqrt(sdhatf   /N - meanhatf^2)
  sdhatDf  <- sqrt(sdhatDf  /N - meanhatDf^2)
  sdhatPCf <- sqrt(sdhatPCf /N - meanhatPCf^2)
  sdhatB   <- sqrt(sdhatB   /N - meanhatB^2)
  
  list(true=true, Btrue=TSFmodel(object$truth)$loadings,
  	meanhatf   =  tframed(meanhatf,   tf), 
  	meanhatDf  =  tframed(meanhatDf, dtf), 
  	meanhatPCf =  tframed(meanhatPCf,dtf), 
  	meanhatB   =          meanhatB,   
  	sdhatf     =  tframed(sdhatf,     tf),	
  	sdhatDf    =  tframed(sdhatDf,   dtf),  
  	sdhatPCf   =  tframed(sdhatPCf,  dtf), 
  	sdhatB     =          sdhatB,
        estConverged = estConverged,
        rotationConverged = rotationConverged)
  }


summary.TSFmodelEstEval <- function(object, ...) {
  sm <- summaryStats(object, ...)
  classed(list(
      meanhatf.error  = colMeans(sm$meanhatf  - sm$true),
      meanSDhatf      = colMeans(sm$sdhatf),
      meanhatDf.error = colMeans(sm$meanhatDf - diff(sm$true)),
      meanSDhatDf     = colMeans(sm$sdhatDf),
      meanhatPCf.error= colMeans(sm$meanhatPCf -
                               percentChange(sm$true)),
      meanSDhatPCf    = colMeans(sm$sdhatPCf),
      meanhatB.error  = sm$meanhatB - sm$Btrue, 
      SDhatB          = sm$sdhatB,
      estConverged = sm$estConverged,
      rotationConverged = sm$rotationConverged), "summary.TSFmodelEstEval")
  }

print.summary.TSFmodelEstEval <- function(x, digits = options()$digits, ...) {
  cat("    mean hat{f} error\n") ; print(x$meanhatf.error, digits=digits)
  cat("\n    mean  SD hat{f}\n")   ; print(x$meanSDhatf, digits=digits)
  cat("\n    mean diff hat{f} error\n");  print(x$meanhatDf.error,  digits=digits)
  cat("\n    mean %change hat{f} error\n");  print(x$meanhatPCf.error,  digits=digits)
  cat("\n    mean hat{B} error") ; print(x$meanhatB.error, digits=digits) 
  cat("\n      SD hat{B}")       ; print(x$SDhatB, digits=digits) 
  cat("\n    Estimates NOT converged: ", x$estConverged) 
  cat("\n    Rotations NOT converged: ", x$rotationConverged) 
  cat("\n") 
  invisible(x)
  }


tfplot.TSFmodelEstEval <- function(x, ...,  tf=NULL, start=tfstart(tf), end=tfend(tf), 
		 series=seq(nseries(factors(x))),
		 Title="Monte Carlo Results", 
		 lty = c("solid", "dotdash", "dashed",  "dashed"), lwd = 1, pch = NULL, 
		 col = c("black", "red", "red", "red"), cex = NULL,
		 xlab=NULL, 
		 ylab=seriesNames(factors(x$truth)), 
		 xlim = NULL, ylim = NULL,
		 graphs.per.page=5, par=NULL, reset.screen=TRUE,
		 diff.=FALSE,  percentChange.=FALSE,
                 PCcentered.=FALSE, summary.=TRUE) {

   #if summary. is FALSE then all of the factors are plotted;
   # otherwise the mean and 95% (1.96 SD) bounds are plotted as follows:
   #if diff. is TRUE then the differenced factors are plotted
   #if percentChange. is TRUE then the percent change (PC) factors are plotted
   #if PCcentered. is TRUE then the PC factors less their means are plotted
   # otherwise the undifferenced factors are plotted

   true <- factors(x$truth)
   
   if(!summary.) {
     if(is.null(tf)) tf <- tframe(true)
     tfplot(factors(x), tf = tf, start=start, end=end, series=series,
        truth = true,
        Title = Title,
        ylab = seriesNames(true), remove.mean = FALSE,
        graphs.per.page = graphs.per.page,
        par = par, reset.screen = reset.screen, ...)
      } 
   else {
      sm <- summaryStats(x)

      if(diff.) { # factor difference
          df <- diff(true)
	  if(is.null(tf)) tf <- tframe(df)
    	  tfplot(df,
    		 sm$meanhatDf, 
    		 sm$meanhatDf  + 1.96 * sm$sdhatDf,  
    		 sm$meanhatDf  - 1.96 * sm$sdhatDf,
		Title=Title,
                tf=tf, start=start, end=end, series=series,  
                lty=lty, lwd=lwd, pch=pch, col=col, cex=cex,
                xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim,
		graphs.per.page=graphs.per.page, 
		par=par, reset.screen=reset.screen)
    	  }
      else if(percentChange.){ # factor growth rates
          pc <- percentChange(true)
	  if(is.null(tf)) tf <- tframe(pc)
    	  tfplot(pc,
    		 sm$meanhatPCf, 
    		 sm$meanhatPCf  + 1.96 * sm$sdhatPCf,  
    		 sm$meanhatPCf  - 1.96 * sm$sdhatPCf,
		Title=Title,
                tf=tf, start=start, end=end, series=series,  
                lty=lty, lwd=lwd, pch=pch, col=col, cex=cex,
                xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim,
		graphs.per.page=graphs.per.page, 
		par=par, reset.screen=reset.screen)
    	  }
      else if(PCcentered.){ # factor growth rates: bias (clearer picture?)
    	  growth <- percentChange(true)
          if(is.null(tf)) tf <- tframe(growth)
	  tfplot(sm$meanhatPCf - growth, 
    		sm$meanhatPCf  + 1.96 * sm$sdhatPCf - growth, 
    		sm$meanhatPCf  - 1.96 * sm$sdhatPCf - growth,
		Title=Title,
                tf=tf, start=start, end=end, series=series,  
                lty=lty, lwd=lwd, pch=pch, col=col, cex=cex,
                xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim,
		graphs.per.page=graphs.per.page, 
		par=par, reset.screen=reset.screen)
          }
      else { # factors
          if(is.null(tf)) tf <- tframe(true)
    	  tfplot(true,
    		 sm$meanhatf, 
    		 sm$meanhatf  + 1.96 * sm$sdhatf, 
    		 sm$meanhatf  - 1.96 * sm$sdhatf,
		Title=Title,
                tf=tf, start=start, end=end, series=series,  
                lty=lty, lwd=lwd, pch=pch, col=col, cex=cex,
                xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim,
		graphs.per.page=graphs.per.page, 
		par=par, reset.screen=reset.screen)
    	  }
      }
   invisible(x)
   }


predict.TSFmodel <- function(object, data = object$data, factorNames.=factorNames(object), ...){
      # prediction of factors with data
      if (is.null(data)) stop("data must be supplied.")
      tframed(data %*% t(object$LB), tframe(data), names=factorNames.) #hatf
      }
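
# Sketch of use (assuming tm was estimated by estTSF.ML and ynew is a
# tframed T x M matrix of new indicator data on the same scale):
#   fnew <- predict(tm, data=ynew)  # out-of-sample factor scores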


#explained.TSFestModel <- function (object, ...)
# {r <- explained(TSFmodel(object), names=seriesNames(object$data), ...) 
#  attr(r, "data") <- object$data
#  r
# }

#explained.TSFmodel <- function (object, f=factors(object),
#                  names=seriesNames(object), ...) {
#  # portion of data explained by factors
#  classed(tframed(t(loadings(object) %*% t(f)), tf=tframe(f), names=names),
#     "TSFexplained") 
#  }

explained.TSFmodel <- function (object, f=factors(object),
                  names=seriesNames(object), ...) {
  # portion of data explained by factors
  tframed(t(loadings(object) %*% t(f)), tf=tframe(f), names=names)
  }


#######################################

estTSF.ML <- function(y, p, diff.=TRUE, 
                      rotation=if(p==1) "none" else "quartimin", 
		      rotationArgs=NULL, 
		      normalize=TRUE, eps=1e-5, maxit=1000, Tmat=diag(p),
		      BpermuteTarget=NULL,
                      factorNames=paste("Factor", seq(p))) {
   # it would be better to have GPFargs=list(Tmat=Tmat, normalize=normalize, eps=eps, maxit=maxit)

      # Estimate parameters using standard (quasi) ML factor analysis
      # (on the correlation matrix and then scaled back).
      # factanal always uses the cor matrix, so standardizing does not affect
      # the solution. Both standardized and unstandardized versions can be
      # calculated afterwards. With non-ML methods the solutions may differ
      # (and working with cov rather than cor is probably better).
   
   estTSFmodel(y, p, diff.=diff., 
        est="factanal", 
	estArgs=list(scores="none", control=list(opt=list(maxit=10000))),
        rotation=rotation, 
	rotationArgs=rotationArgs, 
	GPFargs=list(Tmat=Tmat, normalize=normalize, eps=eps, maxit=maxit),
	BpermuteTarget=BpermuteTarget,
        factorNames=factorNames)
   }
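
# Sketch of a typical workflow (y is assumed to be a tframed T x M matrix
# of indicators, here fit with p = 2 factors):
#   tm <- estTSF.ML(y, 2)
#   tfplot(factors(tm))    # estimated factor paths
#   tfplot(explained(tm))  # portion of the data explained by the factors
#   summary(tm)
#   FAfitStats(tm)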

estTSFmodel <- function(y, p, diff.=TRUE, 
                est="factanal", 
		estArgs=list(scores="none", control=list(opt=list(maxit=10000))),
                rotation=if(p==1) "none" else "quartimin", 
		rotationArgs=NULL, 
		GPFargs=list(Tmat=diag(p),normalize=TRUE, eps=1e-5, maxit=1000), 
		BpermuteTarget=NULL,
                factorNames=paste("Factor", seq(p))) {
      if (p < 1) stop("number of factors must be a positive integer.")
      indicatorNames <- seriesNames(y)
      zz <- if (diff.) diff(y) else y
      zz <- sweep(zz,2,colMeans(zz), "-")
      Sigma  <- crossprod(zz)/(Tobs(zz) - 1)
      
      z <- estFAmodel(Sigma, p, n.obs=(Tobs(y) - diff.),
                est="factanal", 
		estArgs=estArgs,
                rotation=rotation, rotationArgs=rotationArgs, 
		GPFargs=GPFargs,
		BpermuteTarget=BpermuteTarget,
                factorNames=factorNames,
                indicatorNames=indicatorNames) 

      model <- TSFmodel(z,
                        f=tframed((y %*% diag(1/sqrt(diag(Sigma)))) %*% t(z$LB.std), tframe(y), names=factorNames), #hatf
			positive.data=all(0<y)
		       )
			 
       model$data <- y
       classed(model, c("TSFmodel", "fFAmodel", "FAmodel"))
      }

#######################################
#######################################
