R/clustergscairls.r

Defines functions clustergscairls gscalsresid

Documented in clustergscairls gscalsresid

##
##  clustergscairls.r                                                  R. Schlittgen 29.11.2017
##

#' Clustering gsc-models
#'
#' \code{clustergscairls} clusters data sets in that way that each cluster has a its own
#'  set of coefficients in the gsc-model.
#'
#' @param dat (n,p)-matrix, the values of the manifest variables
#' @param B   (q,q) lower triangular matrix describing the interrelations of the latent variables:
#'             b_ij= 1 regression coefficient of eta_j in the regression relation in which eta_i is
# '            the depend variable
#'             b_ij= 0 if eta_i does not depend on eta_j in a direct way  (b_ii = 0 !)
#' @param  indicatorx vector describing with which exogenous composite the X-variables are connected
#' @param  indicatory vector describing with which endogenous composite the Y-variables are connected
#' @param  loadingx  logical TRUE when there are loadings for the X-variables in the model
#' @param  loadingy  logical TRUE when there are loadings for the Y-variables in the model  
#' @param  k  scalar, the number of clusters to be found
#' @param  minmem number of the cluster's members or FALSE (then ist is set to 2*number of indicators)
#' @param  wieder  scalar, the number of random starts
#'
#' @return out list with components
#'   \tabular{ll}{
#'               member \tab (n,1)-vector, indicator of membership  \cr
#'               Bhat \tab (k,q,q)-array, the path coefficients of the clusters \cr
#'               lambda \tab (p,k)-matrix, the loadings of the clusters \cr
#'               fitall \tab the total fit measure for the structural models only \cr
#'               fit \tab vector of length k, the fit values of the different models \cr
#'               R2 \tab (k,q) matrix, the coefficients of determination for the structural regression equations
#'               }
#' @examples
#' \donttest{ 
#' data(twoclm)
#' dat <- twoclm[,-10]
#' B <- matrix(c( 0,0,0, 0,0,0, 1,1,0),3,3,byrow=TRUE)
#' indicatorx <- c(1,1,1,2,2,2)
#' indicatory <- c(1,1,1)  
#' out <- clustergscairls(dat,B,indicatorx,indicatory,loadingx=FALSE,loadingy=FALSE,2,minmem=6,1) 
#' }
#' @export
clustergscairls <- function(dat,B,indicatorx,indicatory,loadingx=FALSE,loadingy=FALSE,k,minmem=FALSE,wieder){

# standardisation of the data

dat <- scale(dat,center=TRUE,scale=TRUE)
dat <- as.matrix(dat)

n <- nrow(dat)
p1 <- length(indicatorx)                     # number of indicators of exogenous latent v.
p2 <- length(indicatory)                     # number of indicators of endogenous latent v.
p <- p1+p2                                       # number of manifest variables
q1<- length(unique(indicatorx))          # number of  exogenous latent v.
q2<- length(unique(indicatory))          # number of endogenous latent v.
q <- q1+q2                                       # number of latent variables

if(minmem == FALSE){minmem  <- 2*p }                                # minimal number of members fo a cluster
refl <- c(indicatorx,q1+indicatory)
W0 <- matrix(refl,p,q)
W0 <- 1*(W0 == matrix(c(1:q),p,q,byrow=TRUE))
W2 <- W0[,(q1+1):q,drop=FALSE] 
ind <- c(1:n)
indlv <- c(1:q)
indlvreg <- c((q1+1):q)                  # indices of endogenous latent variables
ni <- rep(0,k)

member <- matrix(0,n,wieder)
fit <- numeric(wieder)

#  generate "wieder" solutions with random starts

for( wie in (1:wieder)){

  weights <- matrix(1/(k*100),n,k)
  for( i in c(1:n)){  weights[i,sample(c(1:k),1)] <- 1-(k-1)/(k*100) }
  rest <- matrix(0,n,k)
  coeff <- matrix(NA,length(refl),k)
  param <-  matrix(1,q^2 + 2*length(refl),k)
  differenz <-  rep(1,k)

  B2 <-  array(NA, dim=c(q,q,k))
  lambda <- matrix(NA,p,k)
  ss2 <- matrix(NA,p,k)
  R2 <- matrix(NA,k,q2)
  R2w <- rep(0,q2)
  y <- matrix(0,n,q)
  w <- matrix(0,n,q)
  zaehl <- numeric(k)
  nenn <- numeric(k)

  ite <- 0
  while ((ite<100) & (sum(abs(differenz)) > 0.0001) ){                        # iteration for adjusting the weights
      ite <- ite+1
      oldparam <- param
      for( j in c(1:k)){                                                    # estimation of separate models for the k sets of weights 
        out <- gscals(dat*weights[,j],B,indicatorx,indicatory,loadingx=loadingx,loadingy=loadingy)
                                                                                                        # compute residuals without weighting
       resi <-    gscalsresid(dat,out,indicatorx,indicatory,loadingx,loadingy) 
        rest[,j] <- apply((resi[,1:q2,drop=FALSE])^2,1,sum)                      # only residuals of the inner model
        param[,j] <- c(as.vector(out$Bhat),out$lambdahat,as.vector(rowSums(out$What)))
        differenz[j] <- sum(abs(param[1:(q^2),j]-oldparam[1:(q^2),j]))
      }
      ares <- (rest + 1e-8)^-1
      weights <- ares/apply(ares,1,sum)
  }
  member[,wie] <-  apply(weights,1,which.max)                  # assignment of membership from the models found

  # coefficients of determination / sums of squared residuals ( models without weighting)

  zaehl <- rep(NA,k)
  nenn <- rep(NA,k)
  for( i in c(1:k)){
   indi <- ind[member[,wie] == i]
   dat1 <- dat[indi,]
     out <- gscals(dat1,B,indicatorx,indicatory,loadingx=loadingx,loadingy=loadingy)
     zaehl[i] <- sum(diag(t(out$resid[,1:q2,drop=F])%*%out$resid[,1:q2,drop=F]))  #  fit for stucture model only
     zwi <- dat%*%(W2*rowSums(out$What))
     nenn[i] <- sum(diag(t(zwi)%*%zwi))

   }
  fit[wie] <- 1- sum(zaehl, na.rm = T)/sum(nenn, na.rm = T)
}                                                                                    # end of iteration "wieder"

member <- member[,which.max(fit)]                               # final cluster-membership

# computations for return

zaehl <- rep(NA,k)
nenn <- rep(NA,k)
fit <- rep(NA,k)

for (i in c(1:k)){
 indi <- ind[member==i]
 dat1 <- dat[member == i,]
 ni[i] <- nrow(dat1)
 if( nrow(dat1) > minmem ){
   out <- gscals(dat1,B,indicatorx,indicatory,loadingx=loadingx,loadingy=loadingy)
   R2[i,] <- out$R2
   lambda[,i] <- out$lambdahat
   B2[,,i] <- out$Bhat
   fit[i] <- out$fit
   zaehl[i] <- sum(diag(t(out$resid[,1:q2,drop=F])%*%out$resid[,1:q2,drop=F]))  #  fit for stuctural model only
   zwi <- dat%*%(W2*rowSums(out$What))
   nenn[i] <- sum(diag(t(zwi)%*%zwi))
   # zaehl[i] <- out$ziel[2]
   # nenn[i] <- out$ziel[2]/(1-out$fit)
  }
}

fitall <-  1- sum(zaehl, na.rm = T)/sum(nenn, na.rm = T)

return(list(member=member,Bhat=B2,lambda=lambda,fitall=fitall,fit=fit,R2=R2))
}


#' For use in clustergscairls, residuals of a gsc-model
#'
#' \code{gscalsresid} computes the residuals of a gsc-model when the parameters and weights are given
#'
#' @param  dat   (n,p) data matrix
#' @param  out  list, output from gscals
#' @param  indicatorx vector describing with which exogenous composite the X-variables are connected
#' @param  indicatory vector describing with which endogenous composite the Y-variables are connected
#' @param  loadingx logical TRUE when there are loadings for the X-variables in the model
#' @param  loadingy logical TRUE when there are loadings for the y-variables in the model
#'
#' @return   resid (n,q2) matrix of residuals from structural model, the q2 is the number of endogenous composites .
#'
#' @examples
#' data(simplemodel) 
#' data(gscalsout)
#' B <- matrix(c( 0,0,0, 0,0,0, 0.7,0.4,0),3,3,byrow=TRUE)
#' indicatorx <- c(1,1,1,2,2,2)
#' indicatory <- c(1,1,1)  
#' out <- gscalsresid(simplemodel,gscalsout,indicatorx,indicatory,TRUE,TRUE)
#'
#' @export
gscalsresid <- function(dat,out,indicatorx,indicatory,loadingx,loadingy){
           Bhat <- out$Bhat
           W <- out$What
           lambda <- out$lambdahat
if( (loadingx == TRUE) & (loadingy == TRUE) ){ scenario <- 1 }
if( (loadingx == FALSE) & (loadingy == TRUE) ){ scenario <- 2 }
if( (loadingx == FALSE) & (loadingy == FALSE) ){ scenario <- 3 }

# standardisation of the data

n <- nrow(dat)
dat <- scale(dat,center=TRUE,scale=TRUE)
dat <- as.matrix(dat)

# construction of matricies W1, W2 and Lambdax, Lambday

p1 <- length(indicatorx)                     # number of indicators of exogenous latent v.
p2 <- length(indicatory)                     # number of indicators of endogenous latent v.
p <- p1+p2                                       # number of manifest variables
q1<- length(unique(indicatorx))          # number of  exogenous latent v.
q2<- length(unique(indicatory))          # number of endogenous latent v.
q <- q1+q2                                       # number of latent variables


x <- indicatorx
Lx <- matrix(indicatorx,p1,q1)
Lx <- 1*(Lx == matrix(c(1:q1),p1,q1,byrow=T))
Ly <- matrix(indicatory,p2,q2)
Ly <- 1*(Ly == matrix(c(1:q2),p2,q2,byrow=T))
Lambdax = Lx*lambda[1:p1]
Lambday = Ly*lambda[(p1+1):p]

refl <- c(indicatorx,q1+indicatory)
W1 <- W[,1:q1, drop=FALSE]
W2 <- W[,(q1+1):(q1+q2), drop=FALSE]

if(scenario == 1){                                                          # for scenario1
  V <- cbind(W2,diag(rep(1,p)))
  U <- W
  A <- t(Bhat[(q1+1):(q1+q2),, drop=FALSE])
  A <- cbind(A,rbind(t(Lambdax),matrix(0,q2,p1)),rbind(matrix(0,q1,p2),t(Lambday)))
 }
else if( scenario ==2 ){                                                  # for scenario2
  V <- cbind(W2,rbind(matrix(0,p1,p2),diag(rep(1,p2))))
  U <- cbind(W1,W2)
  A <- rbind((t(Bhat[(q1+1):(q1+q2),1:q1, drop=FALSE])),(t(Bhat[(q1+1):(q1+q2),(q1+1):(q1+q2), drop=FALSE])))
  A <- cbind(A,rbind(matrix(0,q1,p2),t(Lambday)))
}
else{                                                                            # for scenario1
   V <- W2
   U <- W
   A <- t(Bhat[(q1+1):(q1+q2), ,drop=FALSE])
   }
res <- dat%*%(V-U%*%A)

return(res)
}

Try the cbsem package in your browser

Any scripts or data that you put into this service are public.

cbsem documentation built on May 2, 2019, 5:56 a.m.