R/msir.R

Defines functions msir.componentsSlice msir.components msir.slices msir.nslices msir.recoverdir msir.parameters msir.regularizedSigma eigen.decomp predict.msir print.summary.msir summary.msir print.msir msir.fit msir

Documented in eigen.decomp msir msir.components msir.componentsSlice msir.fit msir.nslices msir.parameters msir.recoverdir msir.regularizedSigma msir.slices predict.msir print.msir print.summary.msir summary.msir

#############################################################################
##                                                                         ##
##                        Model-based SIR                                  ##
##                                                                         ##
## Written by Luca Scrucca                                                 ##
#############################################################################

msir <- function(x, y, 
                 nslices = msir.nslices, 
                 slice.function = msir.slices, 
                 modelNames = NULL, 
                 G = NULL, 
                 cov = c("mle", "regularized"), 
                 ...)
{ 
  call <- match.call()
  if(!is.numeric(cov)) 
    cov <- match.arg(cov)
  if(!is.function(msir.nslices) | is.numeric(msir.nslices)) 
    stop("nslices must be an integer value or a function")
  if(!is.function(slice.function)) 
    stop("slice.function must be a function")
  #
  xname <- deparse(substitute(x))
  x <- as.matrix(x)
  n <- nrow(x)
  p <- ncol(x)
  if(is.null(colnames(x))) 
     colnames(x) <- paste(xname, 1:p, sep="")
  #
  if(length(y) != n)
    stop("Dimension of y and x does not match!")
  if(is.character(y))
    y <- as.factor(y)
  if(is.factor(y))
    { nslices <- nlevels(y) }
  else
    { if(!is.numeric(nslices)) nslices <- nslices(n, p)
      nslices <- min(nslices, length(unique(y))) 
    }
  slice.info <- slice.function(y, nslices) 
  nslices <- slice.info$nslices
  ysl <- slice.info$slice.indicator
  #
  # overall parameters
  tau <- slice.info$slice.sizes/n
  mu <- colMeans(x)
  # Sigma <- var(x)*(n-1)/n
  # SVD <- svd(Sigma)
  # inv.sqrt.Sigma <- crossprod(t(SVD$v)*SVD$d^(-1/4))
  #
  # mixture model parameters
  if(is.null(G)) 
    { # guarantees at least 10 obs per slice, 
      # with num. of components per slice between 3 and 15
      G <- max(min((n/nslices)%/%10, 15), 3)
      G <- 1:G }
  if(is.null(modelNames))
    modelNames <- mclust.options("emModelNames")
  mixmod <- msir.fit(x, ysl, G = G, modelNames = modelNames, ...)
  # re-order wrt ysl
  tmp <- mixmod 
  for(j in 1:nslices) { tmp[j] <- mixmod[paste(j)] }
  names(tmp) <- paste(1:nslices)
  mixmod <- tmp; rm(tmp)
  #
  mixcomp <- sapply(mixmod, function(mod) mod$G)
  ncomp <- sum(mixcomp)
  ix <- cbind(cumsum(mixcomp)-mixcomp+1, cumsum(mixcomp))
  f <- rep(0, ncomp)
  mu.k <- matrix(0, ncomp, p)
  # Sigma.k <- array(0, dim = c(p,p,ncomp))
  for(h in 1:nslices)
     { par <- mixmod[[h]]$parameters
       pro <- par$pro[1:mixmod[[h]]$G]
       if(is.null(pro)) pro <- 1
       i <- seq(ix[h,1],ix[h,2])
       f[i] <- pro*tau[h]/sum(pro)
       mu.k[i,] <- t(par$mean)
     }
  # Note that: 
  # mu = colMeans(x) = apply(mu.k, 2, function(m) sum(m*f))
  # kernel matrix
  k <- length(f) # total number of components/groups
  M <- crossprod(t(t(mu.k)-mu)*sqrt(f))

  # generalized eigendecomposition
  if(is.numeric(cov))
    { # user provided
      Sigma <- cov
      SVD <- eigen.decomp(M, Sigma) 
    } else
  if(cov == "mle")
    { # MLE covar
        Sigma <- crossprod(scale(x, center = mu, scale = FALSE))/n
        SVD <- eigen.decomp(M, Sigma) 
    } else
  if(cov == "regularized")
    { # regularized covar
       RegSigma <- msir.regularizedSigma(x, inv = TRUE)
       Sigma <- RegSigma$Sigma
       SVD <- eigen.decomp(M, RegSigma$SigmaInv, inv = TRUE) 
    }
  evalues <- (SVD$d+abs(SVD$d))/2
  numdir <- min(p, sum(evalues > sqrt(.Machine$double.eps)))
  # evalues <- evalues[1:numdir]
  # raw basis
  V <- SVD$v
  # normalized basis
  B <- as.matrix(apply(V[,seq(numdir),drop=FALSE], 2, normalize))
  # standardized and normalized basis
  sdx <- sqrt(diag(Sigma))
  std.B <- as.matrix(apply(V[,1:numdir,drop=FALSE], 2, 
                           function(x) x*sdx))
  std.B <- as.matrix(apply(std.B, 2, normalize))  
  # directions
  z <- scale(x, center = mu, scale=FALSE) %*% B
  # set sign of coordinates to agree with ols
  sign <- as.vector(sign(cor(z,lm.fit(x,ysl)$fitted.values)))
  if(!any(is.na(sign)))
    { sign <- diag(sign, ncol = numdir)
      B <- B %*% sign
      std.B <- std.B %*% sign
      z <- z %*% sign 
    }
  dimnames(B) <- list(colnames(x), paste("Dir", seq(numdir), sep = ""))
  dimnames(std.B) <- dimnames(B)
  colnames(z) <- colnames(B)
  #
  out <- list(call = call, x = x, y = y, 
              slice.info = slice.info,
              mixmod = mixmod, 
              loglik = sum(sapply(mixmod, function(mod) mod$loglik)),
              f = f, mu = mu.k, sigma = Sigma, M = M, 
              evalues = evalues, evectors = V,
              basis = B, std.basis = std.B, 
              numdir = numdir, dir = z)
  class(out) <- "msir"
  return(out)
}


msir.fit <- function(data, labels, G = NULL, modelNames = NULL, 
                     control = emControl(itmax = c(.Machine$integer.max, 50)),
                     initialization = NULL, 
                     warn = FALSE, verbose = FALSE, ...) 
{
  mc <- match.call(expand.dots = TRUE)
  mc[[1]] <- as.name("mclustBIC")
  mc$labels <- mc$noise <- NULL
  mc$G <- mc$modelNames <- NULL
  mc$control <- control
  mc$verbose <- verbose
  dimData <- dim(data)
  oneD <- is.null(dimData) || length(dimData[dimData > 1]) == 1
  if(!oneD && length(dimData) != 2) 
    stop("data must be a vector or a matrix")
  if(oneD) 
    { data <- as.vector(data)
      n <- length(data)
      p <- 1
      data <- as.matrix(data) }
  else 
    { data <- as.matrix(data)
      n <- nrow(data)
      p <- ncol(data) }
  minObs <- msir.nslices(n,p)
  U <- sort(unique(labels))
  L <- length(U)
  S <- rep(0, L)
  M <- rep("XXX", L)
  #
  if(is.null(G)) 
    { G <- rep(list(1:5), L) }
  else if(is.list(G)) 
         { G <- lapply(G, sort) }
       else 
         { G <- rep(list(sort(G)), L) }
  if (any(unlist(G) <= 0))
      stop("G must be positive")
  #
  if(is.null(modelNames)) 
    { modelNames <- rep(list(mclust.options("emModelNames")), L) }
  else
  if(!is.list(modelNames)) 
    { modelNames <- rep(list(modelNames), L) }
  if(oneD)
    { for(l in 1:L)
         modelNames[l] <- ifelse(any(grep("V", modelNames)), "V", "E") 
    }
  #
  R <- rep(list(NULL), L)
  for(l in 1:L)
     { I <- (labels == U[l])
       X <- data[I,]
       mc[[2]] <- X
       mc$G <- G[[l]]
       mc$modelNames <- as.character(modelNames[[l]])
       BIC <- suppressWarnings(eval(mc, parent.frame()))
       if(all(is.na(BIC))) 
         { m <- seq(which(mclust.options("emModelNames") == mc$modelNames))
           if(length(m) == 0) m <- 1
           mc$modelNames <- mclust.options("emModelNames")[m]
           BIC <- suppressWarnings(eval(mc, parent.frame())) 
         }
       SUMMARY <- suppressWarnings(summary(BIC, X))
       # select best model with at least 5 obs
       i <- 1
       while(any(table(SUMMARY$classification) < minObs) & 
             sum(!is.na(BIC)) > i)
            { i <- i + 1
              mod <- strsplit(names(pickBIC(BIC, i))[i], ",")[[1]]
              mcc <- mc
              mcc[[1]] <- as.name("Mclust")
              mcc$modelNames <- mod[1]
              mcc$G <- mod[2]
              SUMMARY <- suppressWarnings(eval(mcc, parent.frame()))
            }
       S[l] <- SUMMARY$G
       M[l] <- SUMMARY$modelName
       R[[l]] <- c(SUMMARY, list(observations = (1:n)[I]))
     }
  names(S) <- M
  if(verbose) print(S)
  names(R) <- U
  R$Vinv <- NULL
  structure(R, G = G, modelNames = modelNames, 
            control = control, initialization = initialization, 
            warn = warn)
}


print.msir <- function(x, ...)
{
  catwrap(paste0("\'", class(x)[1], "\' model object: "))
  str(x, max.level = 1, give.attr = FALSE, strict.width = "wrap")
  invisible()
}

summary.msir <- function(object, numdir = object$numdir, std = FALSE, verbose = TRUE, ...)
{
  stopifnot(inherits(object, "msir"))

  tab <- rbind(sapply(object$mixmod, function(m) m$modelName),
               sapply(object$mixmod, function(m) m$G))
  sizes <- list()
  for(i in 1:ncol(tab))
     { m <- object$mixmod[[i]]
       sizes[[i]] <- as.vector(table(m$classification)) }
  tab <- rbind(tab, sapply(sizes, function(s) paste(s, collapse="|")))
  rownames(tab) <- c("GMM", "Num.comp.", "Num.obs.")
  
  basis <- object$basis[,seq(numdir),drop=FALSE]
  std.basis <- object$std.basis[,seq(numdir),drop=FALSE]
  rownames(basis) <- colnames(object$x)
  evalues <- object$evalues[seq(numdir)]
  evalues <- rbind("Eigenvalues" = evalues, 
                   "Cum. %" = cumsum(evalues/sum(object$evalues))*100)
  colnames(evalues) <- colnames(basis)

  StructDimTab <- NULL
  if(!is.null(object$bic))
    { bic <- signif(object$bic$crit)
      bic[object$bic$d+1] <- paste(bic[object$bic$d+1], "*", sep="")
      StructDimTab <- rbind("BIC-type criterion" = bic)
      colnames(StructDimTab) <- 0:(ncol(StructDimTab)-1)
    }
  if(!is.null(object$permtest))
    { s <- signif(object$permtest$summary$Stat)      
      nd <- ifelse(is.null(ncol(StructDimTab)), length(s), ncol(StructDimTab))
      StructDimTab <- rbind(StructDimTab, "Test statistic" = c(s, rep("", nd-length(s))))
      s <- signif(object$permtest$summary$"p-value")
      StructDimTab <- rbind(StructDimTab, "Permutation p-value" = c(s, rep("", nd-length(s))))
      colnames(StructDimTab) <- 0:(ncol(StructDimTab)-1)
    }

  out <- list(call = object$call, tab = tab, std = std,
              basis = basis, std.basis = std.basis, evalues = evalues,
              StructDimTab = StructDimTab, verbose = verbose)
  class(out) <- "summary.msir"
  return(out)
}

print.summary.msir <- function(x, digits = max(5, getOption("digits") - 3), ...)
{
  txt <- paste(rep("-", min(50, getOption("width"))), collapse = "")
  catwrap(txt)
  catwrap("Model-based SIR")
  catwrap(txt)
  
  cat("\nSlices:\n")
  print(x$tab, na.print="", quote=FALSE)

  if(x$verbose)
    { if(x$std) 
      { cat("\nStandardized basis vectors using predictors \nscaled to have std.dev. equal to one:\n")
        print(x$std.basis, digits = digits) }
      else    
      { cat("\nEstimated basis vectors:\n")
        print(x$basis, digits = digits) }
    }

  cat("\n")
  print(x$evalues, digits = digits)
      
  if(!is.null(x$StructDimTab))
    { cat("\nStructural dimension:\n")
      colnames(x$StructDimTab) <- 0:(ncol(x$StructDimTab)-1)
      print(x$StructDimTab, na.print="", quote=FALSE) }

  invisible()
}

predict.msir <- function(object, dim = 1:object$numdir, newdata, ...)
{  
  dim <- dim[dim <= object$numdir]
  if(missing(newdata))
  { 
    dir <- object$dir[,dim,drop=FALSE] 
  } else 
  { 
    newdata <- as.matrix(newdata) 
    newdata <- scale(newdata, center = colMeans(object$x), scale = FALSE)
    dir <- newdata %*% object$basis[,dim,drop=FALSE]
  } 
  return(dir)
}

eigen.decomp <- function(X1, X2, inv = FALSE, tol = sqrt(.Machine$double.eps))
{
#
# Eigenvalue decomposition of X1 with respect to X2 (see Li, 2000)
# If inv = TRUE then the second argument is already the inverse of X2
#
  
  # Computes inverse square root matrix such that: 
  #  t(inv.sqrt.X2) %*% inv.sqrt.X2    = 
  #     inv.sqrt.X2 %*% t(inv.sqrt.X2) = solve(X2)
  if(inv) 
    { SVD <- svd(X2, nu=0)
      inv.sqrt.X2 <- SVD$v %*% diag(sqrt(SVD$d), ncol(X2)) %*% t(SVD$v) 
  } else    
    { SVD <- svd(X2, nu=0)
      Positive <- SVD$d > tol
      SVD$d <- SVD$d[Positive]
      SVD$v <- SVD$v[,Positive,drop=FALSE]
      inv.sqrt.X2 <- SVD$v %*% diag(1/sqrt(SVD$d), sum(Positive)) %*% t(SVD$v)
  }
  # Compute  X2^(-1/2)' X1 X2^(-1/2) = VDV'
  # evectors = X2^(-1/2) V
  # evalues  = D
  X1.2 <- t(inv.sqrt.X2) %*% X1 %*% inv.sqrt.X2
  SVD <- svd(X1.2, nu=0)
  evalues  <- SVD$d
  evectors <- inv.sqrt.X2  %*% SVD$v
  #
  return(list(d = evalues, v = evectors))
}

# 
msir.regularizedSigma <- function(x, inv = FALSE, model = c("XII", "XXI", "XXX"))
{
  x <- as.matrix(x)
  n <- nrow(x)
  p <- ncol(x)
  bic <- vector("double", length(model))
  mod <- vector("list", length(model))
  names(bic) <- names(mod) <- model
  for(i in seq(model))
     { switch(model[i],
              "XII" = { mod[[i]] <- mvnXII(x) },
              "XXI" = { mod[[i]] <- mvnXXI(x) },
              "XXX" = { mod[[i]] <- mvnXXX(x) },
                      stop("model not available"))
       bic[i] <- do.call("bic", mod[[i]])
       # XII = 2*loglik - (p+1) * log(n)  
       # XXI = 2*loglik - (2*p) * log(n),
       # XXX = 2*loglik - (p+p*(p+1)/2) * log(n))
     }
  bestMod <- which.max(bic)
  par <- mod[[bestMod]]$parameters
  # mu <- par$mean
  Sigma <- par$variance$Sigma
  attr(Sigma, "model") <- mod[[bestMod]]$modelName
  #
  if(inv) 
    { out <- list(Sigma = Sigma, SigmaInv = NULL)
      switch(model[bestMod],
              "XII" = { out$SigmaInv <- diag(1/diag(par$variance$Sigma)) },
              "XXI" = { out$SigmaInv <- diag(1/diag(par$variance$Sigma)) },
              "XXX" = { out$SigmaInv <- solve(par$variance$Sigma) } )
    }
  else
    { out <- Sigma }  
  #
  return(out)
}

msir.parameters <- function(object, numdir = object$numdir)
{
  mu.x <- colMeans(object$x)
  x <- scale(object$x, center = mu.x, scale = FALSE)
  n <- nrow(x)
  p <- ncol(x)
  b <- object$basis[,1:numdir,drop=FALSE]
  mx <- object$mixmod
  nslices <- object$slice.info$nslices
  tau <-  object$slice.info$slice.sizes/n
  z <- x %*% b
  par <- list()
  for(sl in 1:nslices)
     { m <- mx[[sl]]
       G <- m$G
       pro <- if(G==1) 1 else m$parameter$pro
       mu <- scale(matrix(m$parameters$mean, ncol = p, byrow = TRUE), mu.x, scale = FALSE) %*% b
       if(m$d > 1)
         { # cho <- array(apply(m$parameters$variance$sigma, 3, chol), c(p, p, G))
           # sigma <- array(apply(cho, 3, function(R) crossprod(R %*% b)), 
           #              c(numdir, numdir, G)) 
           sigma <- array(apply(m$parameters$variance$sigma, 3, 
                                function(S) t(b) %*% S %*% b),
                          c(numdir, numdir, G))
         }
       else 
         { sigma <- array(m$parameters$variance$sigmasq, c(1,1,G)) }
       par[[sl]] <- list(pro = pro, mu = mu, sigma = sigma)
     }
  return(par)
}

msir.recoverdir <- function(object, data, normalized = TRUE, std = FALSE)
{
# Recover coefficients of the linear combination defining the MSIR directions.
# This is useful if the directions are obtained from other directions
  stopifnot(inherits(object, "msir"))
  
  x <- if(missing(data)) object$x else as.matrix(data)
  numdir <- object$numdir
  dir <- object$dir[,1:numdir,drop=FALSE]
  # dir <- scale(x, scale = FALSE) %*% object$raw.basis
  B <- as.matrix(coef(lm(dir ~ x)))[-1,,drop=FALSE]
  if(std) 
    { sdx <- sd(x)
      B <- apply(B, 2, function(x) x*sdx) }
  if(normalized) 
      B <- as.matrix(apply(B, 2, normalize))
  rownames(B) <- colnames(x)
  return(B)
}

##
msir.nslices <- function(n, p) 
{
# Sturges' type default number of slices
  # ceiling(log2(n/sqrt(p))+1)
  pmax(3, floor(log2(n/sqrt(p))))
}

## slicing functions in package 'dr' version. 3.0.x by Sanford Weisberg
msir.slices <- function(y, nslices)
{
    dr.slice.1d <- function(y, h) {
        z <- unique(y)
        if (length(z) > h)
            dr.slice2(y, h)
        else dr.slice1(y, sort(z))
    }
    dr.slice1 <- function(y, u) {
        z <- sizes <- 0
        for (j in 1:length(u)) {
            temp <- which(y == u[j])
            z[temp] <- j
            sizes[j] <- length(temp)
        }
        list(slice.indicator = z, nslices = length(u), slice.sizes = sizes)
    }
#     old version
#     dr.slice2<-function(y,h)
# 		{
# 		  or <- order(y)
# 		  n <- length(y)
# 		  m <- floor(n/h)
# 		  r <- n-m*h
# 		  start <- sp <- ans <-0
# 		  j <- 1
# 		  while((start+m)<n) {
# 		      if (r==0)
# 		        start<-start
# 		      else
# 		        {start<-start+1
# 		         r<-r-1
# 		        }
# 		       while (y[or][start+m]==y[or][start+m+1])
# 		          start<-start+1
# 		       sp[j]<-start+m
# 		       start<-sp[j]
# 		       j<-j+1
# 		  }
# 		  sp[j]<-n
# 		  ans[or[1:sp[1]]] <- 1
# 		  for (k in 2:j){ans[ or[(sp[k-1]+1):sp[k] ] ] <- k}
# 		  list(slice.indicator=ans, nslices=j, slice.sizes=c(sp[1],diff(sp)))
# 		}
    dr.slice2 <- function(y,h)
    {
       myfind <- function(x,cty) 
       {
          ans<-which(x <= cty)
          if (length(ans)==0) length(cty) else ans[1]
       } 
       or <- order(y)     # y[or] would return ordered y
       cty <- cumsum(table(y))  # cumulative sums of counts of y
       names(cty) <- NULL # drop class names
       n <- length(y)     # length of y
       m<-floor(n/h)      # nominal number of obs per slice
       sp <- end <- 0     # initialize
       j <- 0             # slice counter will end up <= h
       ans <- rep(1,n)    # initialize slice indicator to all slice 1
       while(end < n-2)   # find slice boundaries: all slices have at least 2 obs
       { 
          end <- end+m
          j <- j+1       
          sp[j] <- myfind(end,cty) 
          end <- cty[sp[j]]
       }
       sp[j] <- length(cty)
       for (j in 2:length(sp)) # build slice indicator
       { 
         firstobs <- cty[sp[j-1]]+1
         lastobs <- cty[sp[j]]
         ans[or[firstobs:lastobs]] <- j
       }
       list(slice.indicator = ans, nslices = length(sp),
            slice.sizes = c(cty[sp[1]],diff(cty[sp])))
    }
    
    p <- if (is.matrix(y)) dim(y)[2] else 1
	  h <- if (length(nslices) == p) nslices else rep(ceiling(nslices^(1/p)),p)
	  a <- dr.slice.1d( if(is.matrix(y)) y[,1] else y, h[1])
	  if (p > 1){
	    for (col in 2:p) {
	       ns <- 0
	       for (j in unique(a$slice.indicator)) {
	         b <- dr.slice.1d(y[a$slice.indicator==j,col],h[col])
	         a$slice.indicator[a$slice.indicator==j] <- a$slice.indicator[a$slice.indicator==j] + 10^(p-1)*b$slice.indicator
	         ns <- ns + b$nslices
	       }
	       a$nslices <- ns
	    }
	    #recode unique values to 1:nslices and fix up slice sizes
	    v <- unique(a$slice.indicator)
	    L <- NULL
	    for (i in 1:length(v)) {
	       sel <- a$slice.indicator==v[i]
	       a$slice.indicator[sel] <- i
	       L <- c(L,length(a$slice.indicator[sel]))
	    }
	    a$slice.sizes <- L
	  }
	  a		
}

msir.components <- function(object)
{
  stopifnot(inherits(object, "msir"))
  nslices <- length(object$mixmod)
  ysl <- object$slice.info$slice.indicator
  ycomp <- rep(NA, length(ysl))
  label <- 0
  for(i in 1:nslices)
     { m <- object$mixmod[[i]]
       labels <- seq(label+1,label+m$G)
       ycomp[ysl==i] <- labels[m$classification]
       label <- max(labels)
     }
  return(ycomp)
}

msir.componentsSlice <- function(object)
{
  stopifnot(inherits(object, "msir"))
  nslices <- length(object$mixmod)
  ycomp <- rep(NA, length(object$y))
  ysl <- object$slice.info$slice.indicator
  for(i in 1:nslices)
     { m <- object$mixmod[[i]]
       ycomp[ysl==i] <- m$classification }
  return(ycomp)
}
luca-scr/msir documentation built on March 2, 2024, 10:05 p.m.