R/util.R

Defines functions as.densityMclust.Mclust as.densityMclust.default as.densityMclust as.Mclust.default as.Mclust catwrap bubble blue2grey.colors hdrlevels uncerPlot surfacePlot randProj coordProj clPairsLegend clPairs covw errorBars vecnorm unchol traceW shapeO partuniq partconv logsumexp orth2 unmap map mapClass classError adjustedRandIndex

Documented in adjustedRandIndex as.densityMclust as.densityMclust.default as.densityMclust.Mclust as.Mclust as.Mclust.default classError clPairs clPairsLegend coordProj covw errorBars hdrlevels map mapClass orth2 partconv partuniq randProj shapeO surfacePlot traceW uncerPlot unchol unmap vecnorm

adjustedRandIndex <- function (x, y) 
{
  x <- as.vector(x)
  y <- as.vector(y)
  if(length(x) != length(y)) 
    stop("arguments must be vectors of the same length")
  tab <- table(x,y)
  if(all(dim(tab)==c(1,1))) return(1)
  a <- sum(choose(tab, 2))
  b <- sum(choose(rowSums(tab), 2)) - a
  c <- sum(choose(colSums(tab), 2)) - a
  d <- choose(sum(tab), 2) - a - b - c
  ARI <- (a - (a + b) * (a + c)/(a + b + c + d)) /
    ((a + b + a + c)/2 - (a + b) * (a + c)/(a + b + c + d))
  return(ARI)
}


classError <- function(classification, truth)
{
  q <- function(map, len, x)
  {
    x <- as.character(x)
    map <- lapply(map, as.character)
    y <- sapply(map, function(x) x[1])
    best <- y != x
    if(all(len) == 1)
      return(best)
    errmin <- sum(as.numeric(best))
    z <- sapply(map, function(x) x[length(x)])
    mask <- len != 1
    counter <- rep(0, length(len))
    k <- sum(as.numeric(mask))
    j <- 0
    while(y != z) 
    {
      i <- k - j
      m <- mask[i]
      counter[m] <- (counter[m] %% len[m]) + 1
      y[x == names(map)[m]] <- map[[m]][counter[m]]
      temp <- y != x
      err <- sum(as.numeric(temp))
      if(err < errmin) 
      {
        errmin <- err
        best <- temp
      }
      j <- (j + 1) %% k
    }
    best
  }
  if (any(isNA <- is.na(classification))) 
  {
    classification <- as.character(classification)
    nachar <- paste(unique(classification[!isNA]),collapse="")
    classification[isNA] <- nachar
  }
  MAP <- mapClass(classification, truth)
  len <- sapply(MAP[[1]], length)
  if(all(len) == 1) 
  {
    CtoT <- unlist(MAP[[1]])
    I <- match(as.character(classification), names(CtoT), nomatch= 0)
    one <- CtoT[I] != truth
  } else 
  {
    one <- q(MAP[[1]], len, truth)
  }
  len <- sapply(MAP[[2]], length)
  if(all(len) == 1) 
  {
    TtoC <- unlist(MAP[[2]])
    I <- match(as.character(truth), names(TtoC), nomatch = 0)
    two <- TtoC[I] != classification
  } else 
  {
    two <- q(MAP[[2]], len, classification)
  }
  err <- if(sum(as.numeric(one)) > sum(as.numeric(two))) 
            as.vector(one) else as.vector(two)
  bad <- seq(along = classification)[err]
  list(misclassified = bad, errorRate = length(bad)/length(truth))
}

mapClass <- function(a, b)
{
  l <- length(a)
  x <- y <- rep(NA, l)
  if(l != length(b)) {
    warning("unequal lengths")
    return(x)
  }
  aChar <- as.character(a)
  bChar <- as.character(b)
  Tab <- table(a, b)
  Ua <- dimnames(Tab)[[1]]
  Ub <- dimnames(Tab)[[2]]
  aTOb <- rep(list(Ub), length(Ua))
  names(aTOb) <- Ua
  bTOa <- rep(list(Ua), length(Ub))
  names(bTOa) <- Ub
  # -------------------------------------------------------------
  k <- nrow(Tab)
  Map <- rep(0, k)
  Max <- apply(Tab, 1, max)
  for(i in 1:k) 
  {
    I <- match(Max[i], Tab[i,  ], nomatch = 0)
    aTOb[[i]] <- Ub[I]
  }
  if(is.numeric(b))
    aTOb <- lapply(aTOb, as.numeric)
  k <- ncol(Tab)
  Map <- rep(0, k)
  Max <- apply(Tab, 2, max)
  for(j in (1:k)) {
    J <- match(Max[j], Tab[, j])
    bTOa[[j]] <- Ua[J]
  }
  if(is.numeric(a))
    bTOa <- lapply(bTOa, as.numeric)
  # -------------------------------------------------------------
  return(list(aTOb = aTOb, bTOa = bTOa))
}

map <- function(z, warn = mclust.options("warn"), ...)
{
  nrowz <- nrow(z)
  cl <- numeric(nrowz)
  I <- 1:nrowz
  J <- 1:ncol(z)
  for(i in I) 
     { cl[i] <- (J[z[i,  ] == max(z[i,  ])])[1] }
  if(warn) 
    { K <- as.logical(match(J, sort(unique(cl)), nomatch = 0))
      if(any(!K))
        warning(paste("no assignment to", paste(J[!K], 
                      collapse = ",")))
  }
  return(cl)
}

unmap <- function(classification, groups=NULL, noise=NULL, ...)
{
  # converts a classification to conditional probabilities
  # classes are arranged in sorted order unless groups is specified
  # if a noise indicator is specified, that column is placed last
  n <- length(classification)
  u <- sort(unique(classification))
  if(is.null(groups))
    { groups <- u }
  else 
    { if(any(match( u, groups, nomatch = 0) == 0)) 
      stop("groups incompatible with classification")
      miss <- match( groups, u, nomatch = 0) == 0
    }
  cgroups <- as.character(groups)
  if(!is.null(noise)) 
    { noiz <- match( noise, groups, nomatch = 0)
      if(any(noiz == 0)) stop("noise incompatible with classification")
      groups <- c(groups[groups != noise],groups[groups==noise])
      noise <- as.numeric(factor(as.character(noise), levels = unique(groups)))
  }
  groups <- as.numeric(factor(cgroups, levels = unique(cgroups)))
  classification <- as.numeric(factor(as.character(classification), levels = unique(cgroups)))
  k <- length(groups) - length(noise)
  nam <- levels(groups)
  if(!is.null(noise)) 
    { k <- k + 1
      nam <- nam[1:k]
      nam[k] <- "noise"
  }
  z <- matrix(0, n, k, dimnames = c(names(classification),nam))
  for(j in 1:k)
     { z[classification == groups[j], j] <- 1 }
  return(z)
}

orth2 <- function (n) 
{
  u <- rnorm(n)
  u <- u/vecnorm(u)
  v <- rnorm(n)
  v <- v/vecnorm(v)
  Q <- cbind(u, v - sum(u * v) * u)
  dimnames(Q) <- NULL
  Q
}

logsumexp <- function(x)
{ 
# Numerically efficient implementation of log(sum(exp(x)))
  max <- max(x)
  max + log(sum(exp(x-max)))
}

partconv <- function(x, consec = TRUE)
{
  n <- length(x)
  y <- numeric(n)
  u <- unique(x)
  if(consec) {
    # number groups in order of first row appearance
    l <- length(u)
    for(i in 1:l)
      y[x == u[i]] <- i
  }
  else {
    # represent each group by its lowest-numbered member
    for(i in u) {
      l <- x == i
      y[l] <- (1:n)[l][1]
    }
  }
  y
}

partuniq <- function(x)
{
  # finds the classification that removes duplicates from x
  charconv <- function(x, sep = "001")
  {
    if(!is.data.frame(x)) x <- data.frame(x)
    do.call("paste", c(as.list(x), sep = sep))
  }
  
  n <- nrow(x)
  x <- charconv(x)
  k <- duplicated(x)
  partition <- 1.:n
  partition[k] <- match(x[k], x)
  partition
}

shapeO <- function(shape, O, transpose = FALSE)
{
  dimO <- dim(O)
  if(dimO[1] != dimO[2])
    stop("leading dimensions of O are unequal")
  if((ldO <- length(dimO)) != 3) {
    if(ldO == 2) {
      dimO <- c(dimO, 1)
      O <- array(O, dimO)
    }
    else stop("O must be a matrix or an array")
  }
  l <- length(shape)
  if(l != dimO[1])
    stop("dimension of O and length s are unequal")
  storage.mode(O) <- "double"
  .Fortran("shapeo",
           as.logical(transpose),
           as.double(shape),
           O,
           as.integer(l),
           as.integer(dimO[3]),
           double(l * l),
           integer(1),
           PACKAGE = "mclust")[[3]]
}

traceW <- function(x)
{
  # sum(as.vector(sweep(x, 2, apply(x, 2, mean)))^2)
  dimx <- dim(x)
  n <- dimx[1]
  p <- dimx[2]
  .Fortran("mcltrw",
           as.double(x),
           as.integer(n),
           as.integer(p),
           double(p),
           double(1),
           PACKAGE = "mclust")[[5]]
}

unchol <- function(x, upper = NULL)
{
  if(is.null(upper)) {
    upper <- any(x[row(x) < col(x)])
    lower <- any(x[row(x) > col(x)])
    if(upper && lower)
      stop("not a triangular matrix")
    if(!(upper || lower)) {
      x <- diag(x)
      return(diag(x * x))
    }
  }
  dimx <- dim(x)
  storage.mode(x) <- "double"
  .Fortran("uncholf",
           as.logical(upper),
           x,
           as.integer(nrow(x)),
           as.integer(ncol(x)),
           integer(1),
           PACKAGE = "mclust")[[2]]
}

vecnorm <- function (x, p = 2) 
{
  if (is.character(p)) {
    if (charmatch(p, "maximum", nomatch = 0) == 1) 
      p <- Inf
    else if (charmatch(p, "euclidean", nomatch = 0) == 1) 
      p <- 2
    else stop("improper specification of p")
  }
  if (!is.numeric(x) && !is.complex(x)) 
    stop("mode of x must be either numeric or complex")
  if (!is.numeric(p)) 
    stop("improper specification of p")
  if (p < 1) 
    stop("p must be greater than or equal to 1")
  if (is.numeric(x)) 
    x <- abs(x)
  else x <- Mod(x)
  if (p == 2) 
    return(.Fortran("d2norm", as.integer(length(x)), as.double(x), 
                    as.integer(1), double(1), PACKAGE = "mclust")[[4]])
  if (p == Inf) 
    return(max(x))
  if (p == 1) 
    return(sum(x))
  xmax <- max(x)
  if (!xmax) 
    xmax <- max(x)
  if (!xmax) 
    return(xmax)
  x <- x/xmax
  xmax * sum(x^p)^(1/p)
}

errorBars <- function(x, upper, lower, width = 0.1, code = 3, angle = 90, horizontal = FALSE, ...) 
{ 
# Draw error bars at x from upper to lower. If horizontal = FALSE (default)
# bars are drawn vertically, otherwise horizontally.
  if(horizontal)
    arrows(upper, x, lower, x, length = width, angle = angle, code = code, ...)
  else  
    arrows(x, upper, x, lower, length = width, angle = angle, code = code, ...)
}

covw <- function(X, Z, normalize = TRUE)
# Given data matrix X(n x p) and weight matrix Z(n x G) computes
# weighted means(p x G), weighted covariance matrices S(p x p x G) and
# weighted scattering matrices W(p x p x G)
{
  X <- as.matrix(X)
  Z <- as.matrix(Z)
  n <- nrow(X)
  p <- ncol(X)
  nZ <- nrow(Z)
  G <- ncol(Z)
  if(n != nZ) 
    stop("X and Z must have same number of rows")
  if(normalize)
    Z <- t( apply(Z, 1, function(z) z/sum(z)) )
  
  tmp <- .Fortran("covwf",
                  X = as.double(X),
                  Z = as.double(Z),
                  n = as.integer(n),
                  p = as.integer(p),
                  G = as.integer(G),
                  mean = double(p*G),
                  S = double(p*p*G),
                  W = double(p*p*G),
                  PACKAGE = "mclust")
  
  out <- list(mean = matrix(tmp$mean, p,G), 
              S = array(tmp$S, c(p,p,G)),
              W = array(tmp$W, c(p,p,G)) )
  return(out)
}

clPairs <- function (data, classification, symbols = NULL, colors = NULL, 
                     labels = dimnames(data)[[2]], CEX = 1, gap = 0.2, ...) 
{
  data <- as.matrix(data)
  n <- nrow(data)
  d <- ncol(data)
  if(missing(classification)) 
    classification <- rep(1, n)
  if(!is.factor(classification)) 
    classification <- as.factor(classification)
  l <- length(levels(classification))
  if(length(classification) != n)
    stop("classification variable must have the same length as nrows of data!")
  if(missing(symbols)) 
    { if(l == 1) 
        { symbols <- "." }
      if(l <= length(mclust.options("classPlotSymbols")))
        { symbols <- mclust.options("classPlotSymbols") }
      else { if(l <= 9) { symbols <- as.character(1:9) }
             else if(l <= 26) { symbols <- LETTERS[1:l] }
                  else symbols <- rep(16,l)
           }
  }
  if(length(symbols) == 1) symbols <- rep(symbols, l)
  if(length(symbols) < l) 
    { symbols <- rep(16, l)
      warning("more symbols needed")
  }
  if(is.null(colors)) 
    { if(l <= length(mclust.options("classPlotColors"))) 
      colors <- mclust.options("classPlotColors")[1:l]
  }
  if(length(colors) == 1) colors <- rep(colors, l)
  if(length(colors) < l) 
    { colors <- rep( "black", l)
      warning("more colors needed")
  }

  if(d > 2)
    { pairs(x = data, labels = labels, 
            pch = symbols[classification], 
            cex = CEX, col = colors[classification], 
            gap = gap, ...) }
  else if(d == 2)
    { plot(data, cex = CEX, 
            pch = symbols[classification], 
            col = colors[classification], 
            ...) }
  
  invisible(list(d = d,
                 class = levels(classification), 
                 col = colors,
                 pch = symbols[seq(l)]))
}

clPairsLegend <- function(x, y, class, col, pch, box = TRUE, ...)
{
  usr <- par("usr")
  
  if(box & all(usr == c(0,1,0,1))) 
  {
    oldpar <- par(mar = rep(0.3, 4), no.readonly = TRUE)
    on.exit(par(oldpar))
    box(which = "plot")
  }
  
  if(!all(usr == c(0,1,0,1)))
  {
    x <- x*(usr[2]-usr[1])+usr[1]
    y <- y*(usr[4]-usr[3])+usr[3]
  }

  legend(x = x, y = y, legend = class, 
         col = col, text.col = col, pch = pch, 
         title.col = par("fg"), xpd = NA, ...)
}

coordProj <- function(data, dimens = c(1,2), parameters = NULL, 
                      z = NULL, classification = NULL, 
                      truth = NULL, uncertainty = NULL, 
                      what = c("classification", "errors", "uncertainty"), 
                      addEllipses = TRUE, symbols = NULL, 
                      colors = NULL, scale = FALSE, xlim = NULL, ylim = NULL, 
                      CEX = 1, PCH = ".", main = FALSE, ...)
{
  if(is.null(dimens)) dimens <- c(1, 2)
  if(is.null(classification) && !is.null(z))
    classification <- map(z)
  if(is.null(uncertainty) && !is.null(z))
    uncertainty <- 1 - apply(z, 1, max)
  if(!is.null(parameters)) 
    { mu <- parameters$mean
      L <- ncol(mu)
      sigma <- parameters$variance$sigma
      haveParams <- !is.null(mu) && !is.null(sigma) && !any(is.na(mu)) && !any( is.na(sigma))
  }
  else haveParams <- FALSE
  data <- data[, dimens, drop = FALSE]
  if(dim(data)[2] != 2)
    stop("need two dimensions")
  if(is.null(xlim))
    xlim <- range(data[, 1])
  if(is.null(ylim))
    ylim <- range(data[, 2])
  if(scale) {
    par(pty = "s")
    d <- diff(xlim) - diff(ylim)
    if(d > 0) {
      ylim <- c(ylim[1] - d/2, ylim[2] + d/2.)
    }
    else {
      xlim <- c(xlim[1] + d/2, xlim[2] - d/2)
    }
  }
  if(is.null(dnames <- dimnames(data)[[2]]))
    xlab <- ylab <- ""
  else {
    xlab <- dnames[1]
    ylab <- dnames[2]
  }
  main <- if(is.null(main) || is.character(main)) FALSE else as.logical(main)
  if(haveParams) {
    G <- ncol(mu)
    dimpar <- dim(sigma)
    if(length(dimpar) != 3) {
      haveParams <- FALSE
      warning("covariance must be a 3D matrix")
    }
    if(G != dimpar[3]) {
      haveParams <- FALSE
      warning("means and variance parameters are incompatible")
    }
    mu <- array(mu[dimens,  ], c(2, G))
    sigma <- array(sigma[dimens, dimens,  ], c(2, 2, G))
  }
  if(!is.null(truth)) 
  {
    if(is.null(classification)) {
      classification <- truth
      truth <- NULL
    }
  }
  if(!is.null(classification)) 
  {
    classification <- as.character(classification)
    U <- sort(unique(classification))
    L <- length(U)
    noise <- (U[1] == "0")
    # browser()
    if(is.null(symbols)) {
      if(L <= length(mclust.options("classPlotSymbols"))) {
        symbols <- mclust.options("classPlotSymbols")[1:L]
        if(noise) 
          { symbols <- c(16,symbols)[1:L] }
        }
      else if(L <= 9) {
        symbols <- as.character(1:9)
      }
      else if(L <= 26) {
        symbols <- LETTERS
      }
    }
    else if(length(symbols) == 1)
      symbols <- rep(symbols, L)
    if(is.null(colors)) {
      if(L <= length(mclust.options("classPlotColors"))) 
      {
        colors <- mclust.options("classPlotColors")[1:L]
        if(noise) 
          { colors <- unique(c("black", colors))[1:L] }
      }
    }
    else if(length(colors) == 1)
      colors <- rep(colors, L)
    if(length(symbols) < L) {
      warning("more symbols needed to show classification ")
      symbols <- rep(16,L)
    }
    if(length(colors) < L) {
      warning("more colors needed to show classification ")
      colors <- rep("black",L)
    }
  }
  if(length(what) > 1)
    what <- what[1]
  choices <- c("classification", "errors", "uncertainty")
  m <- charmatch(what, choices, nomatch = 0)
  if(m) {
    what <- choices[m]
    bad <- what == "classification" && is.null(classification)
    bad <- bad || (what == "uncertainty" && is.null(uncertainty))
    bad <- bad || (what == "errors" && (is.null(classification) || is.null(
      truth)))
    if(bad)
      warning("insufficient input for specified plot")
    badClass <- (what == "errors" && (length(unique(classification)) != length(
      unique(truth))))
    if(badClass && !bad)
      warning("classification and truth differ in number of groups")
    bad <- bad && badClass
  } else 
  {
    bad <- !m
    warning("what improperly specified")
  }
  if(bad) what <- "bad"
  
  switch(EXPR = what,
         "classification" = {
           plot(data[, 1], data[, 2], type = "n", xlab = xlab, ylab = ylab, 
                xlim = xlim, ylim = ylim, main = "", ...)
           if(main) {
             TITLE <- paste(paste(dimens, collapse = ","), 
                            "Coordinate Projection showing Classification")
             title(main = TITLE)
           }
           for(k in 1:L) {
             I <- classification == U[k]
             points(data[I, 1], data[I, 2], pch = symbols[k], col = colors[k], 
                    cex = if(U[k] == "0") CEX/3 else CEX)
           }
         },
         "errors" = {
           ERRORS <- classError(classification, truth)$misclassified
           plot(data[, 1], data[, 2], type = "n", xlab = xlab, ylab = ylab, 
                xlim = xlim, ylim = ylim, main = "", ...)
           if(main) {
             TITLE <- paste(paste(dimens, collapse = ","), 
                            "Coordinate Projection showing Errors")
             title(main = TITLE)
           }
           CLASSES <- unique(as.character(truth))
           symOpen <- c(2, 0, 1, 5)
           symFill <- c(17, 15, 16, 18)
           good <- rep(TRUE, length(classification))
           good[ERRORS] <- FALSE
           if(L > 4) {
             points(data[good, 1], data[good, 2], pch = 1, col = colors, cex = CEX)
             points(data[!good, 1], data[!good, 2], pch = 16, cex = CEX)
           }
           else {
             for(k in 1:L) {
               K <- truth == CLASSES[k]
               if(any(I <- (K & good))) {
                 points(data[I, 1], data[I, 2], pch = symOpen[k], col = colors[k], 
                        cex = CEX)
               }
               if(any(I <- (K & !good))) {
                 points(data[I, 1], data[I, 2], pch = symFill[k], cex = CEX)
               }
             }
           }
         },
         "uncertainty" = { 
           u <- (uncertainty - min(uncertainty)) /
                (max(uncertainty) - min(uncertainty) + sqrt(.Machine$double.eps))
           b <- bubble(u, cex = CEX * c(0.3, 2), alpha = c(0.3, 0.9))
           cl <- sapply(classification, function(cl) which(cl == U))
           plot(data[, 1], data[, 2], pch = 19, main = "", 
                xlab = xlab, ylab = ylab, 
                xlim = xlim, ylim = ylim,
                cex = b$cex, 
                col = mapply(adjustcolor, col = colors[cl], alpha.f = b$alpha), 
                ...)
           if(main) 
             { TITLE <- paste(paste(dimens, collapse = ","), 
                              "Coordinate Projection showing Uncertainty")
               title(main = TITLE)
           }
         },
         { plot(data[, 1], data[, 2], type = "n", 
                xlab = xlab, ylab = ylab, 
                xlim = xlim, ylim = ylim, main = "", ...)
           if(main) 
             { TITLE <- paste(paste(dimens, collapse = ","), "Coordinate Projection")
               title(main = TITLE) }
           points(data[, 1], data[, 2], pch = PCH, cex = CEX)
         }
  )
  if(haveParams && addEllipses)
    { ## plot ellipsoids
      for(k in 1:G)
        mvn2plot(mu = mu[,k], sigma = sigma[,,k], k = 15)
  }

  invisible()
}

randProj <- function(data, seeds = 0, 
                     parameters = NULL, z = NULL,
                     classification = NULL, truth = NULL, 
                     uncertainty = NULL, 
                     what = c("classification", "errors", "uncertainty"), 
                     quantiles = c(0.75, 0.95), symbols = NULL, 
                     colors = NULL, scale = FALSE, 
                     xlim = NULL, ylim = NULL, CEX = 1, PCH = ".", 
                     main = FALSE, ...)
{
  if(scale) par(pty = "s")
  if(is.null(classification) && !is.null(z))
    classification <- map(z)
  if(is.null(uncertainty) && !is.null(z))
    uncertainty <- 1 - apply(z, 1, max)
  if(!is.null(parameters)) {
    mu <- parameters$mean
    L <- ncol(mu)
    sigma <- parameters$variance$sigma
    haveParams <- !is.null(mu) && !is.null(sigma) && !any(is.na(mu)) && !any(
      is.na(sigma))
  }
  else haveParams <- FALSE
  xlab <- ylab <- ""
  p <- ncol(data)
  if(haveParams) {
    G <- ncol(mu)
    dimpar <- dim(sigma)
    if(length(dimpar) != 3) {
      haveParams <- FALSE
      warning("covariance must be a 3D matrix")
    }
    if(G != dimpar[3]) {
      haveParams <- FALSE
      warning("means and variance parameters are incompatible")
    }
    cho <- array(apply(sigma, 3, chol), c(p, p, G))
  }
  if(!is.null(truth)) {
    if(is.null(classification)) {
      classification <- truth
      truth <- NULL
    }
    else {
      if(length(unique(truth)) != length(unique(classification)))
        truth <- NULL
      else truth <- as.character(truth)
    }
  }
  if(!is.null(classification)) {
    classification <- as.character(classification)
    U <- sort(unique(classification))
    L <- length(U)
    noise <- (U[1] == "0")
    if(is.null(symbols)) {
      if(L <= length(mclust.options("classPlotSymbols"))) 
      { symbols <- mclust.options("classPlotSymbols")[1:L]
        if(noise)
        { symbols <- c(16,symbols)[1:L] }
      }
      else if(L <= 9) {
        symbols <- as.character(1:9)
      }
      else if(L <= 26) {
        symbols <- LETTERS
      }
    }
    else if(length(symbols) == 1)
      symbols <- rep(symbols, L)
    if(is.null(colors)) 
    { if(L <= length(mclust.options("classPlotColors"))) 
    { colors <- mclust.options("classPlotColors")[1:L]
      if(noise) 
      { colors <- unique(c("black", colors))[1:L] }
    }
    }
    else if(length(colors) == 1)
      colors <- rep(colors, L)
    if(length(symbols) < L) {
      warning("more symbols needed to show classification ")
      symbols <- rep(16,L)
    }
    if (length(colors) < L) {
      warning("more colors needed to show classification ")
      colors <- rep("black",L)
    }
  }
  if(length(what) > 1)
    what <- what[1]
  choices <- c("classification", "errors", "uncertainty")
  m <- charmatch(what, choices, nomatch = 0)
  if(m) {
    what <- choices[m]
    bad <- what == "classification" && is.null(classification)
    bad <- bad || (what == "uncertainty" && is.null(uncertainty))
    bad <- bad || (what == "errors" && (is.null(classification) || is.null(
      truth)))
    if(bad)
      warning("insufficient input for specified plot")
  }
  else {
    bad <- !m
    warning("what improperly specified")
  }
  if(bad) what <- "bad"
  main <- if(is.null(main) || is.character(main)) FALSE else as.logical(main)
  nullXlim <- is.null(xlim)
  nullYlim <- is.null(ylim)
  if(length(seeds) > 1)
    par(ask = TRUE)
  for(seed in seeds) {
    set.seed(seed)
    Q <- orth2(p)
    Data <- as.matrix(data) %*% Q
    if(dim(Data)[2] != 2)
      stop("need two dimensions")
    if(nullXlim)
      xlim <- range(Data[, 1])
    if(nullYlim)
      ylim <- range(Data[, 2])
    if(scale) {
      d <- diff(xlim) - diff(ylim)
      if(d > 0) {
        ylim <- c(ylim[1] - d/2, ylim[2] + d/2.)
      }
      else {
        xlim <- c(xlim[1] + d/2, xlim[2] - d/2)
      }
    }
    switch(EXPR = what,
           classification = {
             plot(Data[, 1], Data[, 2], type = "n", xlab = xlab, ylab = ylab, xlim
                  = xlim, ylim = ylim, main = "", ...)
             for(k in 1:L) {
               I <- classification == U[k]
               points(Data[I, 1], Data[I, 2], pch = symbols[k], col = colors[k], cex
                      = CEX)
             }
             if(main) {
               TITLE <- paste("Random Projection showing Classification: seed = ", 
                              seed)
               title(TITLE)
             }
           }
           ,
           errors = {
             ERRORS <- classError(classification, truth)$misclassified
             plot(Data[, 1], Data[, 2], type = "n", xlab = xlab, ylab = ylab, xlim
                  = xlim, ylim = ylim, main = "", ...)
             if(main) {
               TITLE <- paste("Random Projection showing Errors: seed = ", seed)
               title(TITLE)
             }
             CLASSES <- unique(as.character(truth))
             symOpen <- c(2, 0, 1, 5)
             symFill <- c(17, 15, 16, 18)
             good <- !ERRORS
             if(L > 4) {
               points(Data[good, 1], Data[good, 2], pch = 1, col = colors, cex = CEX)
               points(Data[!good, 1], Data[!good, 2], pch = 16, cex = CEX)
             }
             else {
               for(k in 1:L) {
                 K <- truth == CLASSES[k]
                 points(Data[K, 1], Data[K, 2], pch = symOpen[k], col = colors[k], 
                        cex = CEX)
                 if(any(I <- (K & ERRORS))) {
                   points(Data[I, 1], Data[I, 2], pch = symFill[k], cex = CEX)
                 }
               }
             }
           }
           ,
           uncertainty = {
             plot(Data[, 1], Data[, 2], type = "n", xlab = xlab, ylab = ylab, xlim
                  = xlim, ylim = ylim, main = "", ...)
             if(main) {
               TITLE <- paste("Random Projection showing Uncertainty: seed = ", seed
               )
               title(TITLE)
             }
             breaks <- quantile(uncertainty, probs = sort(quantiles))
             I <- uncertainty <= breaks[1]
             points(Data[I, 1], Data[I, 2], pch = 16, col = "gray75", cex = 0.5 * CEX)
             I <- uncertainty <= breaks[2] & !I
             points(Data[I, 1], Data[I, 2], pch = 16, col = "gray50", cex = 1 * CEX)
             I <- uncertainty > breaks[2] & !I
             points(Data[I, 1], Data[I, 2], pch = 16, col = "black", cex = 1.5 * CEX)
           }
           ,
{
  plot(Data[, 1], Data[, 2], type = "n", xlab = xlab, ylab = ylab, xlim
       = xlim, ylim = ylim, main = "", ...)
  if(main) {
    TITLE <- paste("Random Projection: seed = ", seed)
    title(TITLE)
  }
  points(Data[, 1], Data[, 2], pch = PCH, cex = CEX)
}
    )
if(haveParams) {
  ## plot ellipsoids
  muTrans <- crossprod(Q, mu)
  sigmaTrans <- array(apply(cho, 3, function(R, Q)
    crossprod(R %*% Q), Q = Q), c(2, 2, G))
  for(k in 1:G)
    mvn2plot(mu = muTrans[, k], sigma = sigmaTrans[,  , k], k = 15)
}
  }
invisible()
}

surfacePlot <- function(data, parameters, 
                        type = c("contour", "level", "image", "persp"), 
                        what = c("density", "uncertainty"), 
                        transformation = c("none", "log", "sqrt"), 
                        grid = 100, nlevels = 11, levels = NULL, 
                        color.palette = blue2grey.colors,
                        col = grey(0.6), xlim = NULL, ylim = NULL, 
                        xlab = NULL, ylab = NULL, scale = FALSE, 
                        main = FALSE, swapAxes = FALSE,
                        verbose = FALSE,  ...) 
{
  grid1 <- function(n, range = c(0, 1), edge = TRUE) {
    if (any(n < 0 | round(n) != n)) 
      stop("n must be nonpositive and integer")
    G <- rep(0, n)
    if (edge) {
      G <- seq(from = min(range), to = max(range), by = abs(diff(range))/(n-1))
    }
    else {
      lj <- abs(diff(range))
      incr <- lj/(2 * n)
      G <- seq(from = min(range) + incr, to = max(range) - incr, by = 2 * incr)
    }
    G
  }
  grid2 <- function(x, y) {
    lx <- length(x)
    ly <- length(y)
    xy <- matrix(0, nrow = lx * ly, ncol = 2)
    l <- 0
    for (j in 1:ly) {
      for (i in 1:lx) {
        l <- l + 1
        xy[l,] <- c(x[i], y[j])
      }
    }
    xy
  }
  data <- as.matrix(data)
  if(dim(data)[2] != 2) 
    stop("data must be two dimensional")
  
  densNuncer <- function(modelName, data, parameters) 
  {
    if(is.null(parameters$variance$cholsigma)) 
    { parameters$variance$cholsigma <- parameters$variance$sigma
      G <- dim(parameters$variance$sigma)[3]
      for(k in 1:G) 
        parameters$variance$cholsigma[,,k] <- chol(parameters$variance$sigma[,,k])
    }
    cden <- cdensVVV(data = data, parameters = parameters, logarithm = TRUE)
    pro <- parameters$pro
    if(!is.null(parameters$Vinv)) 
      pro <- pro[-length(pro)]
    z <- sweep(cden, MARGIN = 2, FUN = "+", STATS = log(pro))
    logden <- apply(z, 1, logsumexp)
    z <- sweep(z, MARGIN = 1, FUN = "-", STATS = logden)
    z <- exp(z)
    data.frame(density = exp(logden),
               uncertainty = 1 - apply(z, 1, max))
  }
  pro <- parameters$pro
  mu <- parameters$mean
  sigma <- parameters$variance$sigma
  haveParams <- !is.null(mu) && !is.null(sigma) && !is.null(pro) && 
                !any(is.na(mu)) && !any(is.na(sigma)) && !(any(is.na(pro)))
  if(haveParams) 
    { G <- ncol(mu)
      dimpar <- dim(sigma)
      if(length(dimpar) != 3) 
        { haveParams <- FALSE
          warning("covariance must be a 3D matrix")
      }
      if(G != dimpar[3]) 
        { haveParams <- FALSE
          warning("means and variance parameters are incompatible")
      }
      mu <- array(mu, c(2, G))
      sigma <- array(sigma, c(2, 2, G))
  }
  
  if(!haveParams) 
    stop("need parameters to compute density")
  
  if(swapAxes) 
    { if(haveParams) 
        { parameters$pro <- pro[2:1]
          parameters$mean <- mu[2:1,]
          parameters$variance$sigma <- sigma[2:1, 2:1,]
      }
      data <- data[, 2:1]
  }
  main <- if(is.null(main) || is.character(main)) FALSE else as.logical(main)
  if(is.null(xlim)) xlim <- range(data[, 1])
  if(is.null(ylim)) ylim <- range(data[, 2])
  if(scale)
    { par(pty = "s")
      d <- diff(xlim) - diff(ylim)
      if(d > 0) 
        { ylim <- c(ylim[1] - d/2, ylim[2] + d/2) }
      else 
        { xlim <- c(xlim[1] + d/2, xlim[2] - d/2) }
  }
  
  dnames <- dimnames(data)[[2]]
  if(is.null(xlab)) 
    { xlab <- if(is.null(dnames)) "" else dnames[1] }
  if(is.null(ylab)) 
    { ylab <- if(is.null(dnames)) "" else dnames[2] }
  
  if(length(grid) == 1) 
    grid <- c(grid, grid)
  x <- grid1(n = grid[1], range = xlim, edge = TRUE)
  y <- grid1(n = grid[2], range = ylim, edge = TRUE)
  xy <- grid2(x, y)
  if(verbose) 
    cat("\n computing density and uncertainty over grid ...\n")
  Z <- densNuncer(modelName = "VVV", data = xy, parameters = parameters)
  lx <- length(x)
  ly <- length(y)
  CI <- type
  DU <- what
  TRANS <- transformation
  if(length(CI) > 1) CI <- CI[1]
  if(length(DU) > 1) DU <- DU[1]
  if(length(TRANS) > 1) TRANS <- TRANS[1]
  switch(EXPR = DU, 
         density = { zz <- matrix(Z$density, lx, ly)
                     title2 <- "Density" }, 
         uncertainty = { zz <- matrix(Z$uncertainty, lx, ly)
                         title2 <- "Uncertainty" }, 
         stop("what improperly specified"))
  switch(EXPR = TRANS, 
         none = { title1 <- "" }, 
         log = { zz <- log(zz)
                 title1 <- "log" }, 
         sqrt = { zz <- sqrt(zz)
                  title1 <- "sqrt" }, 
         stop("transformation improperly specified"))
  
  switch(EXPR = CI, 
         "contour" = {
           title3 <- "Contour"
           if(is.null(levels)) levels <- pretty(zz, nlevels)
           contour(x = x, y = y, z = zz, levels = levels, 
                   xlab = xlab, ylab = ylab, 
                   col = col, main = "", ...)
         },
         "level" = {
           title3 <- "HDR level"
           if(is.null(levels)) levels <- pretty(zz, nlevels)
           plot(x, y, type = "n",
                xlab = xlab, ylab = ylab, ...)
           fargs <- formals(".filled.contour")
           dargs <- c(list(x = x, y = y, z = zz, 
                          levels = levels,
                          col = color.palette(length(levels))), 
                      args)
           dargs <- dargs[names(dargs) %in% names(fargs)]
           fargs[names(dargs)] <- dargs
           do.call(".filled.contour", fargs)
         }, 
         "image" = {
           title3 <- "Image"
           if(length(col) == 1)
             { if(!is.null(levels)) 
                 nlevels <- length(levels)
               col <- mapply(adjustcolor, col = col, 
                             alpha.f = seq(0.1, 1, length = nlevels))
           }
           image(x = x, y = y, z = zz, xlab = xlab, ylab = ylab, 
                 col = col, main = "", ...)
         }, 
         "persp" = {
           title3 <- "Perspective"
           dots <- list(...)
           if(is.null(dots$theta)) dots$theta <- -30
           if(is.null(dots$phi))   dots$phi <- 20
           if(is.null(dots$expand)) dots$expand <- 0.6
           do.call("persp", c(list(x = x, y = y, z = zz, 
                                   xlab = xlab, ylab = ylab, col = col,
                                   zlab = "Density", main = ""), dots))
         }, stop("type improperly specified"))
  if(main) 
    { TITLE <- paste(c(title1, title2, title3, "Plot"), collapse = " ")
      title(TITLE) }

  invisible(list(x = x, y = y, z = zz))
}

uncerPlot <- function (z, truth=NULL, ...) 
{
  oldpar <- par(no.readonly = TRUE)
  on.exit(par(oldpar))
  par(pty = "m")
  uncer <- 1 - apply(z, 1, max)
  ord <- order(uncer)
  M <- max(uncer)
  plot(uncer[ord], ylab = "uncertainty", ylim = c(-(M/32), M), 
       xaxt = "n", xlab = "observations in order of increasing uncertainty", 
       type = "n")
  points(uncer[ord], pch = 15, cex = 0.5)
  lines(uncer[ord])
  abline(h = c(0, 0), lty = 3)
  if (!is.null(truth)) {
    truth <- as.numeric(as.factor(truth))
    n <- length(truth)
    result <- map(z)
    bad <- classError(result, truth)$misclassified
    if(length(bad)) 
    { for(i in bad) 
    { x <- (1:n)[ord == i]
      lines(c(x, x), c(-(0.5/32), uncer[i]), lty = 1)
    }
    }
  }
  invisible()
}

hdrlevels <- function(density, prob)
{
  if(missing(density) | missing(prob))
    stop("Please provide both density and prob arguments to function call!")
  density <- as.vector(density)
  prob <- pmin(pmax(as.numeric(prob), 0), 1)
  alpha <- 1-prob
  lev <- quantile(density, alpha)
  names(lev) <- paste0(round(prob*100),"%")
  return(lev)
}

blue2grey.colors <- function(n) 
{
  palette <- grDevices::colorRampPalette(c("#E6E6E6", "#bcc9d1", "#6c7f97", "#3e5264"), 
                                           space = "Lab")
  palette(n)
}

bubble <- function(x, cex = c(0.2, 3), alpha = c(0.1, 1)) 
{
  x <- as.vector(x)
  cex <- cex[!is.na(cex)]
  alpha <- alpha[!is.na(alpha)]
  x <- (x - min(x))/(max(x) - min(x) + sqrt(.Machine$double.eps))
  n <- length(x)
  r <- sqrt(x/pi)
  r <- (r - min(r, na.rm = TRUE))/
       (max(r, na.rm = TRUE) - min(r, na.rm = TRUE) + sqrt(.Machine$double.eps))
  cex <- r * diff(range(cex)) + min(cex)
  alpha <- x * diff(range(alpha)) + min(alpha)
  return(list(cex = cex, alpha = alpha))
}

catwrap <- function(x, width = getOption("width"), ...)
{
# version of cat with wrapping at specified width
  cat(paste(strwrap(x, width = width, ...), collapse = "\n"), "\n")
}


##
##-- Convert to a from classes 'Mclust' and 'densityMclust' ------------------
##

as.Mclust <- function(x, ...)
{ 
  UseMethod("as.Mclust")
}

as.Mclust.default <- function(x, ...)
{ 
  if(inherits(x, "Mclust")) x
  else stop("argument 'x' cannot be coerced to class 'Mclust'")
}

as.densityMclust <- function(x, ...)
{ 
  UseMethod("as.densityMclust")
}

as.densityMclust.default <- function(x, ...)
{ 
  if(inherits(x, "densityMclust")) x
  else stop("argument 'x' cannot be coerced to class 'densityMclust'")
}

as.densityMclust.Mclust <- function(x, ...)
{ 
  class(x) <- c("densityMclust", class(x))
  x$density <- dens(modelName = x$modelName, data = x$data, 
                    parameters = x$parameters, logarithm = FALSE)
  return(x)
}

Try the mclust package in your browser

Any scripts or data that you put into this service are public.

mclust documentation built on Nov. 17, 2018, 5:04 p.m.