R/gx.mvalloc.R
In rgr: Applied Geochemistry EDA

Documented in gx.mvalloc

gx.mvalloc <-
function(pcrit = 0.05, x, ...)
{
     # Function to allocate an individual to one of several (m) populations;
     # see Garrett, RG, 1990: A robust multivariate allocation procedure with
     # applications to geochemical data, in: Proceedings of Colloquium on
     # Statistical Applications in the Earth Sciences (Eds, FP Agterberg &
     # GF Bonham-Carter), Geol Surv Can Paper 89-9, pp 309-318.
     #
     # Matrix x contains the individuals to be allocated.  The following m
     # objects are the saved lists for the reference populations generated by 
     # gx.md.gait, gx.robmva or gx.mva to estimate Mahalanobis distances and
     # predicted probabilities of group membership (typicalities) for the
     # individuals in matrix x.
     #
     # Note that the log|determinant| of the appropriate covariance matrix
     # is added to the Mahalanobis distance on the assumption that the
     # covariance matrices are inhomogeneous.
     #
     # The procedure allocates each individual to the population it is most
     # similar to.  If the predicted probability of group membership is less
     # than pcrit it is allocated to group 0.  Thus there are kk+1 possible
     # allocations.
     #
     # If the data require transformation this must be undertaken prior to
     # calling this function.  This implies that a similar transformation
     # must have been used for all the reference data subsets.
     #
     # Count reference populations and set up arrays
     ListOfGroups <- list(...)
     kk <- length(ListOfGroups)
     px <- length(x[1,  ])
     nx <- length(x[, 1])
     cat("  k =", kk, "\t px =", px, "\t nx =", nx, "\n")
     temp <- (nx - px)/(px * (nx + 1))
     groups <- character(kk)
     md <- numeric(nx * kk)
     md <- array(md, dim = c(nx, kk))
     pgm <- numeric(nx * kk)
     pgm <- array(pgm, dim = c(nx, kk))
     xalloc <- integer(nx)
     # Estimate probabilities of group membership
     for(k in 1:kk) {
         if(ListOfGroups[[k]]$p != px)
             stop("\n  p != px for data set ", k, "\n")
             groups[k] <- ListOfGroups[[k]]$main
             if(ListOfGroups[[k]]$nc <= 5 * ListOfGroups[[k]]$p)
                 cat("  *** Proceed with Care, n < 5p for group", groups[k], "***\n")
             if(ListOfGroups[[k]]$nc <= 3 * ListOfGroups[[k]]$p)
                 cat("  *** Proceed with Great Care, n < 3p for group", groups[k], "***\n")
             md[, k] <- mahalanobis(x, ListOfGroups[[k]]$mean, ListOfGroups[[k]]$cov)
             pgm[, k] <- round(1 - pf(temp * md[, k], px, nx - px), 4)
             md[, k] <- md[, k] + det(ListOfGroups[[k]]$cov)
         }
     # Allocate individuals to groups they are most similar to (i.e. lowest md[, k])
     # having allowed for heterogeneity of covariance, and check for 'outliers'
     # where all predicted probabilities of group membership are < pcrit. 
     for(i in 1:nx) {
         xalloc[i] <- which(md[i,  ] == min(md[i,  ]))
         if(max(pgm[i,  ]) < pcrit) xalloc[i] <- 0
     }
     invisible(list(groups = groups, kk = kk, n = nx, p = px, pcrit = pcrit, pgm = pgm, xalloc = xalloc))
}