R/rg-mvalloc.R
In StatDA: Statistical Analysis for Environmental Data

"rg.mvalloc" <- 
function(pcrit = 0.050000000000000003, x, ...)
{
	# Function to allocate an individual to one of several (m) populations;
	# see Garrett, RG, 1990: A robust multivariate allocation procedure with
	# applications to geochemical data, in: Proceedings of Colloquium on
	# Statistical Applications in the Earth Sciences (Eds, FP Agterberg &
	# GF Bonham-Carter), Geol Surv Can Paper 89-9, pp 309-318.
	#
	# Matrix x contains the individuals to be allocated.  The following m
	# objects are the saved lists for the reference populations generated by 
	# md.gait, rg.robmva or rg.mva to estimate Mahalanobis distances and
	# predicted probabilities of group membership (typicalities)
	# for individuals in matrix x.
	#
	# Note that the log|determinant| of the appropriate covariance matrix is
	# added to the Mahalanobis distance on the assumption that the
	# covariance matrices are inhomogeneous.
	#
	# The procedure allocates each individual to the population it is most
	# similar to.  If the predicted probability of group membership is less than
	# pcrit it is allocated to group 0.  Thus there are m+1 possible allocations.
	#
	# If the data require transformation this must be undertaken prior to calling
	# this function.  This implies that a similar transformation must have been
	# used for all the reference data subsets.
	#
	## Count reference populations and set up arrays
	ListOfGroups <- list(...)
	m <- length(ListOfGroups)
	px <- length(x[1,  ])
	nx <- length(x[, 1])
	#cat("  m =", m, "\t px =", px, "\t nx =", nx, "\n")
	temp <- (nx - px)/(px * (nx + 1))
	groups <- character(m)
	md <- numeric(nx * m)
	md <- array(md, dim = c(nx, m))
	pgm <- numeric(nx * m)
	pgm <- array(pgm, dim = c(nx, m))
	xalloc <- integer(nx)
	## Estimate probabilities of group membership
	for(k in 1:m) {
		if(ListOfGroups[[k]]$p != px)
			stop("\n  p != px for data set ", k, "\n")
		groups[k] <- ListOfGroups[[k]]$main
		#if(ListOfGroups[[k]]$nc <= 5 * ListOfGroups[[k]]$p)
		#	cat("  *** Proceed with Care, n < 5p for group", groups[k], "***\n")
		#if(ListOfGroups[[k]]$nc <= 3 * ListOfGroups[[k]]$p)
		#	cat("  *** Proceed with Great Care, n < 3p for group", groups[k], 
		#		"***\n")
		md[, k] <- mahalanobis(x, ListOfGroups[[k]]$mean, ListOfGroups[[k]]$cov)
		pgm[, k] <- round(1 - pf(temp * md[, k], px, nx - px), 4)
		md[, k] <- md[, k] + det(ListOfGroups[[k]]$cov)
	}
	## Allocate individuals to groups they are most similar to (i.e. lowest md[, k]) having
	## allowed for heterogeneity of covariance, and check for 'outliers' where all predicted
	## probabilities of group membership are < pcrit. 
	for(i in 1:nx) {
		xalloc[i] <- which(md[i,  ] == min(md[i,  ]))
		if(max(pgm[i,  ]) < pcrit)
			xalloc[i] <- 0
	}
	invisible(list(groups = groups, m = m, n = nx, p = px, pgm = pgm, pcrit = pcrit, xalloc
		 = xalloc))
}