# R/rg-mvalloc.R In StatDA: Statistical Analysis for Environmental Data

```"rg.mvalloc" <-
function(pcrit = 0.050000000000000003, x, ...)
{
# Function to allocate an individual to one of several (m) populations;
# see Garrett, RG, 1990: A robust multivariate allocation procedure with
# applications to geochemical data, in: Proceedings of Colloquium on
# Statistical Applications in the Earth Sciences (Eds, FP Agterberg &
# GF Bonham-Carter), Geol Surv Can Paper 89-9, pp 309-318.
#
# Matrix x contains the individuals to be allocated.  The following m
# objects are the saved lists for the reference populations generated by
# md.gait, rg.robmva or rg.mva to estimate Mahalanobis distances and
# predicted probabilities of group membership (typicalities)
# for individuals in matrix x.
#
# Note that the log|determinant| of the appropriate covariance matrix is
# added to the Mahalanobis distance on the assumption that the
# covariance matrices are inhomogeneous.
#
# The procedure allocates each individual to the population it is most
# similar to.  If the predicted probability of group membership is less than
# pcrit it is allocated to group 0.  Thus there are m+1 possible allocations.
#
# If the data require transformation this must be undertaken prior to calling
# this function.  This implies that a similar transformation must have been
# used for all the reference data subsets.
#
## Count reference populations and set up arrays
ListOfGroups <- list(...)
m <- length(ListOfGroups)
px <- length(x[1,  ])
nx <- length(x[, 1])
cat("  m =", m, "\t px =", px, "\t nx =", nx, "\n")
temp <- (nx - px)/(px * (nx + 1))
groups <- character(m)
md <- numeric(nx * m)
md <- array(md, dim = c(nx, m))
pgm <- numeric(nx * m)
pgm <- array(pgm, dim = c(nx, m))
xalloc <- integer(nx)
## Estimate probabilities of group membership
for(k in 1:m) {
if(ListOfGroups[[k]]\$p != px)
stop("\n  p != px for data set ", k, "\n")
groups[k] <- ListOfGroups[[k]]\$main
if(ListOfGroups[[k]]\$nc <= 5 * ListOfGroups[[k]]\$p)
cat("  *** Proceed with Care, n < 5p for group", groups[k], "***\n")
if(ListOfGroups[[k]]\$nc <= 3 * ListOfGroups[[k]]\$p)
cat("  *** Proceed with Great Care, n < 3p for group", groups[k],
"***\n")
md[, k] <- mahalanobis(x, ListOfGroups[[k]]\$mean, ListOfGroups[[k]]\$cov)
pgm[, k] <- round(1 - pf(temp * md[, k], px, nx - px), 4)
md[, k] <- md[, k] + det(ListOfGroups[[k]]\$cov)
}
## Allocate individuals to groups they are most similar to (i.e. lowest md[, k]) having
## allowed for heterogeneity of covariance, and check for 'outliers' where all predicted
## probabilities of group membership are < pcrit.
for(i in 1:nx) {
xalloc[i] <- which(md[i,  ] == min(md[i,  ]))
if(max(pgm[i,  ]) < pcrit)
xalloc[i] <- 0
}
invisible(list(groups = groups, m = m, n = nx, p = px, pgm = pgm, pcrit = pcrit, xalloc
= xalloc))
}
```

## Try the StatDA package in your browser

Any scripts or data that you put into this service are public.

StatDA documentation built on March 13, 2020, 2:42 a.m.