# R/rlga.R In lga: Tools for linear grouping analysis (LGA)

#### Documented in rlga

```rlga <- function(x, k, alpha=0.9, ...)  UseMethod("rlga")

"rlga.default" <- function(x, k, alpha=0.9, biter=NULL, niter=10, showall=FALSE, scale=TRUE,
nnode=NULL, silent=FALSE, ...){
## Scale the dataset (if required), otherwise parse to a matrix
x <- as.matrix(x)
if (any(is.na(x))) stop("missing data in 'x'")

if(scale)
x <- scale(x, center=FALSE, scale=sqrt(apply(x,2,var)))
if (!is.numeric(x)) stop("'x' does not appear to be numeric.\n")
n <- nrow(x); d <- ncol(x)

## Check alpha
if (alpha < 0.5 || alpha > 1)
stop("'alpha' must be in [0.5, 1].")

## Check the number of groups
if (floor(k) != k)
stop("'k' is not an integer")
else if (k < 1)
stop ("'k' is not greater than 0")
else if (k == 1) {
warning("'k' is equal to 1 - performing robust orthogonal regression")
biter <- 100
}
else {
## Set the number of random starts (if not provided)
if (is.null(biter))
if(is.na(biter <- lga.NumberStarts(n, d, k, p=0.95)))
stop("'biter' ill-specified. Rerun, setting 'biter'\n")
}
if (!silent)
cat("RLGA Algorithm \nk =", k, "\tbiter =", biter, "\tniter =", niter, "\n")

## Choose the starting hyperplane coefficients for each biter
hpcoef <- list()
for (j in 1:biter){
## Choose starting clusters
clindex <- matrix(sample(1:n, size=k*d, replace=FALSE), nrow=k)
## form initial hyperplanes - each row is a hyperplane
hpcoef[[j]] <- matrix(NA, nrow=k, ncol=(d+1))
for (i in 1:k)
hpcoef[[j]][i,] <- lga.orthreg(x[clindex[i,],])
}

if(is.null(nnode)){
## not parallel
outputsl <- lapply(hpcoef, rlga.iterate, x, k, d, n, niter, alpha)
}
else {
## parallel
if (!silent) cat("Setting up nodes \n")
if (!(require(snow))) stop("Can't find required packages: snow")
cl <- makeCluster(nnode)
clusterEvalQ(cl, library(lga))
outputsl <- clusterApplyLB(cl, hpcoef, rlga.iterate, x, k, d, n, niter, alpha)
stopCluster(cl)
if (!silent) cat("Nodes closed \n")
}

outputs <- matrix(unlist(outputsl), ncol = biter, nrow = n + 2)

## Find the number of converged results
nconverg <- sum(outputs[n+1,])
if (nconverg == 0)
warning("LGA failed to converge for any iteration\n")
if (!showall){
## remove any columns with NAs
outputs <- outputs[,complete.cases(t(outputs)), drop=FALSE]
if (nconverg != 0)
outputs <- outputs[,outputs[n+1,]==1, drop=FALSE]
outputs <- outputs[, which.min(outputs[n+2,]), drop=FALSE]
if(ncol(outputs) > 1)
outputs <- lga.CheckUnique(outputs)
}
if (!silent) {
cat("\nFinished.\n")
if(showall) cat("\nReturning all outputs \n", "")
}

if (!showall){
## Fit hyerplane(s)
hp <- matrix(NA, nrow=k, ncol=(d+1))
for (i in 1:k)
hp[i,] <- lga.orthreg(x[outputs[1:n,] == i,])
ROSS <- lga.calculateROSS(hp, x, n, d, outputs[1:n,])
}
else {
ROSS <- outputs[n+2,]
hp <- NULL
}

fout <- list(cluster=outputs[1:n,], ROSS=ROSS, converged=outputs[n+1,],
nconverg=nconverg, x=x, hpcoef=hp)

attributes(fout) <- list(class=c("rlga","lga"), biter=biter, niter=niter, scaled=scale, k=k, alpha=alpha,
names=names(fout))
return(fout)
}

"rlga.iterate" <-  function(hpcoef, xsc, k, d, n, niter, alpha){
## give the function the inital set of hyperplanes (in hpcoef)
groups <- rlga.dodist(xsc, hpcoef, k, d, n, alpha)
iter <- 0
if (any(table(groups) < d) | (length(unique(groups)) < k))
iter <- (niter+10)
oldgroups <- 1
converged <- FALSE
while (!converged & iter < niter) {
iter <- iter+1
oldgroups <- groups
for (i in 1:k){
hpcoef[i,] <- lga.orthreg(xsc[groups==i,])
}
groups <- rlga.dodist(xsc, hpcoef, k, d, n, alpha)
if (any(table(groups) < d) | (length(unique(groups)) < k))
iter <- (niter+10) # if there aren't enough obs in a group
if (all(oldgroups==groups)) converged <- TRUE
}
return(list(groups, converged, lga.calculateROSS(hpcoef, xsc, n, d, groups)))
}

"rlga.dodist" <- function(y, coeff, k, d, n, alpha){
## This function calculates the (orthogonal) Residuals for different hyerplanes,
## calculates which hyperplane each observation is closest to, and then takes the
## smallest alpha of them (setting the rest to zero)

dist <- (y %*% t(coeff[,1:d, drop=FALSE])- matrix(coeff[,d+1, drop=FALSE], ncol=k, nrow=n, byrow=TRUE))^2
mindist <- max.col(-dist, ties.method="first")
mindist2 <- dist[cbind(1:n, mindist)]
mindist[ mindist2 > quantile(mindist2, alpha) ] <- 0
return(mindist)
}

"print.rlga" <- function(x, ...) { ## S3method for printing LGA class
cat("Output from RLGA:\n\nDataset scaled =", attr(x, "scaled"), "\tk =",attr(x, "k"),
"\tniter =",attr(x, "niter"), "\tbiter =", attr(x, "biter"), "\nAlpha =", attr(x, "alpha"),
"\nNumber of converged =",x\$nconverg, ifelse(length(x\$ROSS)>1, "\nROSS =", "\nBest clustering has ROSS ="),
x\$ROSS,"\n")
}
```

## Try the lga package in your browser

Any scripts or data that you put into this service are public.

lga documentation built on May 30, 2017, 3:07 a.m.