# R/batteryreduction.R In batteryreduction: An R Package for Data Reduction by Battery Reduction

```#' A function for data reduction
#'
#' Battery reduction is a method used in data reduction. It uses Gram-Schmidt orthogonal rotations to find out a subset of variables best representing the original set of variables.
#' @param vars a vector of variable names
#' @param numfact	numeric, number of variables to be selected
#' @param data a dataframe
#' @return If numfact=1, reda matrix and selected variable are returned. If numfact>=2, reda matrix, rota matrix, and selected variables are returned.
#' @references D'Agostino, R. B., & Zhang, Z. (1992). Data/variable reduction by principal components, battery reduction and variable clustering. MATRIX, 7(60), 06.
#' @references Hans Werner Borchers (2015). pracma: Practical Numerical Math Functions. R package version 1.8.6. http://CRAN.R-project.org/package=pracma
#' @examples ## Generate an example dataset
#' set.seed(1234)
#' data<-data.frame(x1=rnorm(n=100, mean=14, sd=7),
#' x2=rnorm(n=100, mean=3, sd=1),
#' x3=rpois(n=100, lambda=1),
#' x4=rpois(n=100, lambda=10),
#' x5=rgamma(n=100, shape=1),
#' x6=rgamma(n=100, shape=10))

#' ## Demonstrate batteryreduction
#' vars<-c('x1','x2','x3','x4','x5','x6')
#' numfact<-3
#' batteryreduction(vars, numfact, data)

batteryreduction<-function(vars, numfact, data){
#1. Compute the correlation matrix
corr.matrix<-cor(data[, vars], use = "everything",method = "pearson")

m<-eigen(corr.matrix)\$values
d<-diag(m)
e<-eigen(corr.matrix)\$vectors #Seems e estimate is different from SAS
a<-e%*%sqrt(d)
nfact<-numfact
nvars<-nrow(a)
reda<-as.matrix(a[, c(1:nfact)])
rownames(reda)<-vars
af<-matrix(nrow=nvars, ncol=1)
af[,1]<-0

#3. Performs the gram-schmidt rotation
if (nfact>=2){
for (i in 1:(nfact-1)){
if (i==1){
a1<-a[, c(i:nfact)]
}else{
colno<-ncol(a)
a1<-a[, c(2:colno)]
}

sum.of.squares<-function(x) sum(x*x)
sq<-apply(a1, 1, sum.of.squares)
max.index<-which.max(sq)
norm1<-a1[max.index,]
sq1<-sq[[max.index]]
sq1<-sqrt(sq1)
normn1<-norm1/sq1
nfact1<-nfact-i
ident=diag(nfact1)
values<-rep(0,times=nfact1)
zero<-matrix(values,nrow=1,ncol=nfact1)
tempy<-rbind(ident,zero)
#normn1<-t(normn1)
ty<-cbind(normn1,tempy)

ayp1<-a1 %*% gramSchmidt(ty)\$Q
a<-ayp1
colno<-ncol(a)

if (i-(nfact-1)<0) {
af<-cbind(af, a[, c(1)])
}else {
af<-cbind(af, a[, c(1:2)])
}

}

rota<-af[,c(2:(nfact+1))]
rownames(rota)<-vars
per_var<-apply(rota, 2, sum.of.squares)
per_var<-per_var/nvars

selected<-rep(NA,nfact)
for (i in 1:nfact){
selected[i]<-rownames(rota)[which.max(abs(rota[,i]))]
}

return(list(reda = reda, rota = rota, per_var=per_var, selected=selected))
}else{
selected<-vars[which.max(abs(reda))]
return(list(reda = reda, selected=selected))
}
}
```

## Try the batteryreduction package in your browser

Any scripts or data that you put into this service are public.

batteryreduction documentation built on May 2, 2019, 11:26 a.m.