Nothing
#' alr EM-based imputation of rounded zeros
#'
#' A modified EM alr-algorithm for replacing rounded zeros in compositional
#' data sets.
#'
#' Statistical analysis of compositional data including zeros runs into
#' problems, because log-ratios cannot be applied. Usually, rounded zeros are
#' considerer as missing not at random missing values. The algorithm first
#' applies an additive log-ratio transformation to the compositions. Then the
#' rounded zeros are imputed using a modified EM algorithm.
#'
#' @param x compositional data
#' @param pos position of the rationing variable for alr transformation
#' @param dl detection limit for each part
#' @param eps convergence criteria
#' @param maxit maximum number of iterations
#' @param bruteforce if TRUE, imputations over dl are set to dl. If FALSE,
#' truncated (Tobit) regression is applied.
#' @param method either \dQuote{lm} (default) or \dQuote{MM}
#' @param step if TRUE, a stepwise (AIC) procedure is applied when fitting
#' models
#' @param nComp if determined, it fixes the number of pls components. If
#' \dQuote{boot}, the number of pls components are estimated using a
#' bootstraped cross validation approach.
#' @param R number of bootstrap samples for the determination of pls
#' components. Only important for method \dQuote{pls}.
#' @param verbose additional print output during calculations.
#' @return \item{xOrig }{Original data frame or matrix} \item{xImp }{Imputed
#' data} \item{wind }{Index of the missing values in the data} \item{iter
#' }{Number of iterations} \item{eps }{eps}
#' @author Matthias Templ and Karel Hron
#' @seealso \code{\link{impRZilr}}
#' @references Palarea-Albaladejo, J., Martin-Fernandez, J.A. Gomez-Garcia, J. (2007)
#' A parametric approach for dealing with compositional rounded zeros.
#' \emph{Mathematical Geology}, 39(7), 625-645.
#' @keywords manip multivariate
#' @export
#' @importFrom MASS stepAIC
#' @importFrom robustbase lmrob
#' @examples
#'
#' data(arcticLake)
#' x <- arcticLake
#' ## generate rounded zeros artificially:
#' x[x[,1] < 5, 1] <- 0
#' x[x[,2] < 47, 2] <- 0
#' xia <- impRZalr(x, pos=3, dl=c(5,47), eps=0.05)
#' xia$xImp
#'
impRZalr <- function(x, pos=ncol(x), dl=rep(0.05, ncol(x)-1),
eps=0.0001, maxit=50, bruteforce=FALSE,
method="lm", step=FALSE, nComp = "boot", R=10,
verbose=FALSE) {
# x <- xorig <- genVarsSimple(n=50,p=30)
# qu <- apply(x, 2, quantile, 0.05)
# for(i in 1:(ncol(x)-2)){
# x[x[,i] < qu[i], i] <- 0
# }
# pos <- ncol(x)-1
# x <- data.frame(x)
# dl <- qu
# method="lm"
# step=FALSE
# bruteforce=FALSE
## some checks:
if(is.matrix(x)) stop("convert to data.frame first")
stopifnot(all(x[,pos] != 0 & length(which(is.na(x[,pos]))) == 0))
if(!any(is.na(x)) && !any(x==0) ) stop("No zeros or missing values in the data")
if(method=="pls" & ncol(x)<5) stop("too less variables/parts for method pls")
if(is.null(nComp)){
pre <- FALSE
nC <- NULL
} else if(nComp=="boot"){
nC <- integer(ncol(x))
pre <- TRUE
} else if(length(nComp) == ncol(x)){
nC <- nComp
pre <- FALSE
} else {
pre <- FALSE
}
## zeros to NA:
x[x==0] <- NA
## transformation:
xa <- addLR(x, ivar=pos)
xax <- xa$x
w <- is.na(xa$x)
## dl --> phi
m <- matrix(rep(dl, each=nrow(x)), ncol=length(dl))
phi <- log(m/x[,pos])
phi <- phi[,-pos]
xOrig <- x
it <- 0
d <- 99999999
## initialisation:
xax[w] <- phi[w] * 2 / 3
## start the EM:
it <- 0
amountMiss <- length(which(w))
while( d > eps & it <= maxit ){
it <- it + 1
yold <- xax
n2 <- nrow(xax)-ncol(xax)+1
cn <- colnames(x)
n <- nrow(x)
for(i in 1:ncol(xax)){
response <- xax[,i]
predictors <- as.matrix(xax[,-i])
if(method=="lm" && !step){
lm1 <- lm(response ~ predictors)
yhat <- predict(lm1, new.data=predictors)
s <- sd(lm1$res, na.rm=TRUE)
# s <- sqrt(sum(lm1$res^2)) / n2
}
if(method=="pls" && !step){
if(it == 1 & pre){ ## evaluate ncomp.
nC[i] <- bootnComp(predictors,
y=response, R,
plotting=FALSE)$res #$res2
}
if(verbose) cat("\n ncomp for part",i,":",nC[i])
reg1 <- mvr(as.matrix(response) ~ as.matrix(predictors), ncomp=nC[i],
method="simpls")
yhat <- predict(reg1, new.data=data.frame(predictors), ncomp=nC[i])
s <- sqrt(sum(reg1$res^2)/n)
lm1 <- lm(response ~ predictors)
yhat <- predict(lm1, new.data=predictors)
s <- sd(lm1$res, na.rm=TRUE)
# s <- sqrt(sum(lm1$res^2)) / n2
}
if(method=="lm" && step){
lm1 <- lm(response ~ predictors)
lm1 <- MASS::stepAIC(lm1, trace=FALSE)
yhat <- predict(lm1, new.data=predictors)
s <- sd(lm1$res, na.rm=TRUE)
# s <- sqrt(sum(lm1$res^2)) / n2
}
if(method=="MM" && !step) {
lm1 <- robustbase::lmrob(response ~ predictors)
yhat <- predict(lm1, new.data=predictors)
if(any(is.na(yhat))) stop("NA in yhat")
if(any(yhat=="Inf")) stop("Inf in yhat")
s <- lm1$s
}
if(method=="MM" && step) {
stop("robust + stepwise is not implemented until now.")
}
ex <- (phi[,i] - yhat)/s
tobit <- s*dnorm(ex)/pnorm(ex)
tobit <- ifelse(tobit=="NaN", 0, tobit)
tobit <- ifelse(is.na(tobit), 0, tobit)
tobit <- ifelse(tobit =="Inf", 0, tobit)
tobit <- ifelse(tobit =="-Inf", 0, tobit)
yhat2 <- yhat - tobit
# check if we are under the DL:
if(any(yhat2[w[,i]] >= phi[w[,i],i])) stop(paste("values above the DL are imputed. \n column",i))
if(bruteforce){
xax[w[,i],i] <- ifelse(yhat2[w[,i]] >= phi[w[,i],i], phi[w[,i],i], yhat2[w[,i]])
} else {
xax[w[,i],i] <- yhat2[w[,i]]
}
}
d <- sum(abs(xax - yold))/amountMiss
}
## backtransform:
xa$x.alr <- xax
ximp <- suppressWarnings(addLRinv(xa))
## result:
res <- list(x=ximp, wind=NULL, iter=it, eps=eps)
invisible(res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.