lrEMplus <- function(X, dl = NULL, rob = FALSE, ini.cov = c("complete.obs", "multRepl"), frac = 0.65,
tolerance = 0.0001, max.iter = 50,
rlm.maxit=150, suppress.print = FALSE, closure=NULL, z.warning=0.8, z.delete=TRUE, delta=NULL){
if (any(X<0, na.rm=T)) stop("X contains negative values")
if (is.character(dl)) stop("dl must be a numeric vector or matrix")
if (is.null(dl)){ # If dl not given use min per column
dl <- apply(X,2, function(x) min(x[x!=0]))
warning("No dl vector or matrix provided. The minimum observed values for each column used as detection limits.")
}
if (is.vector(dl)) dl <- matrix(dl,nrow=1)
dl <- as.matrix(dl) # Avoids problems when dl might be multiple classes
if ((is.vector(X)) | (nrow(X)==1)) stop("X must be a data matrix")
if (ncol(dl)!=ncol(X)) stop("The number of columns in X and dl do not agree")
if ((nrow(dl)>1) & (nrow(dl)!=nrow(X))) stop("The number of rows in X and dl do not agree")
if (any(is.na(X))==FALSE) stop("No missing data were found in the data set")
if (any(X==0, na.rm=T)==FALSE) stop("No zeros were found in the data set")
if (!missing("delta")){
warning("The delta argument is deprecated, use frac instead: frac has been set equal to delta.")
frac <- delta
}
ini.cov <- match.arg(ini.cov)
gm <- function(x, na.rm=TRUE){
exp(sum(log(x), na.rm=na.rm) / length(x))
}
## Preliminaries ----
X <- as.data.frame(X,stringsAsFactors=TRUE)
nn <- nrow(X); D <- ncol(X)
X <- as.data.frame(apply(X,2,as.numeric),stringsAsFactors=TRUE)
c <- apply(X,1,sum,na.rm=TRUE)
checkNumZerosCol <- apply(X, 2, function(x) sum(is.na(x)))
if (any(checkNumZerosCol/nrow(X) > z.warning)) {
cases <- which(checkNumZerosCol/nrow(X) > z.warning)
if (z.delete == TRUE) {
if (length(cases) > (ncol(X)-2)) {
stop(paste("Almost all columns contain >", z.warning*100,
"% zeros/unobserved values (see arguments z.warning and z.delete).",
sep=""))
}
X <- X[,-cases]
action <- "deleted"
warning(paste("Column no. ",cases," containing >", z.warning*100,
"% zeros/unobserved values ", action, " (see arguments z.warning and z.delete).\n",
sep=""))
} else {
action <- "found"
warning(paste("Column no. ",cases," containing >", z.warning*100,
"% zeros/unobserved values ", action, " (see arguments z.warning and z.delete. Check out with zPatterns()).\n",
sep=""))
}
}
checkNumZerosRow <- apply(X, 1, function(x) sum(is.na(x)))
if (any(checkNumZerosRow/ncol(X) > z.warning)) {
cases <- which(checkNumZerosRow/ncol(X) > z.warning)
if (z.delete == TRUE) {
if (length(cases) > (nrow(X)-2)) {
stop(paste("Almost all rows contain >", z.warning*100,
"% zeros/unobserved values (see arguments z.warning and z.delete).",
sep=""))
}
X <- X[-cases,]
action <- "deleted"
warning(paste("Row no. ",cases," containing >", z.warning*100,
"% zeros/unobserved values ", action, " (see arguments z.warning and z.delete).\n",
sep=""))
} else {
action <- "found"
warning(paste("Row no. ", cases," containing >", z.warning*100,
"% zeros/unobserved values ", action,
" (see arguments z.warning and z.delete. Check out with zPatterns()).\n",
sep=""))
}
}
if (nrow(dl)==1) dl <- matrix(rep(1,nn),ncol=1)%*%dl
# Check for closure
closed <- 0
if (all( abs(c - mean(c)) < .Machine$double.eps^0.3 )) closed <- 1
if (sum(is.na(X)) > sum(X==0,na.rm=T)){
X.old <- X
# Initial simple imputation of zero
for (i in 1:nn){
if (any(X.old[i, ]==0,na.rm=T)){
z <- which(X.old[i, ]==0)
X.old[i,z] <- frac*dl[i,z]
}
}
# Initial lrEM imputation of missing data
X.old <- lrEM(X.old, label = NA, imp.missing = TRUE, ini.cov = ini.cov, rob = rob,
tolerance = tolerance, max.iter = max.iter, rlm.maxit = rlm.maxit,
suppress.print = TRUE, closure = closure, z.warning = z.warning,
z.delete = z.delete)
}
if (sum(is.na(X)) <= sum(X==0,na.rm=T)){
X.old <- X
# Initial ordinary geo mean imputation of missing (ignores 0s in the column if any)
gmeans <- apply(X.old,2,function(x) gm(x[x!=0]))
for (i in 1:nn){
if (any(is.na(X.old[i, ]))){
z <- which(is.na(X.old[i, ]))
X.old[i,z] <- gmeans[z]
}
}
# Initial lrEM imputation of zeros
X.old <- lrEM(X.old, label = 0, dl = dl, ini.cov = ini.cov, rob = rob,
tolerance = tolerance, max.iter = max.iter, rlm.maxit = rlm.maxit,
suppress.print = TRUE, closure = closure, z.warning = z.warning,
z.delete = z.delete)
}
# Initial parameter estimates
X.old_alr <- log(X.old)-log(X.old[,D])
X.old_alr <- as.matrix(X.old_alr[,-D])
M.old <- matrix(colMeans(X.old_alr),ncol=1)
C.old <- cov(X.old_alr)
iter_again <- 1
niters <- 0
while (iter_again == 1){
niters <- niters+1
if (niters > 1) {X.old <- X.new; M.old <- M.new; C.old <- C.new}
X.old <- as.matrix(X.old)
X.old[which(X==0)] <- 0
X.new <- lrEM(X.old, label = 0, dl = dl, ini.cov = ini.cov, rob = rob,
tolerance = tolerance, max.iter = max.iter, rlm.maxit = rlm.maxit, suppress.print = TRUE,
closure = closure, z.warning = z.warning, z.delete = z.delete)
X.new[is.na(X)] <- NA
X.new <- lrEM(X.new, label = NA, imp.missing = TRUE, ini.cov = ini.cov, rob = rob,
tolerance = tolerance, max.iter = max.iter, rlm.maxit = rlm.maxit, suppress.print = TRUE,
closure = closure, z.warning = z.warning, z.delete = z.delete)
X.new_alr <- log(X.new)-log(X.new[,D])
X.new_alr <- as.matrix(X.new_alr[,-D])
M.new <- matrix(colMeans(X.new_alr),ncol=1)
C.new <- cov(X.new_alr)
# Convergence check
Mdif <- max(abs(M.new-M.old))
Cdif <- max(max(abs(C.new-C.old)))
if ((max(c(Mdif,Cdif)) < tolerance) | (niters == max.iter)) iter_again <- 0
}
## Final section ----
if (closed==1) X.new <- t(apply(X.new,1,function(x) x/sum(x)*c[1])) # If not closed lrEM above takes care of it
if (suppress.print==FALSE) cat(paste("No. iterations to converge: ",niters,"\n\n"))
return(as.data.frame(X.new,stringsAsFactors=TRUE))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.