R/imputeBDLs.R

Defines functions `imputeBDLs`

#' EM-based replacement of rounded zeros in compositional data
#' 
#' Parametric replacement of rounded zeros for compositional data using
#' classical and robust methods based on ilr coordinates with a special
#' choice of balances.
#' 
#' Statistical analysis of compositional data including zeros runs into
#' problems, because log-ratios cannot be applied.  Usually, rounded zeros are
#' considerer as missing not at random missing values.
#' 
#' The algorithm iteratively imputes parts with rounded zeros whereas in each
#' step (1) compositional data are expressed in pivot coordinates (2) tobit regression is
#' applied (3) the rounded zeros are replaced by the expected values (4) the
#' corresponding inverse ilr mapping is applied. After all parts are
#' imputed, the algorithm starts again until the imputations do not change.
#' 
#' @aliases imputeBDLs print.replaced checkData adjustImputed
#' @param x data.frame or matrix
#' @param maxit maximum number of iterations
#' @param eps convergency criteria 
#' @param method either "lm", "lmrob" or "pls"
#' @param dl Detection limit for each variable. zero for variables with
#' variables that have no detection limit problems.
#' @param variation, if TRUE those predictors are chosen in each step, who's variation is lowest to the predictor.
#' @param nPred, if determined and variation equals TRUE, it fixes the number of predictors 
#' @param nComp if determined, it fixes the number of pls components. If
#' \dQuote{boot}, the number of pls components are estimated using a
#' bootstraped cross validation approach.
#' @param bruteforce sets imputed values above the detection limit to the
#' detection limit. Replacement above the detection limit are only exeptionally
#' occur due to numerical instabilities. The default is FALSE!
#' @param noisemethod adding noise to imputed values. Experimental
#' @param noise TRUE to activate noise (experimental)
#' @param R number of bootstrap samples for the determination of pls
#' components. Only important for method \dQuote{pls}.
#' @param correction normal or density
#' @param verbose additional print output during calculations.
#' @param test an internal test situation (this parameter will be deleted soon)
#' @importFrom cvTools cvFit 
#' @importFrom zCompositions multRepl
#' @importFrom fpc pamk
#' @import pls
#' @return \item{x }{imputed data} \item{criteria }{change between last and
#' second last iteration} \item{iter }{number of iterations} \item{maxit
#' }{maximum number of iterations} \item{wind}{index of zeros}
#' \item{nComp}{number of components for method pls} \item{method}{chosen
#' method}
#' @author Matthias Templ, method subPLS from Jiajia Chen
#' @references Templ, M., Hron, K., Filzmoser, P., Gardlo, A. (2016). 
#' Imputation of rounded zeros for high-dimensional compositional data. 
#' \emph{Chemometrics and Intelligent Laboratory Systems}, 155, 183-190.
#' 
#' Chen, J., Zhang, X., Hron, K., Templ, M., Li, S. (2018). 
#' Regression imputation with Q-mode clustering for rounded zero replacement in high-dimensional compositional data. 
#' \emph{Journal of Applied Statistics}, 45 (11), 2067-2080.
#' @seealso \code{\link{imputeBDLs}}
#' @keywords manip multivariate
#' @export
#' @importFrom sROC kCDF
#' @importFrom MASS rlm
#' @examples
#' 
#' p <- 10
#' n <- 50
#' k <- 2
#' T <- matrix(rnorm(n*k), ncol=k)
#' B <- matrix(runif(p*k,-1,1),ncol=k)
#' X <- T %*% t(B)
#' E <-  matrix(rnorm(n*p, 0,0.1), ncol=p)
#' XE <- X + E
#' data <- data.frame(pivotCoordInv(XE))
#' col <- ncol(data)
#' row <- nrow(data)
#' DL <- matrix(rep(0),ncol=col,nrow=1)
#' for(j in seq(1,col,2))
#' {DL[j] <- quantile(data[,j],probs=0.06,na.rm=FALSE)}
#' 
#' for(j in 1:col)        
#' {data[data[,j]<DL[j],j] <- 0}
#' \dontrun{
#' # under dontrun because of long exectution time
#' imp <- imputeBDLs(data,dl=DL,maxit=10,eps=0.1,R=10,method="subPLS")
#' imp
#' imp <- imputeBDLs(data,dl=DL,maxit=10,eps=0.1,R=10,method="pls", variation = FALSE)
#' imp
#' imp <- imputeBDLs(data,dl=DL,maxit=10,eps=0.1,R=10,method="lm")
#' imp
#' imp <- imputeBDLs(data,dl=DL,maxit=10,eps=0.1,R=10,method="lmrob")
#' imp
#' 
#' data(mcad)
#' ## generate rounded zeros artificially:
#' x <- mcad
#' x <- x[1:25, 2:ncol(x)]
#' dl <- apply(x, 2, quantile, 0.1)
#' for(i in seq(1, ncol(x), 2)){
#'   x[x[,i] < dl[i], i] <- 0
#' } 
#' ni <- sum(x==0, na.rm=TRUE) 
#' ni/(ncol(x)*nrow(x)) * 100
#' dl[seq(2, ncol(x), 2)] <- 0
#' replaced_lm <- imputeBDLs(x, dl=dl, eps=1, method="lm",  
#'   verbose=FALSE, R=50, variation=TRUE)$x
#' replaced_lmrob <- imputeBDLs(x, dl=dl, eps=1, method="lmrob",  
#'   verbose=FALSE, R=50, variation=TRUE)$x
#' replaced_plsfull <- imputeBDLs(x, dl=dl, eps=1, 
#'   method="pls", verbose=FALSE, R=50, 
#'   variation=FALSE)$x 
#' }
#' 
#' 
#' 
`imputeBDLs` <-
  function(x, maxit=10, eps=0.1, method="subPLS", 
           dl=rep(0.05, ncol(x)), variation=TRUE,	nPred=NULL, 
           nComp = "boot", bruteforce=FALSE,  
           noisemethod="residuals", noise=FALSE, R=10, 
           correction="normal", verbose=FALSE, test = FALSE){
    
    ## needed for visible global function definition
    isomLRInvp <- 1
    isomLRinv <- function(x){
      pivotCoordInv(x = x)
    }
    
    ## check if all is fine:
    # check if values are in (0, dl[i]):
    checkDL <- function(x, dl, indexNA){
      check <- logical(ncol(x))
      for(i in 1:ncol(x)){
        critvals <- x[indexNA[,i],i] > dl[i]
        check[i] <- any(critvals)
        if(check[i]){ 
          x[which(x[indexNA[,i],i] > dl[i]),i] <- dl[i]
        }
      }
      if(any(check) & verbose){
        message(paste(sum(critvals), "/", sum(w), "replaced values has been corrected"))      
      }
      x
    }
    
    ## check if data are fine
    checkData(x, dl)
    
    ## some specific checks
    stopifnot((method %in% c("lm", "MM", "lmrob", "pls", "subPLS")))
    if(method=="pls" & ncol(x)<5) stop("too less variables/parts for method pls")
    if(!(correction %in% c("normal","density"))){
      stop("correction method must be normal or density")
    }
    if(method == "pls" & variation){
      stop("if variation is TRUE then pls is not supported.")
    }
    
    ## how to deal with user input on nComp
    if(is.null(nComp)){
      pre <- FALSE
      nC <- NULL
    } else if(nComp=="boot"){
      nC <- integer(ncol(x))
      pre <- TRUE
    } else if(length(nComp) == ncol(x)){
      nC <- nComp
      pre <- FALSE
    } else  {
      pre <- FALSE	
    }
    
    
    ## store some important values
    n <- nrow(x) 
    d <- ncol(x)
    rs <- rowSums(x, na.rm = TRUE)
    
    if(method == "subPLS"){
      w <- x == 0
      indexFinalCheck <- x == 0
      mulzero <- zCompositions::multRepl(x,label=0,dl=dl)
      dd <- as.dist(robCompositions::variation(mulzero))
      g <- fpc::pamk(dd)$nc
      pos <- as.matrix(cutree(hclust(dd, method="ward.D"),g))
      indNA <- apply(x,2,function(x){any(x==0)})
      gz <- intersect(order(-table(pos[indNA])),which(table(pos)>1))
      
      fun.PLS <- function(x,data,dl,pos,b,R)
      {
        data_b <- data[,pos==b]
        data_nb <- data[,pos!=b]
        col1 <- ncol(data_b)
        row <- nrow(data_b)
        u <- cbind(cenLR(data_b)$x,cenLR(data_nb)$x)
        u1 <- u[,1:col1]
        n <- bootnComp(u[,-(1:col1),drop=FALSE],as.matrix(u1),R,plotting=FALSE)$res
        # form <- paste(paste(colnames(u1), collapse = "+"), "~", paste(colnames(u), collapse = "+"))
        # form <- as.formula(form)
        # pls <- mvr(form, data=cbind(u1, u), method="simpls", 
        #            ncomp=n) 
        pls <- mvr(as.matrix(u1)~ as.matrix(u[,-(1:col1)]),ncomp=n,method="simpls")
        # pls <- mvr(u1~ u[,-(1:col1)],ncomp=n,method="simpls")
        mean <- matrix(predict(pls,ncomp=n),ncol=col1)
        sigma <- cov(u1-mean)
        sig_b <- x[,pos==b]==0
        cz <- which(colSums(sig_b)!=0)
        rz <- which(rowSums(sig_b)!=0)
        lj <- dl[pos==b]
        for(j in cz)
        {
          linjie <- as.matrix(cenLR(cbind(rep(lj[j],row),data_b[,-j,drop=FALSE]))$x[,1,drop=FALSE])
          u1[sig_b[,j],j] <- mean[sig_b[,j],j]-sqrt(sigma[j,j])*(dnorm((linjie[sig_b[,j]]-mean[sig_b[,j],j])/sqrt(sigma[j,j]))/pnorm((linjie[sig_b[,j]]-mean[sig_b[,j],j])/sqrt(sigma[j,j])))
        }
        for(i in rz)
        {if(rowSums(sig_b)[i]<col1)
        {u1[i,!sig_b[i,]] <- (sum(!sig_b[i,])*u1[i,!sig_b[i,]]-rep(sum(u1[i,]),sum(!sig_b[i,])))/(sum(!sig_b[i,]))}
        }
        chabu <- adjustImputed(cenLRinv(u1)[rowSums(sig_b)<col1,],x[rowSums(sig_b)<col1,pos==b],sig_b[rowSums(sig_b)<col1,])
        data[rowSums(sig_b)<col1,pos==b] <- chabu
        return(data)
      }
      
      it <- 1
      criteria <- 99999999
      impute <- mulzero
      while(it <= maxit & criteria >= eps){
        xold <- impute
        for (m in 1:length(gz))
        {impute <- fun.PLS(x,impute,dl,pos,gz[m],R)}
        it <- it+1
        criteria <- sum(((xold-impute)/impute)^2, na.rm = TRUE)
      }
      impute <- checkDL(impute, dl, indexFinalCheck)
      res <- list(x=impute, criteria=criteria, iter=it,
                  maxit=maxit, wind=w, nComp=nC, nPred=nPred,
                  variation=variation,
                  method=method, dl=dl)
      class(res) <- "replaced"
      return(res)
    }
    
    # if(method == "pls_clust"){
    #   indexFinalCheck <- x == 0
    #   mulzero <- zCompositions::multRepl(x,label=0,dl=dl)
    #   dd <- as.dist(robCompositions::variation(mulzero))
    #   g <- fpc::pamk(dd)$nc
    #   pos <- as.matrix(cutree(hclust(dd, method="ward.D"),g))
    #   indNA <- apply(x,2,function(x){any(x==0)})
    #   gz <- intersect(order(-table(pos[indNA])),which(table(pos)>1))
    #   
    #   fun.PLS <- function(x,data,dl,pos,b,R)
    #   {
    #     data_b <- data[,pos==b]
    #     data_nb <- data[,pos!=b]
    #     col1 <- ncol(data_b)
    #     row <- nrow(data_b)
    #     u <- cbind(cenLR(data_b)$x.clr, cenLR(data_nb)$x.clr)
    #     u1 <- u[,1:col1]
    #     n <- bootnComp(u[,-(1:col1),drop=FALSE],u1,R,plotting=FALSE)$res
    #     pls <- mvr(u1~ u[,-(1:col1)],ncomp=n,method="simpls")
    #     mean <- matrix(predict(pls,ncomp=n),ncol=col1)
    #     sigma <- cov(u1-mean)
    #     sig_b <- x[,pos==b]==0
    #     cz <- which(colSums(sig_b)!=0) 
    #     rz <- which(rowSums(sig_b)!=0)
    #     lj <- dl[pos==b]
    #     for(j in cz)
    #     {
    #       linjie <- clr(cbind(rep(lj[j],row),data_b[,-j,drop=FALSE]))[,1,drop=FALSE]
    #       u1[sig_b[,j],j] <- mean[sig_b[,j],j]-sqrt(sigma[j,j])*(dnorm((linjie[sig_b[,j]]-mean[sig_b[,j],j])/sqrt(sigma[j,j]))/pnorm((linjie[sig_b[,j]]-mean[sig_b[,j],j])/sqrt(sigma[j,j])))
    #     }
    #     for(i in rz)
    #     {if(rowSums(sig_b)[i]<col1)
    #     {u1[i,!sig_b[i,]] <- (sum(!sig_b[i,])*u1[i,!sig_b[i,]]-rep(sum(u1[i,]),sum(!sig_b[i,])))/(sum(!sig_b[i,]))}
    #     }
    #     chabu <- adjustImputed(clrInv(u1)[rowSums(sig_b)<col1,],x[rowSums(sig_b)<col1,pos==b],sig_b[rowSums(sig_b)<col1,])
    #     data[rowSums(sig_b)<col1,pos==b] <- chabu
    #     return(data)
    #   }
    #   
    #   it <- 1
    #   criteria <- 99999999
    #   impute <- mulzero
    #   while(it <= maxit & criteria >= eps){
    #     xold <- impute
    #     for (m in 1:length(gz))
    #     {impute <- fun.PLS(x,impute,dl,pos,gz[m],R)}
    #     it <- it+1
    #     criteria <- sum(((xold-impute)/impute)^2, na.rm = TRUE)
    #   }
    #   impute <- checkDL(impute, dl, indexFinalCheck)
    #   
    #   res <- list(x=impute, criteria=criteria, iter=it, 
    #               maxit=maxit, wind=w, nComp=nC, nPred=nPred,
    #               variation=variation,
    #               method=method, dl=dl)
    #   class(res) <- "replaced"
    #   return(res)
    # }
    
    ## set zeros to NA for easier handling
    x[x == 0] <- NA
    x[x < 0] <- NA
    indexFinalCheck <- is.na(x)
    
    ## sort variables of x based on 
    ## decreasing number of zeros in the variables
    cn <- colnames(x)
    wcol <- - abs(apply(x, 2, function(x) sum(is.na(x))))
    o <- order(wcol)
    x <- x[,o]
    if(verbose) cat("number of variables with zeros:\n", sum(wcol != 0))
    ## --> now work in revised order of variables
    ## dl must also be in correct order
    dlordered <- dl[o]
    
    #################
    ## index of missings / non-missings
    w <- is.na(x)
    wn <- !is.na(x)
    xcheck <- x
    w2 <- is.na(x)
    
    ## initialisation
    indNA <- apply(x, 2, function(x){any(is.na(x))})
    #    print(indNA)
    for(i in 1:length(dl)){
      ind <- is.na(x[,i])
      #		if(length(ind) > 0) x[ind,i] <- dl[i]*runif(sum(ind),1/3,2/3)
      if(length(ind) > 0) x[ind,i] <- dlordered[i] *2/3
    }
    xOrig <- x
    
    ## nPred if variation == TRUE and cv for number of predictors
    ## evaluate the vector containing the amount of predictors
    if(!is.null(nPred) & length(nPred) == 1){
      nPred <- rep(nPred, ncol(x))
    }
    if(!is.null(nPred) & length(nPred) > 1){
      stop("nPred must be NULL or a vector of length 1.")
    }
    if(is.null(nPred) & variation){
      ptmcv <- proc.time()
      if(verbose) cat("\n cross validation to estimate number of predictors\n")
      ii <- 1
      if(verbose) pb <- txtProgressBar(min = 0, max = sum(indNA), style = 3)
      nPred <- numeric(nrow(x)) 
      rtmspe <- NULL
      for(i in which(indNA)){
        xneworder <- cbind(x[, i, drop=FALSE], x[, -i, drop=FALSE]) 
        rv <- variation(x, method = "Pairwise")[1,]
        cve <- numeric()
        for(np in seq(3, min(c(27,ncol(x),floor(nrow(x)/2))), 3)){
          s <- sort(rv)[np]
          cols <- which(rv <= s)[1:np]
          xn <- xneworder[, cols]
          if(test) xilr <- data.frame(isomLRp(xn)) else xilr <- data.frame(pivotCoord(xn))
          colnames(xilr)[1] <- "Y"
          call <- call(method, formula = Y ~ .)
          # perform cross-validation
          cve[np] <- suppressWarnings(cvFit(call, data = xilr, 
                                            y = xilr$Y, 
                                            cost = cvTools::rtmspe,
                                            K = 5, R = 1, costArgs = list(trim = 0.1), seed = 1234)$cv)
        }
        nPred[i] <- which.min(cve)
        ## update progress bar
        if(verbose) setTxtProgressBar(pb, ii); ii <- ii + 1
      }  
      if(verbose) close(pb)
      ptmcv <- proc.time() - ptmcv
    }
    
    ###############################
    ###   start the iteration   ###
    ###############################
    
    if(verbose) cat("\n start the iteration:")
    it <- 1; criteria <- 99999999
    while(it <= maxit & criteria >= eps){
      if(verbose) cat("\n iteration", it, "; criteria =", criteria)	
      xold <- x  
      for(i in which(indNA)){
        if(verbose) cat("\n replacement on (sorted) part", i)
        ## sort data columns first
        ## i'th positions must be first
        xneworder <- cbind(x[, i, drop=FALSE], x[, -i, drop=FALSE]) 
        ## if based on variation matrix:
        if(variation){
          orig <- xneworder  
          rv <- variation(x, method = "Pairwise")[1,]
          s <- sort(rv)[nPred[i]]
          cols <- which(rv <= s)[1:nPred[i]]
          xneworder <- xneworder[, cols]
        }
        ## factors for preserving abs values:
        # fac <- multis(xneworder)
        ## detection limit in ilr-space
        forphi <- cbind(rep(dlordered[i], n), xneworder[,-1,drop=FALSE])
        if(any(is.na(forphi))) break()
        if(test) phi <- isomLRp(forphi)[,1] else phi <- pivotCoord(forphi)[,1] 
        #		part <- cbind(x[,i,drop=FALSE], x[,-i,drop=FALSE])
        xneworder[xneworder < 2*.Machine$double.eps] <- 2*.Machine$double.eps
        if(test) xilr <- data.frame(isomLRp(xneworder)) else xilr <- data.frame(pivotCoord(xneworder))
        #        c1 <- colnames(xilr)[1]					
        #        colnames(xilr)[1] <- "V1"	
        response <- as.matrix(xilr[, 1, drop=FALSE])
        predictors <- as.matrix(xilr[, -1, drop=FALSE])
        if(method=="lm"){ 
          reg1 <- lm(response ~ predictors)
          yhat <- predict(reg1, newdata=data.frame(predictors))
        } else if(method=="MM" | method=="lmrob"){
          reg1 <- MASS::rlm(response ~ predictors, method="MM",maxit = 100)#rlm(V1 ~ ., data=xilr2, method="MM",maxit = 100)
          yhat <- predict(reg1, newdata=data.frame(predictors))
        } else if(method=="pls"){
          if(it == 1 & pre){ ## evaluate ncomp.
            nC[i] <- bootnComp(xilr[, 2:ncol(xilr), drop=FALSE], y=xilr[, 1], R, 
                               plotting=FALSE)$res #$res2
          }
          if(verbose) cat("   ;   ncomp:", nC[i])
          reg1 <- mvr(as.matrix(response) ~ as.matrix(predictors), ncomp=nC[i], method="simpls")
          yhat <- predict(reg1, newdata=data.frame(predictors), ncomp=nC[i])
        }
        
        #		s <- sqrt(sum(reg1$res^2)/abs(nrow(xilr)-ncol(xilr))) ## quick and dirty: abs()
        s <- sqrt(sum(reg1$res^2)/nrow(xilr)) 
        yhat <- as.numeric(yhat)
        ex <- (phi - yhat)/s 
        if(correction=="normal"){
          yhat2sel <- ifelse(dnorm(ex[w[, i]]) > .Machine$double.eps,
                             yhat[w[, i]] - s*dnorm(ex[w[, i]])/pnorm(ex[w[, i]]),
                             yhat[w[, i]])
        } else if(correction=="density"){
          den <- density(ex[w[,i]])
          distr <- sROC::kCDF(ex[w,i])
        }
        if(any(is.na(yhat)) || any(yhat=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
        # check if we are under the DL:
        if(any(yhat2sel >= phi[w[, i]])){
          yhat2sel <- ifelse(yhat2sel > phi[w[, i]], phi[w[, i]], yhat2sel)
        }
        xilr[w[, i], 1] <- yhat2sel
        if(test) xinv <- isomLRInvp(xilr) else xinv <- pivotCoordInv(xilr)
        ## if variation:
        if(variation == TRUE){
          xneworder <- adjustImputed(xinv, xneworder, w2[, cols])
          orig[, cols] <- xneworder #* fac
          xinv <- orig
        }
        ## reordering of xOrig
        if(i %in% 2:(d-1)){
          xinv <- cbind(xinv[,2:i], xinv[,c(1,(i+1):d)])
        }
        if(i == d){
          xinv <- cbind(xinv[,2:d], xinv[,1])
        }
        #       browser()
        if(!variation){ 
          x <- adjustImputed(xinv, xOrig, w2)
        } else {
          x <- xinv
        }
        #		x <- adjust3(xinv, xOrig, w2) 
        #		## quick and dirty:
        #		x[!w] <- xOrig[!w]
      }
      
      it <- it + 1
      criteria <- sum( ((xold - x)/x)^2, na.rm=TRUE) ## DIRTY: (na.rm=TRUE)
      if(verbose & criteria != 0) cat("\n iteration", it, "; criteria =", criteria)
    }
    
    #### add random error ###
    if(noise){
      for(i in which(indNA)){
        if(verbose) cat("\n add noise on variable", i)
        
        # add error terms
        inderr <- w[,i]
        if(noisemethod == "residuals") {
          error <- sample(residuals( reg1 )[inderr], 
                          size=wcol[i], replace=TRUE)
          reg1$res[inderr] <- error
        } else {
          mu <- median(residuals( reg1 )[inderr])
          sigma <- mad(residuals( reg1 )[inderr])
          error <- rnorm(wcol[i], mean=mu, sd=sigma)
          reg1$res[inderr] <- error		   
        }
        # return realizations
        yhat[inderr] <- yhat[inderr] + error
        
        
        s <- sqrt(sum(reg1$res^2)/nrow(xilr)) ## quick and dirty: abs()
        ex <- (phi - yhat)/s 
        yhat2sel <- ifelse(dnorm(ex[w[, i]]) > .Machine$double.eps,
                           yhat[w[, i]] - s*dnorm(ex[w[, i]])/pnorm(ex[w[, i]]),
                           yhat[w[, i]])
        if(any(is.na(yhat)) || any(yhat=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
        # check if we are under the DL:
        if(any(yhat2sel >= phi[w[, i]])){
          yhat2sel <- ifelse(yhat2sel > phi[w[, i]], phi[w[, i]], yhat2sel)
        }
        xilr[w[, i], 1] <- yhat2sel
        if(test) xinv <- isomLRInvp(xilr) else xinv <- pivotCoordInv(xilr) 
        ## reordering of xOrig
        if(i %in% 2:(d-1)){
          xinv <- cbind(xinv[,2:i], xinv[,c(1,(i+1):d)])
        }
        if(i == d){
          xinv <- cbind(xinv[,2:d], xinv[,1])
        }
        
        #	   x <- adjust2(xinv, xOrig, w) 
        #	   ## quick and dirty:
        #	   x[!w] <- xOrig[!w]
      }
    }
    ### end add random error ###
    #   x <- adjust3(x, xOrig, w)
    #   x[!w] <- xOrig[!w] 
    x <- x[,order(o)] ## checked: reordering is OK!
    colnames(x) <- cn
    
    if(verbose){
      message(paste(sum(w)), "values below detection limit has been imputed \n below the corresponding detection limits")
    }
    
    
    x <- checkDL(x, dl, indexFinalCheck)
    
    res <- list(x=x, criteria=criteria, iter=it, 
                maxit=maxit, wind=w, nComp=nC, nPred=nPred,
                variation=variation,
                method=method, dl=dl)
    class(res) <- "replaced"
    return(res)
  }

#' @rdname imputeBDLs
#' @export
#' @param xImp imputed data set
#' @param xOrig original data set
#' @param wind index matrix of rounded zeros
adjustImputed <- function(xImp, xOrig, wind){
  ## aim: 
  ## (1) ratios must be preserved
  ## (2) do not change original values
  ## (3) adapt imputations
  xneu  <- xImp
  s1 <- rowSums(xOrig, na.rm = TRUE)
  ## per row: consider rowsums of imputed data
  sumPrevious <- sumAfter <- numeric(nrow(xImp))
  for (i in 1:nrow(xImp)) {
    if(any(wind[i, ]) & !all(wind[i,])){
      sumPrevious[i] <- sum(xOrig[i, !wind[i, ]]) 
      sumAfter[i] <- sum(xImp[i, !wind[i,]])
    } else{ 
      sumPrevious[i] <- sumAfter[i] <- 1
    }
  }
  # how much is rowsum increased by imputation:
  fac <- sumPrevious/sumAfter
  
  #    # decrese rowsums of orig.
  #    s1[i] <- s1[i]/fac
  #  }
  ## for non-zeros overwrite them:
  xneu[!wind] <- xOrig[!wind]
  ## adjust zeros:
  for(i in 1:nrow(xImp)){
    if(any(wind[i,])){
      xneu[i,wind[i,]] <- fac[i]*xneu[i,wind[i,]]
    }
  }
  return(xneu)
}

#' @rdname imputeBDLs
#' @export
`checkData` <- function(x, dl){
  if(any(is.na(x))) stop("your data includes missing values.\n Use impKNNa() or impCoda() to impute them first.")
  if( is.vector(x) ) stop("x must be a matrix or data frame")
  ## check if only numeric variables are in x:
  cl <- lapply(x, class)
  if(!all(cl %in% "numeric")) stop("some of your variables are not of class numeric.")
  if( length(dl) < ncol(x)) stop(paste("dl has to be a vector of ", ncol(x)))
  if(any(is.na(x))) stop("missing values are not allowed. \n Use impKNNa or impCoda to impute them first.")
  
  #     pre <- TRUE
  #      if(length(nComp) != ncol(x) & nComp!="boot") stop("nComp must be NULL, boot or of length ncol(x)")
  #    } else if(nComp == "boot"){#
  #		pre <- TRUE
  #	} else {
  #		pre <- FALSE
  #	}
  #################
  ## zeros to NA:
  # check if values are in (0, dl[i]):
  check <- logical(ncol(x))
  for(i in 1:ncol(x)){
    #      check[i] <- any(x[,i] < dl[i] & x[,i] != 0)
    x[x[,i] < dl[i],i] <- 0
  }
  #    if(any(check)){warning("values below detection limit have been set to zero and will be imputed")}
  check2 <- any(x < 0)
  if(check2){warning("values below 0 set have been set to zero and will be imputed")}
  x[x == 0] <- NA
  x[x < 0] <- NA
  indexFinalCheck <- is.na(x)
  
  ## check if rows consists of only zeros:
  checkRows <- unlist(apply(x, 1, function(x) all(is.na(x))))
  if(any(checkRows)){ 
    w <- which(checkRows)
    cat("\n--------\n")
    message("Rows with only zeros are not allowed")
    message("Remove this rows before running the algorithm")
    cat("\n--------\n")      
    stop(paste("Following rows with only zeros:", w))
  }  
  
  ## check if cols consists of only zeros:
  checkCols <- unlist(apply(x, 2, function(x) all(is.na(x))))
  if(any(checkCols)){ 
    w <- which(checkCols)
    cat("\n--------\n")
    message("Cols with only zeros are not allowed")
    message("Remove this columns before running the algorithm")
    cat("\n--------\n")      
    stop(paste("\n Following cols with only zeros:", colnames(x)[w]))
  }  
  
  indNA <- apply(x, 2, function(x){any(is.na(x))})
  
  ## check if for any variable with zeros,
  ## the detection limit should be larger than 0:
  if(any(dl[indNA]==0)){
    w <- which(dl[indNA]==0)
    invalidCol <- colnames(x)[w]
    for(i in 1:length(invalidCol)){
      cat("-------\n")
      cat(paste("Error: variable/part", invalidCol[i], 
                "has detection limit 0 but includes zeros"))
      cat("\n-------\n")
    }
    stop(paste("Set detection limits larger than 0 for variables/parts \n including zeros"))
  }
  
}

#' @rdname imputeBDLs
#' @method print replaced  
#' @export
#' @param ... further arguments passed through the print function
print.replaced <- function(x, ...){
  message(paste("\n", sum(x$w), "values below detection limit were imputed \n below their corresponding detection limits.\n"))
}

#' Bootstrap to find optimal number of components
#' 
#' Combined bootstrap and cross validation procedure to find optimal number of
#' PLS components
#' 
#' Heavily used internally in function impRZilr.
#' 
#' @param X predictors as a matrix
#' @param y response
#' @param R number of bootstrap replicates
#' @param plotting if TRUE, a diagnostic plot is drawn for each bootstrap
#' replicate
#' @return Including other information in a list, the optimal number of
#' components
#' @author Matthias Templ
#' @seealso \code{\link{impRZilr}}
#' @keywords manip
#' @export
#' @examples
#' 
#' ## we refer to impRZilr()
#' 
bootnComp <- function(X, y, R=99, plotting=FALSE){
  ind <- 1:nrow(X)
  d <- matrix(, ncol=R, nrow=nrow(X))#nrow(X))
  nc <- integer(R)
  for(i in 1:R){
    bootind <- sample(ind)
    #    XX <- X
    #    yy <- y
    if(is.null(ncol(y))){
      ds <- cbind(X[bootind,], as.numeric(y[bootind]))
      colnames(ds)[ncol(ds)] <- "V1"
      res1 <- mvr(V1~., data=data.frame(ds), method="simpls", 
                  validation="CV")
    } else {
      ds <- cbind(X[bootind,], as.matrix(y[bootind,])) 
      form <- paste(paste(colnames(ds[, (ncol(X)+1):ncol(ds)]), collapse = "+"), "~", paste(colnames(ds[, 1:ncol(X)]), collapse = "+"))
      form <- as.formula(form)
      res1 <- mvr(form, data=data.frame(ds), method="simpls", 
                  validation="CV") 
    }
    
    
    d[1:res1$ncomp, i] <- res1$validation$PRESS
    nc[i] <- which.min(res1$validation$PRESS)
    #    d[1:reg1$ncomp,i] <- as.numeric(apply(reg1$validation$pred, 3, 
    #                                  function(x) sum(((y - x)^2)) ) )
  }
  d <- na.omit(d)
  sdev <- apply(d, 1, sd, na.rm=TRUE)
  means <- apply(d, 1, mean, na.rm=TRUE)
  mi <- which.min(means)
  r <- round(ncol(X)/20)
  mi2 <- which.min(means[r:length(means)])+r-1
  w <- means < min(means) + sdev[mi]
  threshold <- min(means) + sdev[mi]
  sdev <- sdev
  means <- means
  mi <- mi
  means2 <- means
  means2[!w] <- 999999999999999
  res <- which.min(means2)
  mi3 <- which.max(w)
  #  minsd <- means - sdev > means[mi]
  #  check <- means
  #  check[!minsd] <- 99999999
  if(plotting) plot(means, type="l")
  #  res <- which.min(check)
  list(res3=mi3, res2=mi2, res=res, means=means)
}


bootnCompHD <- function(X,y, R=99, plotting=FALSE){
  ind <- 1:nrow(X)
  d <- matrix(, ncol=R, nrow=nrow(X))#nrow(X))
  for(i in 1:R){
    bootind <- sample(ind)
    XX <- X
    yy <- y
    ds <- cbind(X[bootind,], as.numeric(y[bootind]))
    colnames(ds)[ncol(ds)] <- "V1"
    reg1 <- mvr(V1~., data=data.frame(ds), method="simpls", validation="CV")
    d[1:reg1$ncomp,i] <- as.numeric(apply(reg1$validation$pred, 3, function(x) sum(((y - x)^2)) ) )
  }
  d <- na.omit(d)
  sdev <- apply(d, 1, mad, na.rm=TRUE)
  means <- apply(d, 1, median, na.rm=TRUE)
  mi <- which.min(means)
  if(plotting) plot(means, type="l", col="blue", ylab="squared total prediction error", xlab="number of components")
  themean <- mean(means)
  thesd <- sd(means)
  abovethreshold <- themean - sdev > means
  check <- means
  check[!abovethreshold] <- 9999999999
  res <- which.min(check)
  #	minsd <- means - sdev > means[mi]
  #	check <- means
  #	check[!minsd] <- 99999999
  #	res <- which.min(check)
  if(plotting){
    abline(v=res, lwd=3)
    abline(h=mi, col="red")
    #		abline(h=means-sdev, lty=3)
  }
  list(res=res, mean=means)
}


#checkIfValuesUnderDL <- function(x, dl, wind){
#	check <- logical(ncol(x))
#	for(i in 1:ncol(x)){
#		check[i] <- any(x[,i] > dl[i])
#	}	
#	return(check)
#}

## test adjust2:

adjust2 <- function (xImp, xOrig, wind){
  ## aim: do not change original values
  ## adapt imputations
  xneu = xImp
  s1 <- rowSums(xOrig, na.rm = TRUE)
  ## per row: consider rowsums of imputed data
  ## example: 
  ## wind: F F T F F
  ## ganz orig:  3 5 NA 8 10   (sum=26)
  ## orig(init): 3 5 6.5 8 10  (sum=32.5)
  ## imp:        3 5 7 8 10    (sum=33)
  ## s: 26
  ## s2: 7
  ## fac: 26/(26+7)
  ## s1: 32.5/(26/(26+7)) =  41.25
  for (i in 1:nrow(xImp)) {
    if(any(wind[i,])) s <- sum(xImp[i, !wind[i, ]]) else s <- 1
    if(any(wind[i,])) s2 <- sum(xImp[i, wind[i, ]]) else s2 <- 0
    # how much is rowsum increased by imputation:
    fac <- s/(s + s2)
    # decrese rowsums of orig.
    s1[i] <- s1[i]/fac
  }
  ## impS: 41.25/33
  impS <- s1/rowSums(xImp)
  for (i in 1:ncol(xImp)) {
    xneu[, i] <- xImp[, i] * impS
  }
  xImp <- xneu
  return(xImp)
}


adjust3 <- function(xImp, xOrig, wind){
  xOrigSum <- rowSums(xOrig)
  # sum imputed without former zeros:
  xImpSum <- numeric(ncol(xOrig))
  for(i in 1:nrow(xOrig)){
    xImpSum[i] <- sum(xImp[i,!wind[i,]])
    fac <- xOrigSum[i] / xImpSum[i]
    xImp[i,wind[i,]] <- xImp[i,wind[i,]]  * fac
  }
  xImp[!wind] <- xOrig[!wind]
  xImp
}





#
### switch function to automatically select methods
#getM <- function(xReg, ndata, type, index,mixedTF,mixedConstant,factors,step,robust,noise,noise.factor=1,force=FALSE, robMethod="MM") {
#	switch(type,
#			numeric = useLM(xReg, ndata, index, mixedTF,mixedConstant,factors,step,robust,noise,noise.factor,force,robMethod),
#			factor  = useMN(xReg, ndata, index,factors,step,robust),
#			bin     = useB(xReg, ndata, index,factors,step,robust),
#			count   = useGLMcount(xReg, ndata, index, factors, step, robust)
#	)
#}
#
#
#
#f <- function(){	
##	x <- constSum(x)
##	dl <- dl/sum(dl)
#	## initialisation:
##		x[is.na(x)] <- 0.001
#	for(i in 1:length(dl)){
#		ind <- is.na(x[,i])
#		#PF# if(length(ind) > 0) x[ind,i] <- dl[i]/3*2 
#		if(length(ind) > 0) x[ind,i] <- dl[i]*runif(sum(ind),1/3,2/3)
#	}
#	
##		x <- constSum(x)
#	
#	## parameters:
#	it=0
#	criteria <- 10000000
#	error <- rep(0, ncol(x))
#	nComp <- normalerror <- numeric(ncol(x))
#	if(noisemethod=="residuals") residuals <- matrix(,ncol=ncol(x), nrow=nrow(x))
#	nComp[nComp==0] <- NA
#	criteria <- 1e+07
#	sigma <- mu <- rep(0, ncol(x))
#	
#	###########################################
#	###  start the iteration
#	if(verbose) cat("\n start the iteration:")
#	while(it <= maxit & criteria >= eps){
#		xold <- x
#		it=it+1
#		for(i in which(indNA)){
#			if(verbose) cat("\n column", i)
#			## change the first column with that one with the highest amount of NAs
#			## in the step
#			if(wcol[indM[i]] > 0){
#				xNA=x[,indM[i]]
#				x1=x[,1]
#				x[,1]=xNA
#				x[,indM[i]]=x1
#				
#				if(method == "roundedZero"){
#					xilr <- pivotCoord(x)
#					phi <- pivotCoord(cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE]))[,1] # TODO: phi auserhalb der Schleife!
#					## --> x hat sich geaendert aber dl nicht.
#					xilr2 <- data.frame(xilr)
#					c1 <- colnames(xilr2)[1]
#					colnames(xilr2)[1] <- "V1"
#					reg1 = lm(V1 ~ ., data=xilr2)
#					yhat2 <- predict(reg1, new.data=xilr2[,-i]) 	
#					if(bruteforce){ 
#						xilr2[w[, indM[i]], 1] <- ifelse(yhat2[w[, indM[i]]] <= phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2[w[, indM[i]]] )
#					} else {
#						s <- sqrt(sum(reg1$res^2)/(nrow(xilr2)-ncol(xilr2)))
#						ex <- (phi - yhat2)/s 
#						yhat2sel <- ifelse(dnorm(ex[w[, indM[i]]]) > .Machine$double.eps,
#								yhat2[w[, indM[i]]] - s*dnorm(ex[w[, indM[i]]])/pnorm(ex[w[, indM[i]]]),
#								yhat2[w[, indM[i]]])
#						if(any(is.na(yhat2)) || any(yhat2=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
#						# check if we are under the DL:
#						if(any(yhat2sel >= phi[w[, indM[i]]])){
#							yhat2sel <- ifelse(yhat2sel > phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2sel)
#						}
#						xilr2[w[, indM[i]], 1] <- yhat2sel
#					}
#				}
#				if (method == "pls") {	
#					xilr = pivotCoord(x)
#					ttt <- cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE])
#					phi <- pivotCoord(cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE]))[,1]
##				if(verbose) cat("\n phi", phi, "in iteration", it)
#					xilr2 <- data.frame(xilr)	
#					c1 <- colnames(xilr2)[1]					
#					colnames(xilr2)[1] <- "V1"				
#					if(it == 1){ ## evaluate ncomp.
#						test <- xilr2[,!(colnames(xilr2) == "V1")]
#						testy <- xilr2[,"V1"]
#						X <- xilr2[,!(colnames(xilr2) == "V1")]
#						y <- xilr2[,"V1"]
#						nComp[i] <- bootnComp(xilr2[,!(colnames(xilr2) == "V1")],y=xilr2[,"V1"], R)
#					}
#					reg1 <- mvr(V1~.,ncomp=nComp[i], data = xilr2, method="simpls")
#					yhat2 <- as.numeric(predict(reg1, new.data=xilr2[,-i], ncomp=nComp[i]) )
##				if(noisemethod=="residuals") residuals[,i] <- reg1$residuals[,,nComp[i]]
##				if(noisemethod=="normal"){
##					mu[i] <- median(residuals(reg1)[,,nComp[i]])
##					sigma[i] <- mad(residuals(reg1)[,,nComp[i]]) * noiseeffect		  
##				}
#					colnames(xilr2)[1] <- c1					
##				fit=reg1$fitted.values[,,nComp[i]]	
#					s <- sqrt(sum(reg1$residuals[,,nComp[i]]^2)/abs(nrow(xilr2)-ncol(xilr2)))
#					ex <- as.numeric((phi - yhat2)/s )
#					yhat2sel <- ifelse(dnorm(ex[w[, indM[i]]]) > .Machine$double.eps,
#							yhat2[w[, indM[i]]] - s*dnorm(ex[w[, indM[i]]])/pnorm(ex[w[, indM[i]]]),
#							yhat2[w[, indM[i]]])
#					if(any(is.na(yhat2)) || any(yhat2=="Inf")) stop("Problems in ilr because of NaN or NA estimates")
#					# check if we are under the DL:
#					if(any(yhat2sel >= phi[w[, indM[i]]])){
#						yhat2sel <- ifelse(yhat2sel > phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2sel)
#					}
##				if(verbose) cat("\n yhat2sel at iteration", it, "on column", i ,"is", yhat2sel)
#					xilr2[w[, indM[i]], 1] <- yhat2sel
#					
#					
#				}	
#				if(method == "roundedZeroRobust"){
#					xilr <- pivotCoord(x)
#					x[x < .Machine$double.eps] <- 0.00000000001  ## TODO: better solution 
#					phi <- pivotCoord(cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE]))[,1] # TODO: phi auserhalb der Schleife!
#					xilr2 <- data.frame(xilr)
#					c1 <- colnames(xilr2)[1]
#					colnames(xilr2)[1] <- "V1"
#					reg1 = rlm(V1 ~ ., data=xilr2, method="MM",maxit = 100)
##	            reg1 = lmrob(V1 ~ ., data=xilr2)
#					yhat2 <- predict(reg1, new.data=xilr2[,-i]) 	
#					if(bruteforce){ 
#						xilr2[w[, indM[i]], 1] <- ifelse(yhat2[w[, indM[i]]] <= phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2[w[, indM[i]]] )
#					} else {
#						s <- reg1$s
#						#PF# s <- IQR(reg1$resid)/1.349
#						ex <- (phi - yhat2)/s 
#						yhat2sel <- ifelse(dnorm(ex[w[, indM[i]]]) > .Machine$double.eps,
#								yhat2[w[, indM[i]]] - s*dnorm(ex[w[, indM[i]]])/pnorm(ex[w[, indM[i]]]), 
#								yhat2[w[, indM[i]]])
#						if(any(is.na(yhat2)) || any(yhat2=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
#						# check if we are under the DL:
#						if(any(yhat2sel >= phi[w[, indM[i]]])){
#							yhat2sel <- ifelse(yhat2sel > phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2sel)
#						}
#						xilr2[w[, indM[i]], 1] <- yhat2sel
#					}
#				}
#				
#				xilr <- xilr2 
#				x <- pivotCoordInv(xilr)	
#				
#				## return the order of columns:
#				xNA=x[,1]
#				x1=x[,indM[i]]
#				x[,1]=x1
#				x[,indM[i]]=xNA
#				x <- adjust2(x, xcheck, w)
#				print(x[1:2,1:2])
#				if(verbose) cat("\n iteration", it, "column", i, "value", x[2,1])
#			}
#			
#			
#		}
#		
#		
#		criteria <- sum( ((xold - x)/x)^2, na.rm=TRUE) ## DIRTY: (na.rm=TRUE)
#		colnames(x) <- colnames(xcheck)
#		
#	}
#	
#	res <- list(xOrig=xcheck, xImp=x[,order(o)], criteria=criteria, iter=it,  nComp=nComp,
#			maxit=maxit, w=length(which(w)), wind=w)
#	class(res) <- "imp"
##	res <- adjust(res)
#	invisible(res)
#}
#
#
#
##################################################################################
#
#`impRZilr` <-
#function(x, maxit=10, eps=0.1, method="roundedZero", 
#		dl=rep(0.05, ncol(x)), bruteforce=FALSE,  noise = TRUE, noisemethod="residuals", noiseeffect=1, R=10,
#		verbose=FALSE){
#
#	
#	if( is.vector(x) ) stop("x must be a matrix or data frame")
#	stopifnot((method %in% c("ltsReg", "ltsReg2", "classical", "lm", "roundedZero","roundedZeroRobust","pls")))
#    if( length(dl) < ncol(x)) stop(paste("dl has to be a vector of ", ncol(x)))
#	
#	#################
#	## zeros to NA:
#	x[x==0] <- NA
#
#	#################
#	## index of missings / non-missings
#	w <- is.na(x)
#	wn <- !is.na(x)
#	w2 <- apply(x, 1, function(x){
#          length(which(is.na(x)))
#	})
#    indNA <- apply(x, 2, function(x){any(is.na(x))})
#
#    #################
#	## sort the columns of the data according to the amount of missings in the variables
#	wcol <- apply(x, 2, function(x) length(which(is.na(x))))
#	indM <- sort(wcol, index.return=TRUE, decreasing=TRUE)$ix
#	cn <- colnames(x)
#	xcheck <- x
#	
#	################
#	## detection limit in ilr-space
##	for(i in which(indNA)){
##	   	
##	}
#	
#	
##	x <- constSum(x)
##	dl <- dl/sum(dl)
#	## initialisation:
##		x[is.na(x)] <- 0.001
#	    for(i in 1:length(dl)){
#		   ind <- is.na(x[,i])
#		   #PF# if(length(ind) > 0) x[ind,i] <- dl[i]/3*2 
#		   if(length(ind) > 0) x[ind,i] <- dl[i]*runif(sum(ind),1/3,2/3)
#	    }
#		
##		x <- constSum(x)
#		
#    ## parameters:
#		it=0
#		criteria <- 10000000
#		error <- rep(0, ncol(x))
#		nComp <- normalerror <- numeric(ncol(x))
#		if(noisemethod=="residuals") residuals <- matrix(,ncol=ncol(x), nrow=nrow(x))
#		nComp[nComp==0] <- NA
#		criteria <- 1e+07
#		sigma <- mu <- rep(0, ncol(x))
#		
#	###########################################
#	###  start the iteration
#	if(verbose) cat("\n start the iteration:")
#	while(it <= maxit & criteria >= eps){
#  		xold <- x
#  		it=it+1
#  		for(i in which(indNA)){
#			if(verbose) cat("\n column", i)
#		    ## change the first column with that one with the highest amount of NAs
#		    ## in the step
#			if(wcol[indM[i]] > 0){
#		    xNA=x[,indM[i]]
#		    x1=x[,1]
#		    x[,1]=xNA
#		    x[,indM[i]]=x1
#			
#			if(method == "roundedZero"){
#				xilr <- pivotCoord(x)
#				phi <- pivotCoord(cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE]))[,1] # TODO: phi auserhalb der Schleife!
#				## --> x hat sich geaendert aber dl nicht.
#				xilr2 <- data.frame(xilr)
#				c1 <- colnames(xilr2)[1]
#				colnames(xilr2)[1] <- "V1"
#				reg1 = lm(V1 ~ ., data=xilr2)
#				yhat2 <- predict(reg1, new.data=xilr2[,-i]) 	
#				if(bruteforce){ 
#					xilr2[w[, indM[i]], 1] <- ifelse(yhat2[w[, indM[i]]] <= phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2[w[, indM[i]]] )
#				} else {
#					s <- sqrt(sum(reg1$res^2)/(nrow(xilr2)-ncol(xilr2)))
#					ex <- (phi - yhat2)/s 
#                                        yhat2sel <- ifelse(dnorm(ex[w[, indM[i]]]) > .Machine$double.eps,
#                                                           yhat2[w[, indM[i]]] - s*dnorm(ex[w[, indM[i]]])/pnorm(ex[w[, indM[i]]]),
#                                                           yhat2[w[, indM[i]]])
#                                        if(any(is.na(yhat2)) || any(yhat2=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
#                                        # check if we are under the DL:
#                                        if(any(yhat2sel >= phi[w[, indM[i]]])){
#						yhat2sel <- ifelse(yhat2sel > phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2sel)
#					}
#                                        xilr2[w[, indM[i]], 1] <- yhat2sel
#		        }
#			}
#			if (method == "pls") {	
#				xilr = pivotCoord(x)
#				ttt <- cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE])
#				phi <- pivotCoord(cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE]))[,1]
##				if(verbose) cat("\n phi", phi, "in iteration", it)
#				xilr2 <- data.frame(xilr)	
#				c1 <- colnames(xilr2)[1]					
#				colnames(xilr2)[1] <- "V1"				
#				if(it == 1){ ## evaluate ncomp.
#					test <- xilr2[,!(colnames(xilr2) == "V1")]
#					testy <- xilr2[,"V1"]
#					X <- xilr2[,!(colnames(xilr2) == "V1")]
#					y <- xilr2[,"V1"]
#					nComp[i] <- bootnComp(xilr2[,!(colnames(xilr2) == "V1")],y=xilr2[,"V1"], R)
#				}
#				reg1 <- mvr(V1~.,ncomp=nComp[i], data = xilr2, method="simpls")
#				yhat2 <- as.numeric(predict(reg1, new.data=xilr2[,-i], ncomp=nComp[i]) )
##				if(noisemethod=="residuals") residuals[,i] <- reg1$residuals[,,nComp[i]]
##				if(noisemethod=="normal"){
##					mu[i] <- median(residuals(reg1)[,,nComp[i]])
##					sigma[i] <- mad(residuals(reg1)[,,nComp[i]]) * noiseeffect		  
##				}
#				colnames(xilr2)[1] <- c1					
##				fit=reg1$fitted.values[,,nComp[i]]	
#				s <- sqrt(sum(reg1$residuals[,,nComp[i]]^2)/abs(nrow(xilr2)-ncol(xilr2)))
#				ex <- as.numeric((phi - yhat2)/s )
#				yhat2sel <- ifelse(dnorm(ex[w[, indM[i]]]) > .Machine$double.eps,
#						yhat2[w[, indM[i]]] - s*dnorm(ex[w[, indM[i]]])/pnorm(ex[w[, indM[i]]]),
#						yhat2[w[, indM[i]]])
#				if(any(is.na(yhat2)) || any(yhat2=="Inf")) stop("Problems in ilr because of NaN or NA estimates")
#				# check if we are under the DL:
#				if(any(yhat2sel >= phi[w[, indM[i]]])){
#					yhat2sel <- ifelse(yhat2sel > phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2sel)
#				}
##				if(verbose) cat("\n yhat2sel at iteration", it, "on column", i ,"is", yhat2sel)
#				xilr2[w[, indM[i]], 1] <- yhat2sel
#				
#				
#			}	
#			if(method == "roundedZeroRobust"){
#				xilr <- pivotCoord(x)
#				x[x < .Machine$double.eps] <- 0.00000000001  ## TODO: better solution 
#				phi <- pivotCoord(cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE]))[,1] # TODO: phi auserhalb der Schleife!
#				xilr2 <- data.frame(xilr)
#				c1 <- colnames(xilr2)[1]
#				colnames(xilr2)[1] <- "V1"
#				reg1 = rlm(V1 ~ ., data=xilr2, method="MM",maxit = 100)
##	            reg1 = lmrob(V1 ~ ., data=xilr2)
#				yhat2 <- predict(reg1, new.data=xilr2[,-i]) 	
#				if(bruteforce){ 
#					xilr2[w[, indM[i]], 1] <- ifelse(yhat2[w[, indM[i]]] <= phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2[w[, indM[i]]] )
#				} else {
#					s <- reg1$s
#					#PF# s <- IQR(reg1$resid)/1.349
#					ex <- (phi - yhat2)/s 
#					yhat2sel <- ifelse(dnorm(ex[w[, indM[i]]]) > .Machine$double.eps,
#					                   yhat2[w[, indM[i]]] - s*dnorm(ex[w[, indM[i]]])/pnorm(ex[w[, indM[i]]]), 
#					                   yhat2[w[, indM[i]]])
#					if(any(is.na(yhat2)) || any(yhat2=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
#					# check if we are under the DL:
#                                        if(any(yhat2sel >= phi[w[, indM[i]]])){
#						yhat2sel <- ifelse(yhat2sel > phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2sel)
#					}
#					xilr2[w[, indM[i]], 1] <- yhat2sel
#				}
#			}
#			
#			xilr <- xilr2 
#			x <- pivotCoordInv(xilr)	
#
#			## return the order of columns:
#			xNA=x[,1]
#			x1=x[,indM[i]]
#			x[,1]=x1
#			x[,indM[i]]=xNA
#			x <- adjust2(x, xcheck, w)
#			print(x[1:2,1:2])
#			if(verbose) cat("\n iteration", it, "column", i, "value", x[2,1])
#			}
#
#
# 	   }
#
#
#  	  criteria <- sum( ((xold - x)/x)^2, na.rm=TRUE) ## DIRTY: (na.rm=TRUE)
#	  colnames(x) <- colnames(xcheck)
#
#	}
#		
#	res <- list(xOrig=xcheck, xImp=x, criteria=criteria, iter=it,  nComp=nComp,
#			    maxit=maxit, w=length(which(w)), wind=w)
#	class(res) <- "imp"
##	res <- adjust(res)
#	invisible(res)
#}


# `impRZilr` <-
# function(x, maxit=10, eps=0.1, method="roundedZero", 
# 		dl=rep(0.05, ncol(x)), bruteforce=FALSE){
# 
# 	`ilrM` <-
# 			function(x, info=TRUE){
# 		x.ilr=matrix(NA,nrow=nrow(x),ncol=ncol(x)-1)
# 		D=ncol(x)
# 		for (i in 1:ncol(x.ilr)){
# 			x.ilr[,i]=sqrt((D-i)/(D-i+1))*log(((apply(as.matrix(x[,(i+1):D,drop=FALSE]),1,prod))^(1/(D-i)))/(x[,i]))
# 		} 
# #		invisible(-x.ilr)
#         if(info)  {res <- list(xilr=-x.ilr,
# 			 xOrig=x)
# 	         class(res) <- "ilrTransform"
# 	    } else {
# 			res <- -x.ilr
# 		}
# 		res
# 	}
# 	`invilrM` <-
# 			function(x.ilr){
# 		if(class(x.ilr) =="ilrTransform" ){
# 			fac <- rowSums(x.ilr$xOrig)
# 			x.ilr <- x.ilr$xilr
# 			y <- matrix(0,nrow=nrow(x.ilr),ncol=ncol(x.ilr)+1)
# 			D=ncol(x.ilr)+1
# 			y[,1]=-sqrt((D-1)/D)*x.ilr[,1]
# 			for (i in 2:ncol(y)){
# 				for (j in 1:(i-1)){
# 					y[,i]=y[,i]+x.ilr[,j]/sqrt((D-j+1)*(D-j))
# 				}
# 			}
# 			for (i in 2:(ncol(y)-1)){
# 				y[,i]=y[,i]-sqrt((D-i)/(D-i+1))*x.ilr[,i]
# 			}
# 			yexp=exp(-y)
# 			x.back=yexp/apply(yexp,1,sum) * fac # * rowSums(derOriginaldaten)
# 			invisible(x.back)			
# 		} else {
# 			y=matrix(0,nrow=nrow(x.ilr),ncol=ncol(x.ilr)+1)
# 			D=ncol(x.ilr)+1
# 			y[,1]=-sqrt((D-1)/D)*x.ilr[,1]
# 			for (i in 2:ncol(y)){
# 				for (j in 1:(i-1)){
# 					y[,i]=y[,i]+x.ilr[,j]/sqrt((D-j+1)*(D-j))
# 				}
# 			}
# 			for (i in 2:(ncol(y)-1)){
# 				y[,i]=y[,i]-sqrt((D-i)/(D-i+1))*x.ilr[,i]
# 			}
# 			yexp=exp(-y)
# 			x.back=yexp/apply(yexp,1,sum) # * rowSums(derOriginaldaten)
# 			invisible(x.back)
#         #return(yexp)
# 		}
# 		x.back
# 	}
# 	
# 	
# 	
# 	if( is.vector(x) ) stop("x must be a matrix or data frame")
# 	stopifnot((method %in% c("ltsReg", "ltsReg2", "classical", "lm", "roundedZero","roundedZeroRobust")))
#     if( length(dl) < ncol(x)) stop(paste("dl has to be a vector of ", ncol(x)))
# 	
# 
# 	## zeros to NA:
# 	x[x==0] <- NA
# 
# 	##index of missings / non-missings
# 	w <- is.na(x)
# 	wn <- !is.na(x)
# 	w2 <- apply(x, 1, function(x){
#           length(which(is.na(x)))
# 	})
# 
# 
# 	##sort the columns of the data according to the amount of missings in the variables
# 	wcol <- apply(x,2,function(x) length(which(is.na(x))))
# 	indM <- sort(wcol, index.return=TRUE, decreasing=TRUE)$ix
# 	cn <- colnames(x)
# 	xcheck <- x
# 
# 	## initialisation:
# #		x[is.na(x)] <- 0.001
# 	    for(i in 1:length(dl)){
# 		   ind <- is.na(x[,i])
# 		   #PF# if(length(ind) > 0) x[ind,i] <- dl[i]/3*2 
# 		   if(length(ind) > 0) x[ind,i] <- dl[i]*runif(sum(ind),1/3,2/3)
# 	    }
# 		
# #		x <- constSum(x)
# 		
#     ## parameters:
# 		it=0
# 		criteria <- 10000000
# 		error <- rep(0, ncol(x))
# 		
# 	###########################################
# 	###  start the iteration
# 	while(it <= maxit & criteria >= eps){
#   		xold <- x
#   		it=it+1
#   		for(i in 1:ncol(x)){
# 		    ## change the first column with that one with the highest amount of NAs
# 		    ## in the step
# 			if(wcol[indM[i]] > 0){
# 		    xNA=x[,indM[i]]
# 		    x1=x[,1]
# 		    x[,1]=xNA
# 		    x[,indM[i]]=x1
# 			
# 			if(method == "roundedZero"){
# 				xilr <- ilrM(x)
# 				phi <- ilrM(cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE]), info=FALSE)[,1] # TODO: phi auserhalb der Schleife!
# 				## --> x hat sich geaendert aber dl nicht.
# 				xilr2 <- data.frame(xilr$xilr)
# 				c1 <- colnames(xilr2)[1]
# 				colnames(xilr2)[1] <- "V1"
# 				reg1 = lm(V1 ~ ., data=xilr2)
# 				yhat2 <- predict(reg1, new.data=xilr2[,-i]) 	
# 				if(bruteforce){ 
# 					xilr2[w[, indM[i]], 1] <- ifelse(yhat2[w[, indM[i]]] <= phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2[w[, indM[i]]] )
# 				} else {
# #					s <- sd(reg1$res, na.rm=TRUE)
# 					s <- sqrt(sum(reg1$res^2)/(nrow(xilr2)-ncol(xilr2)))
# 					ex <- (phi - yhat2)/s 
# #####################################################
# # CHANGED PF:
# #PF#                                    if(all(dnorm(ex) > 5 * .Machine$double.eps)) yhat2 <- yhat2 - s*dnorm(ex)/pnorm(ex)
#                                         yhat2sel <- ifelse(dnorm(ex[w[, indM[i]]]) > .Machine$double.eps,
#                                                            yhat2[w[, indM[i]]] - s*dnorm(ex[w[, indM[i]]])/pnorm(ex[w[, indM[i]]]),
#                                                            yhat2[w[, indM[i]]])
#                                         if(any(is.na(yhat2)) || any(yhat2=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
#                                         # check if we are under the DL:
#                                         if(any(yhat2sel >= phi[w[, indM[i]]])){
# 						yhat2sel <- ifelse(yhat2sel > phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2sel)
# 					}
#                                         xilr2[w[, indM[i]], 1] <- yhat2sel
# #####################################################
# 		        }
# 			}
# 			if(method == "roundedZeroRobust"){
# 				xilr <- ilrM(x)
# 				x[x < .Machine$double.eps] <- 0.00000000001  ## TODO: better solution 
# 				phi <- ilrM(cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE]), info=FALSE)[,1] # TODO: phi auserhalb der Schleife!
# 				xilr2 <- data.frame(xilr$xilr)
# 				c1 <- colnames(xilr2)[1]
# 				colnames(xilr2)[1] <- "V1"
# 				reg1 = rlm(V1 ~ ., data=xilr2, method="MM",maxit = 100)
# #	            reg1 = lmrob(V1 ~ ., data=xilr2)
# 				yhat2 <- predict(reg1, new.data=xilr2[,-i]) 	
# 				if(bruteforce){ 
# 					xilr2[w[, indM[i]], 1] <- ifelse(yhat2[w[, indM[i]]] <= phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2[w[, indM[i]]] )
# 				} else {
# #						s <- mad(reg1$res, na.rm=TRUE)
# 					s <- reg1$s
# 					#PF# s <- IQR(reg1$resid)/1.349
# 					ex <- (phi - yhat2)/s 
# #####################################################
# # CHANGED PF:
# #PF#					if(all(dnorm(ex) > 5 * .Machine$double.eps)) yhat2 <- yhat2 - s*dnorm(ex)/pnorm(ex)
# 					yhat2sel <- ifelse(dnorm(ex[w[, indM[i]]]) > .Machine$double.eps,
# 					                   yhat2[w[, indM[i]]] - s*dnorm(ex[w[, indM[i]]])/pnorm(ex[w[, indM[i]]]), 
# 					                   yhat2[w[, indM[i]]])
# 					if(any(is.na(yhat2)) || any(yhat2=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
# 					# check if we are under the DL:
#                                         if(any(yhat2sel >= phi[w[, indM[i]]])){
# 						yhat2sel <- ifelse(yhat2sel > phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2sel)
# 					}
# 					xilr2[w[, indM[i]], 1] <- yhat2sel
# #####################################################
# 				}
# 			}
# 			
# 			xilr$xilr <- xilr2 
# 			x=invilrM(xilr)		
# 			## return the order of columns:
# 			xNA=x[,1]
# 			x1=x[,indM[i]]
# 			x[,1]=x1
# 			x[,indM[i]]=xNA
# 			}
# 
# 
#  	   }
# 
# 
#   	  criteria <- sum( ((xold - x)/x)^2, na.rm=TRUE) ## DIRTY: (na.rm=TRUE)
# 	  colnames(x) <- colnames(xcheck)
# 
# 	}
# 		
# 	res <- list(xOrig=xcheck, xImp=x, criteria=criteria, iter=it, 
# 			    maxit=maxit, w=length(which(w)), wind=w)
# 	class(res) <- "imp"
# 	invisible(res)
# }
# 

Try the robCompositions package in your browser

Any scripts or data that you put into this service are public.

robCompositions documentation built on Jan. 13, 2021, 10:07 p.m.