Nothing
#' EM-based replacement of rounded zeros in compositional data
#'
#' Parametric replacement of rounded zeros for compositional data using
#' classical and robust methods based on ilr coordinates with a special
#' choice of balances.
#'
#' Statistical analysis of compositional data including zeros runs into
#' problems, because log-ratios cannot be applied. Usually, rounded zeros are
#' considerer as missing not at random missing values.
#'
#' The algorithm iteratively imputes parts with rounded zeros whereas in each
#' step (1) compositional data are expressed in pivot coordinates (2) tobit regression is
#' applied (3) the rounded zeros are replaced by the expected values (4) the
#' corresponding inverse ilr mapping is applied. After all parts are
#' imputed, the algorithm starts again until the imputations do not change.
#'
#' @aliases imputeBDLs print.replaced checkData adjustImputed
#' @param x data.frame or matrix
#' @param maxit maximum number of iterations
#' @param eps convergency criteria
#' @param method either "lm", "lmrob" or "pls"
#' @param dl Detection limit for each variable. zero for variables with
#' variables that have no detection limit problems.
#' @param variation, if TRUE those predictors are chosen in each step, who's variation is lowest to the predictor.
#' @param nPred, if determined and variation equals TRUE, it fixes the number of predictors
#' @param nComp if determined, it fixes the number of pls components. If
#' \dQuote{boot}, the number of pls components are estimated using a
#' bootstraped cross validation approach.
#' @param bruteforce sets imputed values above the detection limit to the
#' detection limit. Replacement above the detection limit are only exeptionally
#' occur due to numerical instabilities. The default is FALSE!
#' @param noisemethod adding noise to imputed values. Experimental
#' @param noise TRUE to activate noise (experimental)
#' @param R number of bootstrap samples for the determination of pls
#' components. Only important for method \dQuote{pls}.
#' @param correction normal or density
#' @param verbose additional print output during calculations.
#' @param test an internal test situation (this parameter will be deleted soon)
#' @importFrom cvTools cvFit
#' @importFrom zCompositions multRepl
#' @importFrom fpc pamk
#' @import pls
#' @return \item{x }{imputed data} \item{criteria }{change between last and
#' second last iteration} \item{iter }{number of iterations} \item{maxit
#' }{maximum number of iterations} \item{wind}{index of zeros}
#' \item{nComp}{number of components for method pls} \item{method}{chosen
#' method}
#' @author Matthias Templ, method subPLS from Jiajia Chen
#' @references Templ, M., Hron, K., Filzmoser, P., Gardlo, A. (2016).
#' Imputation of rounded zeros for high-dimensional compositional data.
#' \emph{Chemometrics and Intelligent Laboratory Systems}, 155, 183-190.
#'
#' Chen, J., Zhang, X., Hron, K., Templ, M., Li, S. (2018).
#' Regression imputation with Q-mode clustering for rounded zero replacement in high-dimensional compositional data.
#' \emph{Journal of Applied Statistics}, 45 (11), 2067-2080.
#' @seealso \code{\link{imputeBDLs}}
#' @keywords manip multivariate
#' @export
#' @importFrom MASS rlm
#' @examples
#'
#' p <- 10
#' n <- 50
#' k <- 2
#' T <- matrix(rnorm(n*k), ncol=k)
#' B <- matrix(runif(p*k,-1,1),ncol=k)
#' X <- T %*% t(B)
#' E <- matrix(rnorm(n*p, 0,0.1), ncol=p)
#' XE <- X + E
#' data <- data.frame(pivotCoordInv(XE))
#' col <- ncol(data)
#' row <- nrow(data)
#' DL <- matrix(rep(0),ncol=col,nrow=1)
#' for(j in seq(1,col,2))
#' {DL[j] <- quantile(data[,j],probs=0.06,na.rm=FALSE)}
#'
#' for(j in 1:col)
#' {data[data[,j]<DL[j],j] <- 0}
#' \dontrun{
#' # under dontrun because of long exectution time
#' imp <- imputeBDLs(data,dl=DL,maxit=10,eps=0.1,R=10,method="subPLS")
#' imp
#' imp <- imputeBDLs(data,dl=DL,maxit=10,eps=0.1,R=10,method="pls", variation = FALSE)
#' imp
#' imp <- imputeBDLs(data,dl=DL,maxit=10,eps=0.1,R=10,method="lm")
#' imp
#' imp <- imputeBDLs(data,dl=DL,maxit=10,eps=0.1,R=10,method="lmrob")
#' imp
#'
#' data(mcad)
#' ## generate rounded zeros artificially:
#' x <- mcad
#' x <- x[1:25, 2:ncol(x)]
#' dl <- apply(x, 2, quantile, 0.1)
#' for(i in seq(1, ncol(x), 2)){
#' x[x[,i] < dl[i], i] <- 0
#' }
#' ni <- sum(x==0, na.rm=TRUE)
#' ni/(ncol(x)*nrow(x)) * 100
#' dl[seq(2, ncol(x), 2)] <- 0
#' replaced_lm <- imputeBDLs(x, dl=dl, eps=1, method="lm",
#' verbose=FALSE, R=50, variation=TRUE)$x
#' replaced_lmrob <- imputeBDLs(x, dl=dl, eps=1, method="lmrob",
#' verbose=FALSE, R=50, variation=TRUE)$x
#' replaced_plsfull <- imputeBDLs(x, dl=dl, eps=1,
#' method="pls", verbose=FALSE, R=50,
#' variation=FALSE)$x
#' }
#'
#'
#'
`imputeBDLs` <-
function(x, maxit=10, eps=0.1, method="subPLS",
dl=rep(0.05, ncol(x)), variation=TRUE, nPred=NULL,
nComp = "boot", bruteforce=FALSE,
noisemethod="residuals", noise=FALSE, R=10,
correction="normal", verbose=FALSE, test = FALSE){
## needed for visible global function definition
isomLRInvp <- 1
isomLRinv <- function(x){
pivotCoordInv(x = x)
}
## check if all is fine:
# check if values are in (0, dl[i]):
checkDL <- function(x, dl, indexNA){
check <- logical(ncol(x))
for(i in 1:ncol(x)){
critvals <- x[indexNA[,i],i] > dl[i]
check[i] <- any(critvals)
if(check[i]){
x[which(x[indexNA[,i],i] > dl[i]),i] <- dl[i]
}
}
if(any(check) & verbose){
message(paste(sum(critvals), "/", sum(w), "replaced values has been corrected"))
}
x
}
## check if data are fine
checkData(x, dl)
## some specific checks
stopifnot((method %in% c("lm", "MM", "lmrob", "pls", "subPLS")))
if(method=="pls" & ncol(x)<5) stop("too less variables/parts for method pls")
if(!(correction %in% c("normal","density"))){
stop("correction method must be normal or density")
}
if(method == "pls" & variation){
stop("if variation is TRUE then pls is not supported.")
}
## how to deal with user input on nComp
if(is.null(nComp)){
pre <- FALSE
nC <- NULL
} else if(nComp=="boot"){
nC <- integer(ncol(x))
pre <- TRUE
} else if(length(nComp) == ncol(x)){
nC <- nComp
pre <- FALSE
} else {
pre <- FALSE
}
## store some important values
n <- nrow(x)
d <- ncol(x)
rs <- rowSums(x, na.rm = TRUE)
if(method == "subPLS"){
w <- x == 0
indexFinalCheck <- x == 0
mulzero <- zCompositions::multRepl(x,label=0,dl=dl)
dd <- as.dist(robCompositions::variation(mulzero))
g <- fpc::pamk(dd)$nc
pos <- as.matrix(cutree(hclust(dd, method="ward.D"),g))
indNA <- apply(x,2,function(x){any(x==0)})
gz <- intersect(order(-table(pos[indNA])),which(table(pos)>1))
fun.PLS <- function(x,data,dl,pos,b,R)
{
data_b <- data[,pos==b]
data_nb <- data[,pos!=b]
col1 <- ncol(data_b)
row <- nrow(data_b)
u <- cbind(cenLR(data_b)$x,cenLR(data_nb)$x)
u1 <- u[,1:col1]
n <- bootnComp(u[,-(1:col1),drop=FALSE],as.matrix(u1),R,plotting=FALSE)$res
# form <- paste(paste(colnames(u1), collapse = "+"), "~", paste(colnames(u), collapse = "+"))
# form <- as.formula(form)
# pls <- mvr(form, data=cbind(u1, u), method="simpls",
# ncomp=n)
pls <- mvr(as.matrix(u1)~ as.matrix(u[,-(1:col1)]),ncomp=n,method="simpls")
# pls <- mvr(u1~ u[,-(1:col1)],ncomp=n,method="simpls")
mean <- matrix(predict(pls,ncomp=n),ncol=col1)
sigma <- cov(u1-mean)
sig_b <- x[,pos==b]==0
cz <- which(colSums(sig_b)!=0)
rz <- which(rowSums(sig_b)!=0)
lj <- dl[pos==b]
for(j in cz)
{
linjie <- as.matrix(cenLR(cbind(rep(lj[j],row),data_b[,-j,drop=FALSE]))$x[,1,drop=FALSE])
u1[sig_b[,j],j] <- mean[sig_b[,j],j]-sqrt(sigma[j,j])*(dnorm((linjie[sig_b[,j]]-mean[sig_b[,j],j])/sqrt(sigma[j,j]))/pnorm((linjie[sig_b[,j]]-mean[sig_b[,j],j])/sqrt(sigma[j,j])))
}
for(i in rz)
{if(rowSums(sig_b)[i]<col1)
{u1[i,!sig_b[i,]] <- (sum(!sig_b[i,])*u1[i,!sig_b[i,]]-rep(sum(u1[i,]),sum(!sig_b[i,])))/(sum(!sig_b[i,]))}
}
chabu <- adjustImputed(cenLRinv(u1)[rowSums(sig_b)<col1,],x[rowSums(sig_b)<col1,pos==b],sig_b[rowSums(sig_b)<col1,])
data[rowSums(sig_b)<col1,pos==b] <- chabu
return(data)
}
it <- 1
criteria <- 99999999
impute <- mulzero
while(it <= maxit & criteria >= eps){
xold <- impute
for (m in 1:length(gz))
{impute <- fun.PLS(x,impute,dl,pos,gz[m],R)}
it <- it+1
criteria <- sum(((xold-impute)/impute)^2, na.rm = TRUE)
}
impute <- checkDL(impute, dl, indexFinalCheck)
res <- list(x=impute, criteria=criteria, iter=it,
maxit=maxit, wind=w, nComp=nC, nPred=nPred,
variation=variation,
method=method, dl=dl)
class(res) <- "replaced"
return(res)
}
# if(method == "pls_clust"){
# indexFinalCheck <- x == 0
# mulzero <- zCompositions::multRepl(x,label=0,dl=dl)
# dd <- as.dist(robCompositions::variation(mulzero))
# g <- fpc::pamk(dd)$nc
# pos <- as.matrix(cutree(hclust(dd, method="ward.D"),g))
# indNA <- apply(x,2,function(x){any(x==0)})
# gz <- intersect(order(-table(pos[indNA])),which(table(pos)>1))
#
# fun.PLS <- function(x,data,dl,pos,b,R)
# {
# data_b <- data[,pos==b]
# data_nb <- data[,pos!=b]
# col1 <- ncol(data_b)
# row <- nrow(data_b)
# u <- cbind(cenLR(data_b)$x.clr, cenLR(data_nb)$x.clr)
# u1 <- u[,1:col1]
# n <- bootnComp(u[,-(1:col1),drop=FALSE],u1,R,plotting=FALSE)$res
# pls <- mvr(u1~ u[,-(1:col1)],ncomp=n,method="simpls")
# mean <- matrix(predict(pls,ncomp=n),ncol=col1)
# sigma <- cov(u1-mean)
# sig_b <- x[,pos==b]==0
# cz <- which(colSums(sig_b)!=0)
# rz <- which(rowSums(sig_b)!=0)
# lj <- dl[pos==b]
# for(j in cz)
# {
# linjie <- clr(cbind(rep(lj[j],row),data_b[,-j,drop=FALSE]))[,1,drop=FALSE]
# u1[sig_b[,j],j] <- mean[sig_b[,j],j]-sqrt(sigma[j,j])*(dnorm((linjie[sig_b[,j]]-mean[sig_b[,j],j])/sqrt(sigma[j,j]))/pnorm((linjie[sig_b[,j]]-mean[sig_b[,j],j])/sqrt(sigma[j,j])))
# }
# for(i in rz)
# {if(rowSums(sig_b)[i]<col1)
# {u1[i,!sig_b[i,]] <- (sum(!sig_b[i,])*u1[i,!sig_b[i,]]-rep(sum(u1[i,]),sum(!sig_b[i,])))/(sum(!sig_b[i,]))}
# }
# chabu <- adjustImputed(clrInv(u1)[rowSums(sig_b)<col1,],x[rowSums(sig_b)<col1,pos==b],sig_b[rowSums(sig_b)<col1,])
# data[rowSums(sig_b)<col1,pos==b] <- chabu
# return(data)
# }
#
# it <- 1
# criteria <- 99999999
# impute <- mulzero
# while(it <= maxit & criteria >= eps){
# xold <- impute
# for (m in 1:length(gz))
# {impute <- fun.PLS(x,impute,dl,pos,gz[m],R)}
# it <- it+1
# criteria <- sum(((xold-impute)/impute)^2, na.rm = TRUE)
# }
# impute <- checkDL(impute, dl, indexFinalCheck)
#
# res <- list(x=impute, criteria=criteria, iter=it,
# maxit=maxit, wind=w, nComp=nC, nPred=nPred,
# variation=variation,
# method=method, dl=dl)
# class(res) <- "replaced"
# return(res)
# }
## set zeros to NA for easier handling
x[x == 0] <- NA
x[x < 0] <- NA
indexFinalCheck <- is.na(x)
## sort variables of x based on
## decreasing number of zeros in the variables
cn <- colnames(x)
wcol <- - abs(apply(x, 2, function(x) sum(is.na(x))))
o <- order(wcol)
x <- x[,o]
if(verbose) cat("number of variables with zeros:\n", sum(wcol != 0))
## --> now work in revised order of variables
## dl must also be in correct order
dlordered <- dl[o]
#################
## index of missings / non-missings
w <- is.na(x)
wn <- !is.na(x)
xcheck <- x
w2 <- is.na(x)
## initialisation
indNA <- apply(x, 2, function(x){any(is.na(x))})
# print(indNA)
for(i in 1:length(dl)){
ind <- is.na(x[,i])
# if(length(ind) > 0) x[ind,i] <- dl[i]*runif(sum(ind),1/3,2/3)
if(length(ind) > 0) x[ind,i] <- dlordered[i] *2/3
}
xOrig <- x
## nPred if variation == TRUE and cv for number of predictors
## evaluate the vector containing the amount of predictors
if(!is.null(nPred) & length(nPred) == 1){
nPred <- rep(nPred, ncol(x))
}
if(!is.null(nPred) & length(nPred) > 1){
stop("nPred must be NULL or a vector of length 1.")
}
if(is.null(nPred) & variation){
ptmcv <- proc.time()
if(verbose) cat("\n cross validation to estimate number of predictors\n")
ii <- 1
if(verbose) pb <- txtProgressBar(min = 0, max = sum(indNA), style = 3)
nPred <- numeric(nrow(x))
rtmspe <- NULL
for(i in which(indNA)){
xneworder <- cbind(x[, i, drop=FALSE], x[, -i, drop=FALSE])
rv <- variation(x, method = "Pairwise")[1,]
cve <- numeric()
for(np in seq(3, min(c(27,ncol(x),floor(nrow(x)/2))), 3)){
s <- sort(rv)[np]
cols <- which(rv <= s)[1:np]
xn <- xneworder[, cols]
if(test) xilr <- data.frame(isomLRp(xn)) else xilr <- data.frame(pivotCoord(xn))
colnames(xilr)[1] <- "Y"
call <- call(method, formula = Y ~ .)
# perform cross-validation
cve[np] <- suppressWarnings(cvFit(call, data = xilr,
y = xilr$Y,
cost = cvTools::rtmspe,
K = 5, R = 1, costArgs = list(trim = 0.1), seed = 1234)$cv)
}
nPred[i] <- which.min(cve)
## update progress bar
if(verbose) setTxtProgressBar(pb, ii); ii <- ii + 1
}
if(verbose) close(pb)
ptmcv <- proc.time() - ptmcv
}
###############################
### start the iteration ###
###############################
if(verbose) cat("\n start the iteration:")
it <- 1; criteria <- 99999999
while(it <= maxit & criteria >= eps){
if(verbose) cat("\n iteration", it, "; criteria =", criteria)
xold <- x
for(i in which(indNA)){
if(verbose) cat("\n replacement on (sorted) part", i)
## sort data columns first
## i'th positions must be first
xneworder <- cbind(x[, i, drop=FALSE], x[, -i, drop=FALSE])
## if based on variation matrix:
if(variation){
orig <- xneworder
rv <- variation(x, method = "Pairwise")[1,]
s <- sort(rv)[nPred[i]]
cols <- which(rv <= s)[1:nPred[i]]
xneworder <- xneworder[, cols]
}
## factors for preserving abs values:
# fac <- multis(xneworder)
## detection limit in ilr-space
forphi <- cbind(rep(dlordered[i], n), xneworder[,-1,drop=FALSE])
if(any(is.na(forphi))) break()
if(test) phi <- isomLRp(forphi)[,1] else phi <- pivotCoord(forphi)[,1]
# part <- cbind(x[,i,drop=FALSE], x[,-i,drop=FALSE])
xneworder[xneworder < 2*.Machine$double.eps] <- 2*.Machine$double.eps
if(test) xilr <- data.frame(isomLRp(xneworder)) else xilr <- data.frame(pivotCoord(xneworder))
# c1 <- colnames(xilr)[1]
# colnames(xilr)[1] <- "V1"
response <- as.matrix(xilr[, 1, drop=FALSE])
predictors <- as.matrix(xilr[, -1, drop=FALSE])
if(method=="lm"){
reg1 <- lm(response ~ predictors)
yhat <- predict(reg1, newdata=data.frame(predictors))
} else if(method=="MM" | method=="lmrob"){
reg1 <- MASS::rlm(response ~ predictors, method="MM",maxit = 100)#rlm(V1 ~ ., data=xilr2, method="MM",maxit = 100)
yhat <- predict(reg1, newdata=data.frame(predictors))
} else if(method=="pls"){
if(it == 1 & pre){ ## evaluate ncomp.
nC[i] <- bootnComp(xilr[, 2:ncol(xilr), drop=FALSE], y=xilr[, 1], R,
plotting=FALSE)$res #$res2
}
if(verbose) cat(" ; ncomp:", nC[i])
reg1 <- mvr(as.matrix(response) ~ as.matrix(predictors), ncomp=nC[i], method="simpls")
yhat <- predict(reg1, newdata=data.frame(predictors), ncomp=nC[i])
}
# s <- sqrt(sum(reg1$res^2)/abs(nrow(xilr)-ncol(xilr))) ## quick and dirty: abs()
s <- sqrt(sum(reg1$res^2)/nrow(xilr))
yhat <- as.numeric(yhat)
ex <- (phi - yhat)/s
if(correction=="normal"){
yhat2sel <- ifelse(dnorm(ex[w[, i]]) > .Machine$double.eps,
yhat[w[, i]] - s*dnorm(ex[w[, i]])/pnorm(ex[w[, i]]),
yhat[w[, i]])
} else if(correction=="density"){
stop("correction equals density is no longer be supported")
# den <- density(ex[w[,i]])
# distr <- sROC::kCDF(ex[w,i])
}
if(any(is.na(yhat)) || any(yhat=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
# check if we are under the DL:
if(any(yhat2sel >= phi[w[, i]])){
yhat2sel <- ifelse(yhat2sel > phi[w[, i]], phi[w[, i]], yhat2sel)
}
xilr[w[, i], 1] <- yhat2sel
if(test) xinv <- isomLRInvp(xilr) else xinv <- pivotCoordInv(xilr)
## if variation:
if(variation == TRUE){
xneworder <- adjustImputed(xinv, xneworder, w2[, cols])
orig[, cols] <- xneworder #* fac
xinv <- orig
}
## reordering of xOrig
if(i %in% 2:(d-1)){
xinv <- cbind(xinv[,2:i], xinv[,c(1,(i+1):d)])
}
if(i == d){
xinv <- cbind(xinv[,2:d], xinv[,1])
}
if(!variation){
x <- adjustImputed(xinv, xOrig, w2)
} else {
x <- xinv
}
# x <- adjust3(xinv, xOrig, w2)
# ## quick and dirty:
# x[!w] <- xOrig[!w]
}
it <- it + 1
criteria <- sum( ((xold - x)/x)^2, na.rm=TRUE) ## DIRTY: (na.rm=TRUE)
if(verbose & criteria != 0) cat("\n iteration", it, "; criteria =", criteria)
}
#### add random error ###
if(noise){
for(i in which(indNA)){
if(verbose) cat("\n add noise on variable", i)
# add error terms
inderr <- w[,i]
if(noisemethod == "residuals") {
error <- sample(residuals( reg1 )[inderr],
size=wcol[i], replace=TRUE)
reg1$res[inderr] <- error
} else {
mu <- median(residuals( reg1 )[inderr])
sigma <- mad(residuals( reg1 )[inderr])
error <- rnorm(wcol[i], mean=mu, sd=sigma)
reg1$res[inderr] <- error
}
# return realizations
yhat[inderr] <- yhat[inderr] + error
s <- sqrt(sum(reg1$res^2)/nrow(xilr)) ## quick and dirty: abs()
ex <- (phi - yhat)/s
yhat2sel <- ifelse(dnorm(ex[w[, i]]) > .Machine$double.eps,
yhat[w[, i]] - s*dnorm(ex[w[, i]])/pnorm(ex[w[, i]]),
yhat[w[, i]])
if(any(is.na(yhat)) || any(yhat=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
# check if we are under the DL:
if(any(yhat2sel >= phi[w[, i]])){
yhat2sel <- ifelse(yhat2sel > phi[w[, i]], phi[w[, i]], yhat2sel)
}
xilr[w[, i], 1] <- yhat2sel
if(test) xinv <- isomLRInvp(xilr) else xinv <- pivotCoordInv(xilr)
## reordering of xOrig
if(i %in% 2:(d-1)){
xinv <- cbind(xinv[,2:i], xinv[,c(1,(i+1):d)])
}
if(i == d){
xinv <- cbind(xinv[,2:d], xinv[,1])
}
# x <- adjust2(xinv, xOrig, w)
# ## quick and dirty:
# x[!w] <- xOrig[!w]
}
}
### end add random error ###
# x <- adjust3(x, xOrig, w)
# x[!w] <- xOrig[!w]
x <- x[,order(o)] ## checked: reordering is OK!
colnames(x) <- cn
if(verbose){
message(paste(sum(w)), "values below detection limit has been imputed \n below the corresponding detection limits")
}
x <- checkDL(x, dl, indexFinalCheck)
res <- list(x=x, criteria=criteria, iter=it,
maxit=maxit, wind=w, nComp=nC, nPred=nPred,
variation=variation,
method=method, dl=dl)
class(res) <- "replaced"
return(res)
}
#' @rdname imputeBDLs
#' @export
#' @param xImp imputed data set
#' @param xOrig original data set
#' @param wind index matrix of rounded zeros
adjustImputed <- function(xImp, xOrig, wind){
## aim:
## (1) ratios must be preserved
## (2) do not change original values
## (3) adapt imputations
xneu <- xImp
s1 <- rowSums(xOrig, na.rm = TRUE)
## per row: consider rowsums of imputed data
sumPrevious <- sumAfter <- numeric(nrow(xImp))
for (i in 1:nrow(xImp)) {
if(any(wind[i, ]) & !all(wind[i,])){
sumPrevious[i] <- sum(xOrig[i, !wind[i, ]])
sumAfter[i] <- sum(xImp[i, !wind[i,]])
} else{
sumPrevious[i] <- sumAfter[i] <- 1
}
}
# how much is rowsum increased by imputation:
fac <- sumPrevious/sumAfter
# # decrese rowsums of orig.
# s1[i] <- s1[i]/fac
# }
## for non-zeros overwrite them:
xneu[!wind] <- xOrig[!wind]
## adjust zeros:
for(i in 1:nrow(xImp)){
if(any(wind[i,])){
xneu[i,wind[i,]] <- fac[i]*xneu[i,wind[i,]]
}
}
return(xneu)
}
#' @rdname imputeBDLs
#' @export
`checkData` <- function(x, dl){
if(any(is.na(x))) stop("your data includes missing values.\n Use impKNNa() or impCoda() to impute them first.")
if( is.vector(x) ) stop("x must be a matrix or data frame")
## check if only numeric variables are in x:
cl <- lapply(x, class)
if(!all(cl %in% "numeric")) stop("some of your variables are not of class numeric.")
if( length(dl) < ncol(x)) stop(paste("dl has to be a vector of ", ncol(x)))
if(any(is.na(x))) stop("missing values are not allowed. \n Use impKNNa or impCoda to impute them first.")
# pre <- TRUE
# if(length(nComp) != ncol(x) & nComp!="boot") stop("nComp must be NULL, boot or of length ncol(x)")
# } else if(nComp == "boot"){#
# pre <- TRUE
# } else {
# pre <- FALSE
# }
#################
## zeros to NA:
# check if values are in (0, dl[i]):
check <- logical(ncol(x))
for(i in 1:ncol(x)){
# check[i] <- any(x[,i] < dl[i] & x[,i] != 0)
x[x[,i] < dl[i],i] <- 0
}
# if(any(check)){warning("values below detection limit have been set to zero and will be imputed")}
check2 <- any(x < 0)
if(check2){warning("values below 0 set have been set to zero and will be imputed")}
x[x == 0] <- NA
x[x < 0] <- NA
indexFinalCheck <- is.na(x)
## check if rows consists of only zeros:
checkRows <- unlist(apply(x, 1, function(x) all(is.na(x))))
if(any(checkRows)){
w <- which(checkRows)
cat("\n--------\n")
message("Rows with only zeros are not allowed")
message("Remove this rows before running the algorithm")
cat("\n--------\n")
stop(paste("Following rows with only zeros:", w))
}
## check if cols consists of only zeros:
checkCols <- unlist(apply(x, 2, function(x) all(is.na(x))))
if(any(checkCols)){
w <- which(checkCols)
cat("\n--------\n")
message("Cols with only zeros are not allowed")
message("Remove this columns before running the algorithm")
cat("\n--------\n")
stop(paste("\n Following cols with only zeros:", colnames(x)[w]))
}
indNA <- apply(x, 2, function(x){any(is.na(x))})
## check if for any variable with zeros,
## the detection limit should be larger than 0:
if(any(dl[indNA]==0)){
w <- which(dl[indNA]==0)
invalidCol <- colnames(x)[w]
for(i in 1:length(invalidCol)){
cat("-------\n")
cat(paste("Error: variable/part", invalidCol[i],
"has detection limit 0 but includes zeros"))
cat("\n-------\n")
}
stop(paste("Set detection limits larger than 0 for variables/parts \n including zeros"))
}
}
#' @rdname imputeBDLs
#' @method print replaced
#' @export
#' @param ... further arguments passed through the print function
print.replaced <- function(x, ...){
message(paste("\n", sum(x$w), "values below detection limit were imputed \n below their corresponding detection limits.\n"))
}
#' Bootstrap to find optimal number of components
#'
#' Combined bootstrap and cross validation procedure to find optimal number of
#' PLS components
#'
#' Heavily used internally in function impRZilr.
#'
#' @param X predictors as a matrix
#' @param y response
#' @param R number of bootstrap replicates
#' @param plotting if TRUE, a diagnostic plot is drawn for each bootstrap
#' replicate
#' @return Including other information in a list, the optimal number of
#' components
#' @author Matthias Templ
#' @seealso \code{\link{impRZilr}}
#' @keywords manip
#' @export
#' @examples
#'
#' ## we refer to impRZilr()
#'
bootnComp <- function(X, y, R=99, plotting=FALSE){
ind <- 1:nrow(X)
d <- matrix(, ncol=R, nrow=nrow(X))#nrow(X))
nc <- integer(R)
for(i in 1:R){
bootind <- sample(ind)
# XX <- X
# yy <- y
if(is.null(ncol(y))){
ds <- cbind(X[bootind,], as.numeric(y[bootind]))
colnames(ds)[ncol(ds)] <- "V1"
res1 <- mvr(V1~., data=data.frame(ds), method="simpls",
validation="CV")
} else {
ds <- cbind(X[bootind,], as.matrix(y[bootind,]))
form <- paste(paste(colnames(ds[, (ncol(X)+1):ncol(ds)]), collapse = "+"), "~", paste(colnames(ds[, 1:ncol(X)]), collapse = "+"))
form <- as.formula(form)
res1 <- mvr(form, data=data.frame(ds), method="simpls",
validation="CV")
}
d[1:res1$ncomp, i] <- res1$validation$PRESS
nc[i] <- which.min(res1$validation$PRESS)
# d[1:reg1$ncomp,i] <- as.numeric(apply(reg1$validation$pred, 3,
# function(x) sum(((y - x)^2)) ) )
}
d <- na.omit(d)
sdev <- apply(d, 1, sd, na.rm=TRUE)
means <- apply(d, 1, mean, na.rm=TRUE)
mi <- which.min(means)
r <- round(ncol(X)/20)
mi2 <- which.min(means[r:length(means)])+r-1
w <- means < min(means) + sdev[mi]
threshold <- min(means) + sdev[mi]
sdev <- sdev
means <- means
mi <- mi
means2 <- means
means2[!w] <- 999999999999999
res <- which.min(means2)
mi3 <- which.max(w)
# minsd <- means - sdev > means[mi]
# check <- means
# check[!minsd] <- 99999999
if(plotting) plot(means, type="l")
# res <- which.min(check)
list(res3=mi3, res2=mi2, res=res, means=means)
}
bootnCompHD <- function(X,y, R=99, plotting=FALSE){
ind <- 1:nrow(X)
d <- matrix(, ncol=R, nrow=nrow(X))#nrow(X))
for(i in 1:R){
bootind <- sample(ind)
XX <- X
yy <- y
ds <- cbind(X[bootind,], as.numeric(y[bootind]))
colnames(ds)[ncol(ds)] <- "V1"
reg1 <- mvr(V1~., data=data.frame(ds), method="simpls", validation="CV")
d[1:reg1$ncomp,i] <- as.numeric(apply(reg1$validation$pred, 3, function(x) sum(((y - x)^2)) ) )
}
d <- na.omit(d)
sdev <- apply(d, 1, mad, na.rm=TRUE)
means <- apply(d, 1, median, na.rm=TRUE)
mi <- which.min(means)
if(plotting) plot(means, type="l", col="blue", ylab="squared total prediction error", xlab="number of components")
themean <- mean(means)
thesd <- sd(means)
abovethreshold <- themean - sdev > means
check <- means
check[!abovethreshold] <- 9999999999
res <- which.min(check)
# minsd <- means - sdev > means[mi]
# check <- means
# check[!minsd] <- 99999999
# res <- which.min(check)
if(plotting){
abline(v=res, lwd=3)
abline(h=mi, col="red")
# abline(h=means-sdev, lty=3)
}
list(res=res, mean=means)
}
#checkIfValuesUnderDL <- function(x, dl, wind){
# check <- logical(ncol(x))
# for(i in 1:ncol(x)){
# check[i] <- any(x[,i] > dl[i])
# }
# return(check)
#}
## test adjust2:
adjust2 <- function (xImp, xOrig, wind){
## aim: do not change original values
## adapt imputations
xneu = xImp
s1 <- rowSums(xOrig, na.rm = TRUE)
## per row: consider rowsums of imputed data
## example:
## wind: F F T F F
## ganz orig: 3 5 NA 8 10 (sum=26)
## orig(init): 3 5 6.5 8 10 (sum=32.5)
## imp: 3 5 7 8 10 (sum=33)
## s: 26
## s2: 7
## fac: 26/(26+7)
## s1: 32.5/(26/(26+7)) = 41.25
for (i in 1:nrow(xImp)) {
if(any(wind[i,])) s <- sum(xImp[i, !wind[i, ]]) else s <- 1
if(any(wind[i,])) s2 <- sum(xImp[i, wind[i, ]]) else s2 <- 0
# how much is rowsum increased by imputation:
fac <- s/(s + s2)
# decrese rowsums of orig.
s1[i] <- s1[i]/fac
}
## impS: 41.25/33
impS <- s1/rowSums(xImp)
for (i in 1:ncol(xImp)) {
xneu[, i] <- xImp[, i] * impS
}
xImp <- xneu
return(xImp)
}
adjust3 <- function(xImp, xOrig, wind){
xOrigSum <- rowSums(xOrig)
# sum imputed without former zeros:
xImpSum <- numeric(ncol(xOrig))
for(i in 1:nrow(xOrig)){
xImpSum[i] <- sum(xImp[i,!wind[i,]])
fac <- xOrigSum[i] / xImpSum[i]
xImp[i,wind[i,]] <- xImp[i,wind[i,]] * fac
}
xImp[!wind] <- xOrig[!wind]
xImp
}
#
### switch function to automatically select methods
#getM <- function(xReg, ndata, type, index,mixedTF,mixedConstant,factors,step,robust,noise,noise.factor=1,force=FALSE, robMethod="MM") {
# switch(type,
# numeric = useLM(xReg, ndata, index, mixedTF,mixedConstant,factors,step,robust,noise,noise.factor,force,robMethod),
# factor = useMN(xReg, ndata, index,factors,step,robust),
# bin = useB(xReg, ndata, index,factors,step,robust),
# count = useGLMcount(xReg, ndata, index, factors, step, robust)
# )
#}
#
#
#
#f <- function(){
## x <- constSum(x)
## dl <- dl/sum(dl)
# ## initialisation:
## x[is.na(x)] <- 0.001
# for(i in 1:length(dl)){
# ind <- is.na(x[,i])
# #PF# if(length(ind) > 0) x[ind,i] <- dl[i]/3*2
# if(length(ind) > 0) x[ind,i] <- dl[i]*runif(sum(ind),1/3,2/3)
# }
#
## x <- constSum(x)
#
# ## parameters:
# it=0
# criteria <- 10000000
# error <- rep(0, ncol(x))
# nComp <- normalerror <- numeric(ncol(x))
# if(noisemethod=="residuals") residuals <- matrix(,ncol=ncol(x), nrow=nrow(x))
# nComp[nComp==0] <- NA
# criteria <- 1e+07
# sigma <- mu <- rep(0, ncol(x))
#
# ###########################################
# ### start the iteration
# if(verbose) cat("\n start the iteration:")
# while(it <= maxit & criteria >= eps){
# xold <- x
# it=it+1
# for(i in which(indNA)){
# if(verbose) cat("\n column", i)
# ## change the first column with that one with the highest amount of NAs
# ## in the step
# if(wcol[indM[i]] > 0){
# xNA=x[,indM[i]]
# x1=x[,1]
# x[,1]=xNA
# x[,indM[i]]=x1
#
# if(method == "roundedZero"){
# xilr <- pivotCoord(x)
# phi <- pivotCoord(cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE]))[,1] # TODO: phi auserhalb der Schleife!
# ## --> x hat sich geaendert aber dl nicht.
# xilr2 <- data.frame(xilr)
# c1 <- colnames(xilr2)[1]
# colnames(xilr2)[1] <- "V1"
# reg1 = lm(V1 ~ ., data=xilr2)
# yhat2 <- predict(reg1, new.data=xilr2[,-i])
# if(bruteforce){
# xilr2[w[, indM[i]], 1] <- ifelse(yhat2[w[, indM[i]]] <= phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2[w[, indM[i]]] )
# } else {
# s <- sqrt(sum(reg1$res^2)/(nrow(xilr2)-ncol(xilr2)))
# ex <- (phi - yhat2)/s
# yhat2sel <- ifelse(dnorm(ex[w[, indM[i]]]) > .Machine$double.eps,
# yhat2[w[, indM[i]]] - s*dnorm(ex[w[, indM[i]]])/pnorm(ex[w[, indM[i]]]),
# yhat2[w[, indM[i]]])
# if(any(is.na(yhat2)) || any(yhat2=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
# # check if we are under the DL:
# if(any(yhat2sel >= phi[w[, indM[i]]])){
# yhat2sel <- ifelse(yhat2sel > phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2sel)
# }
# xilr2[w[, indM[i]], 1] <- yhat2sel
# }
# }
# if (method == "pls") {
# xilr = pivotCoord(x)
# ttt <- cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE])
# phi <- pivotCoord(cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE]))[,1]
## if(verbose) cat("\n phi", phi, "in iteration", it)
# xilr2 <- data.frame(xilr)
# c1 <- colnames(xilr2)[1]
# colnames(xilr2)[1] <- "V1"
# if(it == 1){ ## evaluate ncomp.
# test <- xilr2[,!(colnames(xilr2) == "V1")]
# testy <- xilr2[,"V1"]
# X <- xilr2[,!(colnames(xilr2) == "V1")]
# y <- xilr2[,"V1"]
# nComp[i] <- bootnComp(xilr2[,!(colnames(xilr2) == "V1")],y=xilr2[,"V1"], R)
# }
# reg1 <- mvr(V1~.,ncomp=nComp[i], data = xilr2, method="simpls")
# yhat2 <- as.numeric(predict(reg1, new.data=xilr2[,-i], ncomp=nComp[i]) )
## if(noisemethod=="residuals") residuals[,i] <- reg1$residuals[,,nComp[i]]
## if(noisemethod=="normal"){
## mu[i] <- median(residuals(reg1)[,,nComp[i]])
## sigma[i] <- mad(residuals(reg1)[,,nComp[i]]) * noiseeffect
## }
# colnames(xilr2)[1] <- c1
## fit=reg1$fitted.values[,,nComp[i]]
# s <- sqrt(sum(reg1$residuals[,,nComp[i]]^2)/abs(nrow(xilr2)-ncol(xilr2)))
# ex <- as.numeric((phi - yhat2)/s )
# yhat2sel <- ifelse(dnorm(ex[w[, indM[i]]]) > .Machine$double.eps,
# yhat2[w[, indM[i]]] - s*dnorm(ex[w[, indM[i]]])/pnorm(ex[w[, indM[i]]]),
# yhat2[w[, indM[i]]])
# if(any(is.na(yhat2)) || any(yhat2=="Inf")) stop("Problems in ilr because of NaN or NA estimates")
# # check if we are under the DL:
# if(any(yhat2sel >= phi[w[, indM[i]]])){
# yhat2sel <- ifelse(yhat2sel > phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2sel)
# }
## if(verbose) cat("\n yhat2sel at iteration", it, "on column", i ,"is", yhat2sel)
# xilr2[w[, indM[i]], 1] <- yhat2sel
#
#
# }
# if(method == "roundedZeroRobust"){
# xilr <- pivotCoord(x)
# x[x < .Machine$double.eps] <- 0.00000000001 ## TODO: better solution
# phi <- pivotCoord(cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE]))[,1] # TODO: phi auserhalb der Schleife!
# xilr2 <- data.frame(xilr)
# c1 <- colnames(xilr2)[1]
# colnames(xilr2)[1] <- "V1"
# reg1 = rlm(V1 ~ ., data=xilr2, method="MM",maxit = 100)
## reg1 = lmrob(V1 ~ ., data=xilr2)
# yhat2 <- predict(reg1, new.data=xilr2[,-i])
# if(bruteforce){
# xilr2[w[, indM[i]], 1] <- ifelse(yhat2[w[, indM[i]]] <= phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2[w[, indM[i]]] )
# } else {
# s <- reg1$s
# #PF# s <- IQR(reg1$resid)/1.349
# ex <- (phi - yhat2)/s
# yhat2sel <- ifelse(dnorm(ex[w[, indM[i]]]) > .Machine$double.eps,
# yhat2[w[, indM[i]]] - s*dnorm(ex[w[, indM[i]]])/pnorm(ex[w[, indM[i]]]),
# yhat2[w[, indM[i]]])
# if(any(is.na(yhat2)) || any(yhat2=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
# # check if we are under the DL:
# if(any(yhat2sel >= phi[w[, indM[i]]])){
# yhat2sel <- ifelse(yhat2sel > phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2sel)
# }
# xilr2[w[, indM[i]], 1] <- yhat2sel
# }
# }
#
# xilr <- xilr2
# x <- pivotCoordInv(xilr)
#
# ## return the order of columns:
# xNA=x[,1]
# x1=x[,indM[i]]
# x[,1]=x1
# x[,indM[i]]=xNA
# x <- adjust2(x, xcheck, w)
# print(x[1:2,1:2])
# if(verbose) cat("\n iteration", it, "column", i, "value", x[2,1])
# }
#
#
# }
#
#
# criteria <- sum( ((xold - x)/x)^2, na.rm=TRUE) ## DIRTY: (na.rm=TRUE)
# colnames(x) <- colnames(xcheck)
#
# }
#
# res <- list(xOrig=xcheck, xImp=x[,order(o)], criteria=criteria, iter=it, nComp=nComp,
# maxit=maxit, w=length(which(w)), wind=w)
# class(res) <- "imp"
## res <- adjust(res)
# invisible(res)
#}
#
#
#
##################################################################################
#
#`impRZilr` <-
#function(x, maxit=10, eps=0.1, method="roundedZero",
# dl=rep(0.05, ncol(x)), bruteforce=FALSE, noise = TRUE, noisemethod="residuals", noiseeffect=1, R=10,
# verbose=FALSE){
#
#
# if( is.vector(x) ) stop("x must be a matrix or data frame")
# stopifnot((method %in% c("ltsReg", "ltsReg2", "classical", "lm", "roundedZero","roundedZeroRobust","pls")))
# if( length(dl) < ncol(x)) stop(paste("dl has to be a vector of ", ncol(x)))
#
# #################
# ## zeros to NA:
# x[x==0] <- NA
#
# #################
# ## index of missings / non-missings
# w <- is.na(x)
# wn <- !is.na(x)
# w2 <- apply(x, 1, function(x){
# length(which(is.na(x)))
# })
# indNA <- apply(x, 2, function(x){any(is.na(x))})
#
# #################
# ## sort the columns of the data according to the amount of missings in the variables
# wcol <- apply(x, 2, function(x) length(which(is.na(x))))
# indM <- sort(wcol, index.return=TRUE, decreasing=TRUE)$ix
# cn <- colnames(x)
# xcheck <- x
#
# ################
# ## detection limit in ilr-space
## for(i in which(indNA)){
##
## }
#
#
## x <- constSum(x)
## dl <- dl/sum(dl)
# ## initialisation:
## x[is.na(x)] <- 0.001
# for(i in 1:length(dl)){
# ind <- is.na(x[,i])
# #PF# if(length(ind) > 0) x[ind,i] <- dl[i]/3*2
# if(length(ind) > 0) x[ind,i] <- dl[i]*runif(sum(ind),1/3,2/3)
# }
#
## x <- constSum(x)
#
# ## parameters:
# it=0
# criteria <- 10000000
# error <- rep(0, ncol(x))
# nComp <- normalerror <- numeric(ncol(x))
# if(noisemethod=="residuals") residuals <- matrix(,ncol=ncol(x), nrow=nrow(x))
# nComp[nComp==0] <- NA
# criteria <- 1e+07
# sigma <- mu <- rep(0, ncol(x))
#
# ###########################################
# ### start the iteration
# if(verbose) cat("\n start the iteration:")
# while(it <= maxit & criteria >= eps){
# xold <- x
# it=it+1
# for(i in which(indNA)){
# if(verbose) cat("\n column", i)
# ## change the first column with that one with the highest amount of NAs
# ## in the step
# if(wcol[indM[i]] > 0){
# xNA=x[,indM[i]]
# x1=x[,1]
# x[,1]=xNA
# x[,indM[i]]=x1
#
# if(method == "roundedZero"){
# xilr <- pivotCoord(x)
# phi <- pivotCoord(cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE]))[,1] # TODO: phi auserhalb der Schleife!
# ## --> x hat sich geaendert aber dl nicht.
# xilr2 <- data.frame(xilr)
# c1 <- colnames(xilr2)[1]
# colnames(xilr2)[1] <- "V1"
# reg1 = lm(V1 ~ ., data=xilr2)
# yhat2 <- predict(reg1, new.data=xilr2[,-i])
# if(bruteforce){
# xilr2[w[, indM[i]], 1] <- ifelse(yhat2[w[, indM[i]]] <= phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2[w[, indM[i]]] )
# } else {
# s <- sqrt(sum(reg1$res^2)/(nrow(xilr2)-ncol(xilr2)))
# ex <- (phi - yhat2)/s
# yhat2sel <- ifelse(dnorm(ex[w[, indM[i]]]) > .Machine$double.eps,
# yhat2[w[, indM[i]]] - s*dnorm(ex[w[, indM[i]]])/pnorm(ex[w[, indM[i]]]),
# yhat2[w[, indM[i]]])
# if(any(is.na(yhat2)) || any(yhat2=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
# # check if we are under the DL:
# if(any(yhat2sel >= phi[w[, indM[i]]])){
# yhat2sel <- ifelse(yhat2sel > phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2sel)
# }
# xilr2[w[, indM[i]], 1] <- yhat2sel
# }
# }
# if (method == "pls") {
# xilr = pivotCoord(x)
# ttt <- cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE])
# phi <- pivotCoord(cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE]))[,1]
## if(verbose) cat("\n phi", phi, "in iteration", it)
# xilr2 <- data.frame(xilr)
# c1 <- colnames(xilr2)[1]
# colnames(xilr2)[1] <- "V1"
# if(it == 1){ ## evaluate ncomp.
# test <- xilr2[,!(colnames(xilr2) == "V1")]
# testy <- xilr2[,"V1"]
# X <- xilr2[,!(colnames(xilr2) == "V1")]
# y <- xilr2[,"V1"]
# nComp[i] <- bootnComp(xilr2[,!(colnames(xilr2) == "V1")],y=xilr2[,"V1"], R)
# }
# reg1 <- mvr(V1~.,ncomp=nComp[i], data = xilr2, method="simpls")
# yhat2 <- as.numeric(predict(reg1, new.data=xilr2[,-i], ncomp=nComp[i]) )
## if(noisemethod=="residuals") residuals[,i] <- reg1$residuals[,,nComp[i]]
## if(noisemethod=="normal"){
## mu[i] <- median(residuals(reg1)[,,nComp[i]])
## sigma[i] <- mad(residuals(reg1)[,,nComp[i]]) * noiseeffect
## }
# colnames(xilr2)[1] <- c1
## fit=reg1$fitted.values[,,nComp[i]]
# s <- sqrt(sum(reg1$residuals[,,nComp[i]]^2)/abs(nrow(xilr2)-ncol(xilr2)))
# ex <- as.numeric((phi - yhat2)/s )
# yhat2sel <- ifelse(dnorm(ex[w[, indM[i]]]) > .Machine$double.eps,
# yhat2[w[, indM[i]]] - s*dnorm(ex[w[, indM[i]]])/pnorm(ex[w[, indM[i]]]),
# yhat2[w[, indM[i]]])
# if(any(is.na(yhat2)) || any(yhat2=="Inf")) stop("Problems in ilr because of NaN or NA estimates")
# # check if we are under the DL:
# if(any(yhat2sel >= phi[w[, indM[i]]])){
# yhat2sel <- ifelse(yhat2sel > phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2sel)
# }
## if(verbose) cat("\n yhat2sel at iteration", it, "on column", i ,"is", yhat2sel)
# xilr2[w[, indM[i]], 1] <- yhat2sel
#
#
# }
# if(method == "roundedZeroRobust"){
# xilr <- pivotCoord(x)
# x[x < .Machine$double.eps] <- 0.00000000001 ## TODO: better solution
# phi <- pivotCoord(cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE]))[,1] # TODO: phi auserhalb der Schleife!
# xilr2 <- data.frame(xilr)
# c1 <- colnames(xilr2)[1]
# colnames(xilr2)[1] <- "V1"
# reg1 = rlm(V1 ~ ., data=xilr2, method="MM",maxit = 100)
## reg1 = lmrob(V1 ~ ., data=xilr2)
# yhat2 <- predict(reg1, new.data=xilr2[,-i])
# if(bruteforce){
# xilr2[w[, indM[i]], 1] <- ifelse(yhat2[w[, indM[i]]] <= phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2[w[, indM[i]]] )
# } else {
# s <- reg1$s
# #PF# s <- IQR(reg1$resid)/1.349
# ex <- (phi - yhat2)/s
# yhat2sel <- ifelse(dnorm(ex[w[, indM[i]]]) > .Machine$double.eps,
# yhat2[w[, indM[i]]] - s*dnorm(ex[w[, indM[i]]])/pnorm(ex[w[, indM[i]]]),
# yhat2[w[, indM[i]]])
# if(any(is.na(yhat2)) || any(yhat2=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
# # check if we are under the DL:
# if(any(yhat2sel >= phi[w[, indM[i]]])){
# yhat2sel <- ifelse(yhat2sel > phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2sel)
# }
# xilr2[w[, indM[i]], 1] <- yhat2sel
# }
# }
#
# xilr <- xilr2
# x <- pivotCoordInv(xilr)
#
# ## return the order of columns:
# xNA=x[,1]
# x1=x[,indM[i]]
# x[,1]=x1
# x[,indM[i]]=xNA
# x <- adjust2(x, xcheck, w)
# print(x[1:2,1:2])
# if(verbose) cat("\n iteration", it, "column", i, "value", x[2,1])
# }
#
#
# }
#
#
# criteria <- sum( ((xold - x)/x)^2, na.rm=TRUE) ## DIRTY: (na.rm=TRUE)
# colnames(x) <- colnames(xcheck)
#
# }
#
# res <- list(xOrig=xcheck, xImp=x, criteria=criteria, iter=it, nComp=nComp,
# maxit=maxit, w=length(which(w)), wind=w)
# class(res) <- "imp"
## res <- adjust(res)
# invisible(res)
#}
# `impRZilr` <-
# function(x, maxit=10, eps=0.1, method="roundedZero",
# dl=rep(0.05, ncol(x)), bruteforce=FALSE){
#
# `ilrM` <-
# function(x, info=TRUE){
# x.ilr=matrix(NA,nrow=nrow(x),ncol=ncol(x)-1)
# D=ncol(x)
# for (i in 1:ncol(x.ilr)){
# x.ilr[,i]=sqrt((D-i)/(D-i+1))*log(((apply(as.matrix(x[,(i+1):D,drop=FALSE]),1,prod))^(1/(D-i)))/(x[,i]))
# }
# # invisible(-x.ilr)
# if(info) {res <- list(xilr=-x.ilr,
# xOrig=x)
# class(res) <- "ilrTransform"
# } else {
# res <- -x.ilr
# }
# res
# }
# `invilrM` <-
# function(x.ilr){
# if(class(x.ilr) =="ilrTransform" ){
# fac <- rowSums(x.ilr$xOrig)
# x.ilr <- x.ilr$xilr
# y <- matrix(0,nrow=nrow(x.ilr),ncol=ncol(x.ilr)+1)
# D=ncol(x.ilr)+1
# y[,1]=-sqrt((D-1)/D)*x.ilr[,1]
# for (i in 2:ncol(y)){
# for (j in 1:(i-1)){
# y[,i]=y[,i]+x.ilr[,j]/sqrt((D-j+1)*(D-j))
# }
# }
# for (i in 2:(ncol(y)-1)){
# y[,i]=y[,i]-sqrt((D-i)/(D-i+1))*x.ilr[,i]
# }
# yexp=exp(-y)
# x.back=yexp/apply(yexp,1,sum) * fac # * rowSums(derOriginaldaten)
# invisible(x.back)
# } else {
# y=matrix(0,nrow=nrow(x.ilr),ncol=ncol(x.ilr)+1)
# D=ncol(x.ilr)+1
# y[,1]=-sqrt((D-1)/D)*x.ilr[,1]
# for (i in 2:ncol(y)){
# for (j in 1:(i-1)){
# y[,i]=y[,i]+x.ilr[,j]/sqrt((D-j+1)*(D-j))
# }
# }
# for (i in 2:(ncol(y)-1)){
# y[,i]=y[,i]-sqrt((D-i)/(D-i+1))*x.ilr[,i]
# }
# yexp=exp(-y)
# x.back=yexp/apply(yexp,1,sum) # * rowSums(derOriginaldaten)
# invisible(x.back)
# #return(yexp)
# }
# x.back
# }
#
#
#
# if( is.vector(x) ) stop("x must be a matrix or data frame")
# stopifnot((method %in% c("ltsReg", "ltsReg2", "classical", "lm", "roundedZero","roundedZeroRobust")))
# if( length(dl) < ncol(x)) stop(paste("dl has to be a vector of ", ncol(x)))
#
#
# ## zeros to NA:
# x[x==0] <- NA
#
# ##index of missings / non-missings
# w <- is.na(x)
# wn <- !is.na(x)
# w2 <- apply(x, 1, function(x){
# length(which(is.na(x)))
# })
#
#
# ##sort the columns of the data according to the amount of missings in the variables
# wcol <- apply(x,2,function(x) length(which(is.na(x))))
# indM <- sort(wcol, index.return=TRUE, decreasing=TRUE)$ix
# cn <- colnames(x)
# xcheck <- x
#
# ## initialisation:
# # x[is.na(x)] <- 0.001
# for(i in 1:length(dl)){
# ind <- is.na(x[,i])
# #PF# if(length(ind) > 0) x[ind,i] <- dl[i]/3*2
# if(length(ind) > 0) x[ind,i] <- dl[i]*runif(sum(ind),1/3,2/3)
# }
#
# # x <- constSum(x)
#
# ## parameters:
# it=0
# criteria <- 10000000
# error <- rep(0, ncol(x))
#
# ###########################################
# ### start the iteration
# while(it <= maxit & criteria >= eps){
# xold <- x
# it=it+1
# for(i in 1:ncol(x)){
# ## change the first column with that one with the highest amount of NAs
# ## in the step
# if(wcol[indM[i]] > 0){
# xNA=x[,indM[i]]
# x1=x[,1]
# x[,1]=xNA
# x[,indM[i]]=x1
#
# if(method == "roundedZero"){
# xilr <- ilrM(x)
# phi <- ilrM(cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE]), info=FALSE)[,1] # TODO: phi auserhalb der Schleife!
# ## --> x hat sich geaendert aber dl nicht.
# xilr2 <- data.frame(xilr$xilr)
# c1 <- colnames(xilr2)[1]
# colnames(xilr2)[1] <- "V1"
# reg1 = lm(V1 ~ ., data=xilr2)
# yhat2 <- predict(reg1, new.data=xilr2[,-i])
# if(bruteforce){
# xilr2[w[, indM[i]], 1] <- ifelse(yhat2[w[, indM[i]]] <= phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2[w[, indM[i]]] )
# } else {
# # s <- sd(reg1$res, na.rm=TRUE)
# s <- sqrt(sum(reg1$res^2)/(nrow(xilr2)-ncol(xilr2)))
# ex <- (phi - yhat2)/s
# #####################################################
# # CHANGED PF:
# #PF# if(all(dnorm(ex) > 5 * .Machine$double.eps)) yhat2 <- yhat2 - s*dnorm(ex)/pnorm(ex)
# yhat2sel <- ifelse(dnorm(ex[w[, indM[i]]]) > .Machine$double.eps,
# yhat2[w[, indM[i]]] - s*dnorm(ex[w[, indM[i]]])/pnorm(ex[w[, indM[i]]]),
# yhat2[w[, indM[i]]])
# if(any(is.na(yhat2)) || any(yhat2=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
# # check if we are under the DL:
# if(any(yhat2sel >= phi[w[, indM[i]]])){
# yhat2sel <- ifelse(yhat2sel > phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2sel)
# }
# xilr2[w[, indM[i]], 1] <- yhat2sel
# #####################################################
# }
# }
# if(method == "roundedZeroRobust"){
# xilr <- ilrM(x)
# x[x < .Machine$double.eps] <- 0.00000000001 ## TODO: better solution
# phi <- ilrM(cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE]), info=FALSE)[,1] # TODO: phi auserhalb der Schleife!
# xilr2 <- data.frame(xilr$xilr)
# c1 <- colnames(xilr2)[1]
# colnames(xilr2)[1] <- "V1"
# reg1 = rlm(V1 ~ ., data=xilr2, method="MM",maxit = 100)
# # reg1 = lmrob(V1 ~ ., data=xilr2)
# yhat2 <- predict(reg1, new.data=xilr2[,-i])
# if(bruteforce){
# xilr2[w[, indM[i]], 1] <- ifelse(yhat2[w[, indM[i]]] <= phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2[w[, indM[i]]] )
# } else {
# # s <- mad(reg1$res, na.rm=TRUE)
# s <- reg1$s
# #PF# s <- IQR(reg1$resid)/1.349
# ex <- (phi - yhat2)/s
# #####################################################
# # CHANGED PF:
# #PF# if(all(dnorm(ex) > 5 * .Machine$double.eps)) yhat2 <- yhat2 - s*dnorm(ex)/pnorm(ex)
# yhat2sel <- ifelse(dnorm(ex[w[, indM[i]]]) > .Machine$double.eps,
# yhat2[w[, indM[i]]] - s*dnorm(ex[w[, indM[i]]])/pnorm(ex[w[, indM[i]]]),
# yhat2[w[, indM[i]]])
# if(any(is.na(yhat2)) || any(yhat2=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
# # check if we are under the DL:
# if(any(yhat2sel >= phi[w[, indM[i]]])){
# yhat2sel <- ifelse(yhat2sel > phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2sel)
# }
# xilr2[w[, indM[i]], 1] <- yhat2sel
# #####################################################
# }
# }
#
# xilr$xilr <- xilr2
# x=invilrM(xilr)
# ## return the order of columns:
# xNA=x[,1]
# x1=x[,indM[i]]
# x[,1]=x1
# x[,indM[i]]=xNA
# }
#
#
# }
#
#
# criteria <- sum( ((xold - x)/x)^2, na.rm=TRUE) ## DIRTY: (na.rm=TRUE)
# colnames(x) <- colnames(xcheck)
#
# }
#
# res <- list(xOrig=xcheck, xImp=x, criteria=criteria, iter=it,
# maxit=maxit, w=length(which(w)), wind=w)
# class(res) <- "imp"
# invisible(res)
# }
#
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.