Nothing
#' Imputation of missing values in compositional data using knn methods
#'
#' This function offers several k-nearest neighbor methods for the imputation
#' of missing values in compositional data.
#'
#' The Aitchison \code{metric} should be chosen when dealing with compositional
#' data, the Euclidean \code{metric} otherwise.
#'
#' If \code{primitive} \eqn{==} FALSE, a sequential search for the
#' \eqn{k}-nearest neighbors is applied for every missing value where all
#' information corresponding to the non-missing cells plus the information in
#' the variable to be imputed plus some additional information is available. If
#' \code{primitive} \eqn{==} TRUE, a search of the \eqn{k}-nearest neighbors
#' among observations is applied where in addition to the variable to be
#' imputed any further cells are non-missing.
#'
#' If \code{normknn} is TRUE (prefered option) the imputed cells from a nearest
#' neighbor method are adjusted with special adjustment factors (more details
#' can be found online (see the references)).
#'
#' @param x data frame or matrix
#' @param method method (at the moment, only \dQuote{knn} can be used)
#' @param k number of nearest neighbors chosen for imputation
#' @param metric \dQuote{Aichison} or \dQuote{Euclidean}
#' @param agg \dQuote{median} or \dQuote{mean}, for the aggregation of the
#' nearest neighbors
#' @param primitive if TRUE, a more enhanced search for the $k$-nearest
#' neighbors is obtained (see details)
#' @param normknn An adjustment of the imputed values is performed if TRUE
#' @param das depricated. if TRUE, the definition of the Aitchison distance,
#' based on simple logratios of the compositional part, is used (Aitchison,
#' 2000) to calculate distances between observations. if FALSE, a version
#' using the clr transformation is used.
#' @param adj either \sQuote{median} (default) or \sQuote{sum} can be chosen
#' for the adjustment of the nearest neighbors, see Hron et al., 2010.
#' @return \item{xOrig }{Original data frame or matrix} \item{xImp }{Imputed
#' data} \item{w }{Amount of imputed values} \item{wind }{Index of the missing
#' values in the data} \item{metric }{Metric used}
#' @author Matthias Templ
#' @export
#' @seealso \code{\link{impCoda}}
#' @references Aitchison, J., Barcelo-Vidal, C., Martin-Fernandez, J.A.,
#' Pawlowsky-Glahn, V. (2000) Logratio analysis and compositional distance,
#' \emph{Mathematical Geology}, 32(3), 271-275.
#'
#' Hron, K., Templ, M., Filzmoser, P. (2010) Imputation of missing values
#' for compositional data using classical and robust methods
#' \emph{Computational Statistics and Data Analysis}, 54 (12),
#' 3095-3107.
#' @keywords manip multivariate
#' @examples
#'
#' data(expenditures)
#' x <- expenditures
#' x[1,3]
#' x[1,3] <- NA
#' xi <- impKNNa(x)$xImp
#' xi[1,3]
#'
`impKNNa` <-
function(x, method="knn", k=3,
metric="Aitchison", agg="median", primitive=FALSE, normknn=TRUE, das=FALSE, adj="median"){
### Coda Version 2!
### Nearest Neighbor imputation algorithm of Missing values
### based on distance matrices.
###
### Program by Matthias Templ, Oktober 2008
### R Version: 2.7.1
### Copyright : TU-Wien, 2008
###
# TODO: help file mit adj ergaenzen
x <- as.matrix(x)
class(x) <- "matrix"
N <- dim(x)[1]
P <- dim(x)[2]
if(any(rowSums(is.na(x)) == P)) stop("One or more observations constist of only NA's")
xOrig <- xmiss <- x
w <- is.na(x)
w2 <- !is.na(x)
if(metric=="Euclidean" & primitive==FALSE){
findknn <- function(x, i, j){
## find knn
m1 <- which(!is.na(x[i,]))
mi <- which(!is.na(x[,j,drop=FALSE]) & !is.na(rowSums(x[,m1,drop=FALSE])) )
d <- rowSums(t((t(x[mi,m1,drop=FALSE]) - x[i,m1])^2))
names(d) <- mi
as.numeric(names(which(d <= quantile(d, k/N))))
}
}
if(metric=="Euclidean" & primitive==TRUE){
findknn <- function(x, i, j){
## find knn
m1 <- which(!is.na(x[i,]))
mi <- which(!is.na(x[,j,drop=FALSE]) )
d <- rowSums(t((t(x[mi,m1,drop=FALSE]) - x[i,m1])^2),na.rm=TRUE)
names(d) <- mi
as.numeric(names(which(d <= quantile(d, k/N))))
}
}
if(metric=="Aitchison" & primitive==FALSE){
findknn <- function(x, i, j){
m1 <- which(!is.na(x[i,]))
mi <- which(c(!is.na(x[,j,drop=FALSE])) & c(!is.na(rowSums(x[,m1,drop=FALSE]))))
xclr <- cenLR(rbind(x[mi, m1, drop=FALSE], x[i, m1]))$x.clr
d <- rowSums(t(abs(t(xclr[-nrow(xclr),]) - c(xclr[nrow(xclr),]))))
names(d) <- mi
wA <- which(d <= quantile(d, k/length(d)))
### new change for zeros paper:
# print(is.numeric(xclr))
#if(is.numeric(xclr)) xclr <- matrix(xclr, ncol=length(xclr))
#d <- rowSums(t(abs(t(xclr[-nrow(xclr),,drop=FALSE]) - c(xclr[nrow(xclr),,drop=FALSE]))), na.rm=TRUE)
#names(d) <- mi
#print(d)
#wA <- which(d <= quantile(d, k/length(d)))
### end new change
w <- as.numeric(names(wA))
list(knn=w, m1=m1)
}
}
if(metric=="Aitchison" & primitive==TRUE & das == TRUE){
findknn <- function(x, i, j){
m1 <- which(!is.na(x[i,]))
mi <- which(c(!is.na(x[,j,drop=FALSE])) )
da <- function(x,y){
d <- 0
p <- length(x)
for(i in 1:(p-1)){
for(j in (i+1):p){
d <- d + (log(x[i]/x[j]) - log(y[i]/y[j]))^2
}
}
d=d/p
d
}
ref <- x[i,m1]
d <- apply(x[mi, m1, drop=FALSE], 1, function(z){da(x=z, y=ref)})
names(d) <- mi
wA <- which(d <= quantile(d, k/length(d)))
w <- as.numeric(names(wA))
list(knn=w, m1=m1)
}
}
if(metric=="Aitchison" & primitive==FALSE & das == TRUE){
findknn <- function(x, i, j){
m1 <- which(!is.na(x[i,]))
mi <- which(c(!is.na(x[,j,drop=FALSE])) & c(!is.na(rowSums(x[,m1,drop=FALSE]))))
da <- function(x,y){
d <- 0
p <- length(x)
for(i in 1:(p-1)){
for(j in (i+1):p){
d <- d + (log(x[i]/x[j]) - log(y[i]/y[j]))^2
}
}
d=d/p
d
}
ref <- x[i,m1]
d <- apply(x[mi, m1, drop=FALSE], 1, function(z){da(x=z, y=ref)})
names(d) <- mi
wA <- which(d <= quantile(d, k/length(d)))
w <- as.numeric(names(wA))
list(knn=w, m1=m1)
}
}
if(metric=="Aitchison" & primitive==TRUE){
findknn <- function(x, i, j){
m1 <- which(!is.na(x[i,]))
a <- c(!is.na(x[,j,drop=FALSE]))
b <- ifelse(rowSums(is.na(x[,-j])) == 0, TRUE, FALSE)
mi <- which(a & b)
xclr <- cenLR(rbind(x[mi, m1, drop=FALSE], x[i, m1]))$x.clr
d <- rowSums(t(abs(t(xclr[-nrow(xclr),]) - c(xclr[nrow(xclr),]))), na.rm=TRUE)
#d[d == 0] <- NA ## dirty
names(d) <- mi
wA <- which(d <= quantile(d, k/length(d), na.rm=TRUE))
w <- as.numeric(names(wA))
list(knn=w, m1=m1)
}
}
if(metric=="Euclidean"){
imp <- function(x, i, j){
## workhorse: do the imputation
knn <- findknn(x, i, j)
get(agg)(x[knn,j], na.rm=TRUE) #median
}
}
if(metric=="Aitchison" & normknn == FALSE){
imp <- function(x, i, j){
## workhorse: do the imputation
knn <- findknn(x, i, j)
### median, normalization:
#fac <- knn$xclriSum/knn$medKnnSum
#median(x[knn$knn, j])#*fac # fac anders berechnen
medSum <- get(adj)(apply(x[knn$knn, knn$m1, drop=FALSE], 2, function(x,...){get(agg)(x, na.rm=TRUE)} ), na.rm=TRUE)
xSum <- get(adj)(x[i, knn$m1, drop=FALSE], na.rm=TRUE)
fac <- xSum/medSum
get(agg)(x[knn$knn, j], na.rm=TRUE)*fac #median
}
}
if(metric=="Aitchison" & normknn == TRUE){
imp <- function(x, i, j){
knn <- findknn(x, i, j)
##normalization:
#xSum <- sum(x[i, knn$m1, drop=FALSE], na.rm=TRUE)
xSum <- get(adj)(x[i, knn$m1, drop=FALSE], na.rm=TRUE)
if( length(knn$knn) == 1){
#knnSum <- sum(x[knn$knn, knn$m1], na.rm=TRUE)
knnSum <- get(adj)(x[knn$knn, knn$m1], na.rm=TRUE)
} else{
#knnSum <- rowSums(x[knn$knn, knn$m1, drop=FALSE], na.rm=TRUE)
knnSum <- apply(x[knn$knn, knn$m1, drop=FALSE], 1, function(x,...){get(agg)(x, na.rm=TRUE)} )
}
fac <- xSum/knnSum
get(agg)(x[knn$knn, j] * fac, na.rm=TRUE)
## statt Mean das geom. Mittel?
}
}
#if(metric == "Aitchison" & ratios == TRUE){
# knn <- findknn(x, i, j)
# p <- ncol(x)
# y <- apply(apply(x[knn$knn, , drop = FALSE], 1, function(x){x[2:p]/x[1]}),1,median)
# x[i,j] <- 1
# x[i,-j] <- y
#}
#if(metric=="Aitchison" & unweighted == TRUE){
# imp <- function(x, i, j){
# knn <- findknn(x, i, j)
# x[i,j] <- get(agg)(x[knn$knn, j])
# }
#}
for(i in 1:N){
for(j in 1:P){
if(is.na(x[i,j])){
x[i,j] <- imp(xmiss, i, j)
}
}
}
w <- is.na(xOrig)
colnames(x) <- colnames(xOrig)
res <- list(xOrig=xOrig, xImp=x, criteria=NULL, iter=NULL, w=length(which(w)), wind=w, metric=metric)
class(res) <- "imp"
res
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.