#' Completed data sets as long data.frame
#'
#' Completed (imputed) data sets given incomplete data passed to
#' \code{\link{smirf}} and the output thereof, returned in a long-format
#' data.frame.
#'
#' Description
#'
#' @param X data.frame;
#' incomplete data set passed to \code{\link{smirf}} in construction of
#' \code{mf}.
#' @param mf list;
#' imputed data sets and output of missForest procedure as returned by
#' \code{\link{smirf}}.
#' @param k integer;
#' number of completed data sets to generate, default is the number of
#' imputed data sets in \code{mf}.
#' @param to.complete character;
#' vector of names of variables to include, default is all variables in
#' imputed data sets in \code{mf}.
#' @param replace boolean;
#' indicator of sampling from imputed data sets with replacement or not.
#' @return data.frame;
#' completed long-format data containing the columns of \code{X} and two
#' more columns, \describe{
#' \item{\code{.source}}{integer; id of the source data set in \code{mf}
#' of the imputed data in the row (if any), otherwise \code{NA},
#' and;}
#' \item{\code{.imp}}{integer; id of the complete data set between 1 and
#' \code{k}, or \code{NA} if it belongs to all (i.e. contains no
#' imputed data),}
#' } and each row is either \itemize{
#' \item a complete case whose values are unaltered, or;
#' \item an incomplete case with a sample from the imputed data
#' replacing any missing values.
#' }
#'
#' @seealso \code{\link{smirf}}
#'
#' @section To-do:
#' \itemize{
#' \item document \code{complete_results}
#' \item write tests for \code{complete_results}
#' }
#'
#' @export
complete_results <- function(X,
mf,
k=length(mf$results),
to.complete=names(mf$which_imputed),
replace=F) {
# TODO: perhaps write check arguments function
if (inherits(X, 'grouped_df'))
warning('groups in X will be ignored and discarded.')
not_found <- names(to.complete)[!(names(to.complete) %in%
names(mf$which_imputed))]
if (length(not_found) > 0)
stop(paste0('variables in to.complete not found in imputed data set:\n',
' - ', paste0(not_found, collapse=', '), '.'))
# number of imputed data sets
n <- length(mf$results)
to_use <- 1L + sapply(mf$results, getElement, 'iterations')
which_imputed <- unique(do.call(c, unname(mf$which_imputed[to.complete])))
J <- samples_as_matrix(n, k, size=length(which_imputed), replace=replace)
X_ <- X[rep(which_imputed, times=k),,drop=F]
# over each data set generated by smirf
for (source_set in seq_len(n)) {
# observations to which this imputed data set is assigned
to_j <- which(J == source_set, arr.ind=T)
# rows of X_ to which the imputed data is assigned
to_X_ <- (to_j[,2] - 1L) * length(which_imputed) + to_j[,1]
# record 'id' of imputed data set (source) and the final data set
X_[to_X_, '.source'] <- source_set
X_[to_X_, '.imp'] <- to_j[,2]
# over each to-be completed variable
for (v in to.complete) {
# indices of imputed data from which value is taken
# could be taken outside loop
from_j <- match(which_imputed[to_j[,1]],
mf$which_imputed[[v]],
nomatch=0L)
X_[to_X_[from_j > 0L],v] <- with(mf$results[[source_set]],
imputed[[
to_use[source_set]
]][[v]][from_j])
}
}
X[,'.source'] <- X[,'.imp'] <- NA_integer_
rbind(X[-which_imputed,], X_)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.