#' hmi: Hierarchical Multilevel Imputation.
#'
#' The user has to pass his data to the function.
#' Optionally he passes his analysis model formula so that \code{hmi} runs the imputation model
#' in line with his analysis model formula.\cr
#' And of course he can specify some parameters for the imputation routine
#' (like the number of imputations and iterations and the number of iterations
#' within the Gibbs sampling).\cr
#' @param data A \code{data.frame} with all variables appearing in \code{model_formula}.
#' @param model_formula A \code{\link[stats]{formula}} used for the analysis model.
#' Currently the package is designed to handle formula used in
#' \code{lm}, \code{glm} and \code{lmer}.
#' @param additional_variables A character with names of variables (separated by "+", like "x8+x9")
#' that should be included in the imputation model as fixed effects variables,
#' but not in the analysis model.
#' An alternative would be to include such variable names into the \code{model_formula}
#' an run a reduced analysis model with \code{hmi_pool} or the functions provide by \code{mice}.
#' @param M An integer defining the number of imputations that should be made.
#' @param maxit An integer defining the number of times the imputation cycle
#' (imputing \eqn{x_1|x_{-1}} then \eqn{x_2|x_{-2}}, ... and finally \eqn{x_p|x_{-p}}) shall be repeated.
#' The task of checking convergence is left to the user, by evaluating the chainMean and chainVar!
#' @param nitt An integer defining number of MCMC iterations (see \code{MCMCglmm}).
#' @param burnin An integer for the desired number of
#' Gibbs samples that shall be regarded as burnin.
#' @param thin An integer to set the thinning interval range. If thin = 1,
#' every iteration of the Gibbs-sampling chain will be kept. For highly autocorrelated
#' chains, that are only examined by few iterations (say less than 1000),
#' the \code{geweke.diag} might fail to detect convergence. In such cases it is
#' essential to look a chain free from autocorelation.
#' @param mn An integer defining the minimum number of individuals per cluster.
#' @param spike A numeric value saying to which value the semi-continuous data might be heaped.
#' Or a list with with such values and names identical to the variables with heaps
#' (see \code{list_of_spikes_maker} for details.) In versions ealier to 0.9.0 it was called \code{heap}.
#' @param heap Use spike instead. \code{heap} is only included due to backwards compatibility
#' and will be removed with version 1.0.0
#' @param rounding_degrees A numeric vector with the presumed rounding degrees. Or a list with rounding degrees,
#' where each list element has the name of a rounded continuous variable. Such a list can be generated
#' using \code{list_of_rounding_degrees_maker(data)}.
#' @param list_of_types a list where each list element has the name of a variable
#' in the data.frame. The elements have to contain a single character denoting the type of the variable.
#' See \code{get_type} for details about the variable types.
#' With the function \code{list_of_types_maker}, the user can get the framework for this object.
#' In most scenarios this is should not be necessary.
#' One example where it might be necessary is when only two observations
#' of a continuous variable are left - because in this case \code{get_type}
#' interpret is variable to be binary. Wrong is it in no case.
#' @param pool_with_mice A Boolean indicating whether the user wants to pool the \code{M} data sets by mice
#' using his \code{model_formula}. The default value is \code{FALSE} because this tampers the
#' \code{mids} object as it adds an argument \code{pooling} not found in "normal" \code{mids} objects
#' generated by \code{mice}.
#' @return The function returns a \code{mids} object. See \code{mice} for further information.
#' @examples
#' \dontrun{
#' my.formula <- Reaction ~ Days + (1 + Days|Subject)
#' my_analysis <- function(complete_data){
#' # In this list, you can write all the parameters you are interested in.
#' # Those will be averaged.
#' # So make sure that averaging makes sense and that you only put in single numeric values.
#' parameters_of_interest <- list()
#'
#' # ---- write in the following lines, what you are interetest in to do with your complete_data
#' # the following lines are an example where the analyst is interested in the fixed intercept
#' # and fixed slope and the random intercepts variance,
#' # the random slopes variance and their covariance
#' my_model <- lmer(my.formula, data = complete_data)
#'
#' parameters_of_interest[[1]] <- fixef(my_model)[1]
#' parameters_of_interest[[2]] <- fixef(my_model)[2]
#' parameters_of_interest[[3]] <- VarCorr(my_model)[[1]][1, 1]
#' parameters_of_interest[[4]] <- VarCorr(my_model)[[1]][1, 2]
#' parameters_of_interest[[5]] <- VarCorr(my_model)[[1]][2, 2]
#' names(parameters_of_interest) <- c("beta_intercept", "beta_Days", "sigma0", "sigma01", "sigma1")
#'
#' # ---- do change this function below this line.
#' return(parameters_of_interest)
#' }
#' require("lme4")
#' require("mice")
#' data(sleepstudy, package = "lme4")
#' test <- sleepstudy
#' test[sample(1:nrow(test), size = 20), "Reaction"] <- NA
#' hmi_imp <- hmi(data = test, model_formula = my.formula, M = 5, maxit = 1)
#' hmi_pool(mids = hmi_imp, analysis_function = my_analysis)
#' #if you are interested in fixed effects only, consider pool from mice:
#' pool(with(data = hmi_imp, expr = lmer(Reaction ~ Days + (1 + Days | Subject))))
#' }
#' @export
hmi <- function(data,
model_formula = NULL,
additional_variables = NULL,
M = 5,
maxit = NULL,
nitt = 25000,
burnin = 5000,
thin = 20,
mn = 1,
spike = 0,
heap = NULL,
rounding_degrees = c(1, 10, 100, 1000),
list_of_types = NULL,
pool_with_mice = TRUE){
options(error = expression(NULL))
if(is.null(heap)){
heap <- spike
}else{
warning("To avoid confusion with >>heap<< when writing about the >>rounding_degrees>>,
>>heap<< was renamed to >>spike<<.")
}
if(is.null(list_of_types)){
tmp_list_of_types <- list_of_types_maker(data, rounding_degrees = rounding_degrees)
}else{
if(!is.list(list_of_types)) stop("We need list_of_types to be a list.")
if(!isTRUE(all.equal(sort(names(list_of_types)), sort(colnames(data))))){
stop("At least one variable in list_of_types isn't found in data or vica versa.")
}
if(any(lapply(list_of_types, class) != "character")){
stop("We need the elements of list_of_types to be of class character.")
}
if(any(lapply(list_of_types, length) != 1)){
stop("We need that each element of list_of_types only contains one character.")
}
tmp_list_of_types <- list_of_types
}
my_data <- data
if(is.matrix(my_data)) stop("We need your data in a data.frame (and not a matrix).")
if(!is.data.frame(my_data)) stop("We need your data in a data.frame.")
if(any(apply(my_data, 1, function(x) all(is.na(x))))){
warning("Some observations have a missing value in every variable.
This can increase your estimate's variance.
We strongly suggest to remove those observations.")
}
if(any(apply(my_data, 2, function(x) all(is.na(x))))){
warning("Some variables have a missing values in every observation.
Remove those variables from your data.frame (e.g. by setting them to NULL).")
}
# recode "-Inf;Inf" in interval data to NA
#intervals from -Inf to Inf shall be recoded as NA.
for(j in 1:ncol(my_data)){
data[, j] <- sna_interval(my_data[, j])
my_data[, j] <- sna_interval(my_data[, j])
}
#get missing rates
missing_rates <- apply(my_data, 2, function(x) sum(is.na(x)))/nrow(my_data)
# it's not necessary to divide by nrow(my_data),
# but we keep this for more intuitive debugging
#get variables with missings
incomplete_variables <- names(my_data)[missing_rates > 0]
variables_to_impute <- union(incomplete_variables,
names(tmp_list_of_types)[tmp_list_of_types == "roundedcont" |
tmp_list_of_types == "interval"])
# Set a default value of maxit.
# This default value should be 1 in case of only one variable to impute.
# (In such cases the imputation cycle cannot further converge, so 1 is efficient.)
# In the other cases, it is set to 5.
if(is.null(maxit)){
if(length(variables_to_impute) <= 1){
maxit <- 1
}else{
maxit <- 5
}
}
#Check nitt. To check whether nitt is a whole number, the example from ?integer is used.
is.wholenumber <-
function(x, tol = .Machine$double.eps^0.5) abs(x - round(x)) < tol
if(!is.wholenumber(nitt) | nitt < 1){
warning("nitt must be an integer >= 1")
}
if(any(missing_rates > 0.9)){
many_NAs <- names(my_data)[missing_rates > 0.9]
writeLines(paste("The variable(s) >>",
paste(many_NAs, collapse = " and "),
"<< have missing rates above 90%.
This can result in instable imputation results for this variable
and later in failing imputation routines for other variables (like count variables).
How do you want to proceed: \n
[c]ontinue using these variables
or [e]xiting the imputation?"))
proceed <- readline("Type 'c' or 'e' into the console and press [enter]: ")
if(proceed == "e") return(NULL)
}
# # # # # # # # # # # get the formula elements (fe) and do consistency checks # # # # # # # # # # # # # # # #
constant_variables <- apply(my_data, 2, function(x) length(unique(x)) == 1)
if(sum(constant_variables) >= 2){
print(paste("Your data has two or more constant variables: ",
paste(names(my_data)[constant_variables], collapse = ", "),
". For the imputation we kept only the first to avoid multicolinearity.", sep = ""))
my_data[, names(my_data)[constant_variables][-1]] <- NULL
}
# Searches for intercepts
# In a model there a three ways to specify an intercept:
# 1: Explicitly by writing ~ 1 + X1 + ...
# 2: Implicitly by writing ~ X1 + ...
# 3: By refering to an exisiting intercept variable
# (eg. if called "Int"): ~ X1 + Int + ...
model_formula_org <- model_formula
if(is.null(model_formula)){
model_has_intercept <- TRUE
}else{
if(is.null(additional_variables)){
model_formula <- stats::as.formula(format(model_formula))
}else{
model_formula <- stats::as.formula(paste(format(model_formula), additional_variables, sep = "+"))
}
model_has_intercept <- fixed_intercept_check(model_formula) |
random_intercept_check(model_formula) |
any(names(which(constant_variables)) %in% all.vars(stats::delete.response(stats::terms(lme4::nobars(model_formula)))))
}
dataset_has_intercept <- any(constant_variables)
if(model_has_intercept){
if(dataset_has_intercept){
#nothing to do in this case: just use this intercept
}else{
#in the case an intercept is needed but not present in the data: add an intercept
# generate a variable name that is guaranteed not used in the data
# by adding something to the longest variable name.
intercept_varname <- paste("Intercept_",
colnames(my_data)[which.max(nchar(colnames(my_data)))], sep = "")
my_data[, intercept_varname] <- 1
}
}else{
if(dataset_has_intercept){
#We decided always to include an intercept, even if the analysis model excludes one
#for the target variable. Reason: For the imputation of another (co)variable,
#it is not clear if the user also want to exclude an intercept.
#And we think an intercept is generally beneficial.
#The following code might be used if intercepts should be excluded:
#intercept_varname <- names(which(constant_variables))
#warning(paste("We droped the constant variable >>", intercept_varname,
#"<< in the imputation process as the model_formula excluded an intercept.", sep = ""))
#my_data[, intercept_varname] <- NULL
}else{
#nothing to do in this case
}
}
constant_variables <- apply(my_data, 2, function(x) length(unique(x)) == 1)
if(sum(constant_variables) >= 2){
print(paste("Your data has two or more constant variables: ",
paste(names(my_data)[constant_variables], collapse = ", "),
". For the imputation we kept only the first to avoid multicolinearity.", sep = ""))
my_data[, names(my_data)[constant_variables][-1]] <- NULL
}
variable_names_in_data <- colnames(my_data)
fe <- extract_varnames(model_formula = model_formula,
constant_variables = constant_variables,
variable_names_in_data = variable_names_in_data)
# check if all fix effects covariates are available in the data set
if(all(fe$fixedeffects_varname != "") & !all(fe$fixedeffects_varname %in% names(my_data))){
writeLines(paste("We didn't find >>",
paste(fe$fixedeffects_varname[!fe$fixedeffects_varname %in% names(my_data)],
collapse = " and "),
"<< in your data. How do you want to proceed: \n
[c]ontinue with ignoring the model_formula an running a single level imputation
or [e]xiting the imputation?"))
proceed <- readline("Type 'c' or 'e' into the console and press [enter]: ")
if(proceed == "e") return(NULL)
#if the model_formula is ignored every extracted value of the model_formula is ignored...
model_formula <- NULL
fe[1:length(fe)] <- ""
#... and every variable is used as a fixed effects variable, because in this case a single level imputation
#on every variable is run
fe$fixedeffects_varname <- colnames(my_data)
}
# check if all random effects covariates are available in the data set
if(all(fe$randomeffects_varname != "") & !all(fe$randomeffects_varname %in% names(my_data))){
writeLines(paste("We didn't find >>",
paste(fe$randomeffects_varname[!fe$randomeffects_varname %in% names(my_data)],
collapse = " and "),
"<<in your data. How do you want to proceed: \n
[c]ontinue with ignoring the model_formula an running a single level imputation
or [e]xiting the imputation?"))
proceed <- readline("Type 'c' or 'e' into the console and press [enter]: ")
if(proceed == "e") return(NULL)
model_formula <- NULL
fe[1:length(fe)] <- ""
fe$fixedeffects_varname <- colnames(my_data)
}
if(all(c("0", "1") %in% fe$randomeffects_varname)){
warning("Your model_formula includes 0 for no random intercept and 1 for a random intercept at the same time.
Please decide whether you want a random intercept or not.")
}
#it could be the case that the user specifies several cluster ids.
#As the tool doesn't support this yet, we stop the imputation routine.
if(length(fe$clID_varname) >= 2){
writeLines("The package only supports one cluster ID in the model_formula. \n
How do you want to proceed: \n
[c]ontinue with ignoring the model_formula an running a single level imputation
or [e]xiting the imputation?")
proceed <- readline("Type 'c' or 'e' into the console and press [enter]: ")
if(proceed == "e") return(NULL)
model_formula <- NULL
fe[1:length(fe)] <- ""
fe$fixedeffects_varname <- colnames(my_data)
}
#check whether the formula matches the data
if(fe$clID_varname != "" & !(fe$clID_varname %in% names(my_data))){
writeLines(paste("We didn't find", fe$clID_varname,
"in your data. How do you want to proceed: \n
[c]ontinue with ignoring the model_formula an running a single level imputation
or [e]xiting the imputation?"))
proceed <- readline("Type 'c' or 'e' into the console and press [enter]: ")
if(proceed == "e") return(NULL)
model_formula <- NULL
fe[1:length(fe)] <- ""
fe$fixedeffects_varname <- colnames(my_data)
}
#check whether the constant variable in the dataset are labbeld "1" if this is the label of the
#intercept variable in the model formula.
if(any(constant_variables)){
if((any(fe$fixedeffects_varname == "1") &
variable_names_in_data[constant_variables] != "1")|
(any(fe$randomeffects_varname == "1") &
variable_names_in_data[constant_variables] != "1")){
warning(paste("Your model_formula includes '1' for an intercept.
The constant variable in your data is named ", variable_names_in_data[constant_variables], ".
Consider naming the variable also '1' or 'Intercept'.", sep = ""))
}
}
#make sure that the intercept variable exist, and has the value 1
if(fe$intercept_varname != ""){
my_data[, fe$intercept_varname] <- 1
}
if(fe$clID_varname != ""){
my_data[, fe$clID_varname] <- as.factor(my_data[, fe$clID_varname])
}
#generate interaction variable(s), which will be updated after each imputation of a variable
interaction_names <- list()
if(fe$interactions != ""){
for(i in 1:length(fe$interactions)){
#get the product of the interaction variables
interaction <- apply(my_data[, strsplit(fe$interactions[i], ":")[[1]], drop = FALSE], 1, prod)
#get a name for the interaction
tmp_interaction_name <- paste("Interaction_", i, "_",
colnames(my_data)[which.max(nchar(colnames(my_data)))],
sep = "")
#add the interaction to the dataset
my_data[, tmp_interaction_name] <- interaction
#This step has to be repeated after each imputation of a variable.
#To avoid ever groing names, we fix the names of the interaction variables in a list
interaction_names[[i]] <- tmp_interaction_name
}
}
########################### MERGE SMALL CLUSTERS ####################
if(fe$clID_varname != ""){
tab_1 <- table(my_data[, fe$clID_varname])
if(!is.numeric(mn)) stop("mn has to be an integer (whole number).")
if(mn %% 1 != 0) stop("mn has to be an integer (whole number).")
#merge small clusters to a large big cluster
if(any(tab_1 < mn)){
writeLines(paste(sum(tab_1 < mn), "clusters have less than", mn, "observations.",
"How do you want to proceed: \n
[c]ontinue with merging these clusters with others
or [e]xiting the imputation?"))
proceed <- readline("Type 'c' or 'e' into the console and press [enter]: ")
if(proceed == "e") return(NULL)
# The procedure is the following:
# 1. determine the smallest and the second smallest cluster.
# 2. take the observations of the smallest cluster and combine it with observations of the second smallest cluster.
# this is be done by giving both clusters the name "[name of second smallest cluster] forced merge".
# 3. repeat with 1 until there is no invalid cluster left
while(any(tab_1 < mn)){
#step 1: determine the smallest and the second smallest cluster.
smallest_cluster <- tab_1[tab_1 == sort(as.numeric(tab_1))[1]][1]
tab_1_without_smallest_cluster <- tab_1[names(tab_1) != names(smallest_cluster)]
second_smallest_cluster <- tab_1_without_smallest_cluster[tab_1_without_smallest_cluster == sort(as.numeric(tab_1_without_smallest_cluster))[1]][1]
#step 2: new cluster name
new_name <- names(second_smallest_cluster)
levels(my_data[, fe$clID_varname])[levels(my_data[, fe$clID_varname]) == names(smallest_cluster)] <- new_name
levels(my_data[, fe$clID_varname])[levels(my_data[, fe$clID_varname]) == names(second_smallest_cluster)] <- new_name
#step 3: repeat.
tab_1 <- table(my_data[, fe$clID_varname])
}
}
if(length(tab_1) <= 2){
writeLines("The package currently requires more than two clusters\n
How do you want to proceed: \n
[c]ontinue with ignoring the model_formula an running a single level imputation
or [e]xiting the imputation?")
proceed <- readline("Type 'c' or 'e' into the console and press [enter]: ")
if(proceed == "e") return(NULL)
model_formula <- NULL
fe[1:length(fe)] <- ""
fe$fixedeffects_varname <- colnames(my_data)
}
}
######################################## START THE IMPUTATION #####################
# get the variable types:
rounding_degrees_tmp <- rounding_degrees
types <- array(dim = ncol(my_data))
for(j in 1:ncol(my_data)){
if(is.list(rounding_degrees)){
rounding_degrees_tmp <- rounding_degrees[[colnames(my_data)[j]]]
}
types[j] <- get_type(my_data[, j], rounding_degrees = rounding_degrees_tmp)
}
categorical <- types == "categorical"
if(any(categorical)){
for(catcount in 1:sum(categorical)){
my_data[, names(my_data)[categorical][catcount]] <-
as.factor(my_data[, names(my_data)[categorical][catcount]])
}
}
NA_locator <- is.na(my_data)
#################### Preparation for setting up a mids-object
alldata <- list()#this object saves the M datasets.
my_chainMean <- array(dim = c(length(variables_to_impute), maxit, M))
dimnames(my_chainMean) <- list(variables_to_impute,
as.character(1:maxit),
paste("Chain", 1:M))
my_chainVar <- my_chainMean
cat("Imputation progress:\n")
cat("0% 20% 40% 60% 80% 100%\n")
cat("|")
progress_plot <- "----|----|----|----|----|"
tickts_drawn <- 0
if(is.null(list_of_types)){
tmp_list_of_types <- list_of_types_maker(my_data, rounding_degrees = rounding_degrees)
#Note: here it can happen that a variable in the original_data is assumed to be
#semicont, but after the first imputation it is considered to be continuous.
#Therefore we update the list of types absed on the data_before
#and not on the original data.
}else{
tmp_list_of_types <- list_of_types
}
#initialize the list for the chains of Gibbs-samplings
gibbs <- list()
for(i in 1:M){
gibbs[[paste("imputation", i, sep = "")]] <- list()
for(l1 in 1:maxit){
#do the imputation cycle
tmp <- imputationcycle(data_before = my_data,
original_data = data,
NA_locator = NA_locator,
fe = fe,
interaction_names = interaction_names,
list_of_types = tmp_list_of_types,
nitt = nitt,
burnin = burnin,
thin = thin,
mn = mn,
heap = heap,
rounding_degrees = rounding_degrees)
my_data <- tmp$data_after
gibbs[[paste("imputation", i, sep = "")]][[paste("cycle", l1, sep = "")]] <-
tmp$chains
if(is.null(list_of_types)){
tmp_list_of_types <- list_of_types_maker(data, rounding_degrees = rounding_degrees)
#Note: here it can happen that a variable in the original_data is assumed to be
#semicont, but after the first imputation it is considered to be continuous.
#Therefore we update the list of types absed on the data_before
#and not on the original data.
}else{
tmp_list_of_types <- list_of_types
}
incomplete_variables <- colnames(NA_locator)[colSums(NA_locator) > 0]
variables_to_impute <- union(incomplete_variables,
names(tmp_list_of_types)[tmp_list_of_types == "roundedcont" |
tmp_list_of_types == "interval"])
#evaluate the imputed variables
for(l2 in variables_to_impute){
to_evaluate <- my_data[is.na(data[, l2]), l2, drop = TRUE]
#for some variable types these imputed data coonet be directly evaluated by taking the mean.
# For example categorical variables (including binary variables which might not be 0-1 coded but
# for example "m"-"f".)
#in other variables (interval and rounded continuous) not only the missing variables,
# but all values which received a new value shall be evaluated.
#Therefore the types of the variables are needed
if(is.null(list_of_types)){
if(is.list(rounding_degrees)){
rounding_degrees_tmp <- rounding_degrees[[l2]]
}else{
rounding_degrees_tmp <- rounding_degrees
}
tmp_type <- get_type(data[, l2], rounding_degrees = rounding_degrees_tmp)
}else{
tmp_type <- list_of_types[[l2]]
}
if(tmp_type == "roundedcont"){
eval_index <-
apply(outer(decompose_interval(interval = data[, l2])[, "precise"], rounding_degrees[-1], '%%') == 0, 1, any) |
!is.na(decompose_interval(interval = data[, l2])[, "lower_imprecise"]) | is.na(data[, l2])
to_evaluate <- my_data[eval_index, l2, drop = TRUE]
}
if(tmp_type == "interval"){
eval_index <- !is.na(decompose_interval(interval = data[, l2])[, "lower_imprecise"]) | is.na(data[, l2])
to_evaluate <- my_data[eval_index, l2, drop = TRUE]
}
if(tmp_type == "binary" |
tmp_type == "categorical" | tmp_type == "ordered_categorical"){
eval_index <- is.na(data[, l2])
to_evaluate <- as.numeric(as.factor(my_data[, l2]))[eval_index]
print(l2)
}
my_chainMean[l2, l1, i] <- mean(to_evaluate)
my_chainVar[l2, l1, i] <- stats::var(to_evaluate)
}
}
#Show the progress of the imputation:
progress <- floor((i/M) / 0.04)
if(progress > tickts_drawn){
cat(substr(progress_plot,
start = tickts_drawn + 1, stop = progress))
tickts_drawn <- progress
}
#Store the data, but only those variables that were also in the original data,
#even if they were droped because of colinearity.
#An added intercept variable wont be exported.
alldata[[i]] <- data
variables_to_export <- colnames(my_data) %in% colnames(data)
alldata[[i]][, variables_to_export] <- my_data[, variables_to_export, drop = FALSE]
}
# -------------- SET UP MIDS OBJECT --------------
imp_hmi <- stats::setNames(vector("list", ncol(data)), colnames(data))
method <- unlist(list_of_types_maker(data))
names(method) <- colnames(data)
for(l2 in variables_to_impute){
##get the number of missing values in each incomplete variable
# interval data are treated as missing
if(is.list(rounding_degrees)){
rounding_degrees_tmp <- rounding_degrees[[l2]]
}else{
rounding_degrees_tmp <- rounding_degrees
}
if(get_type(data[, l2], rounding_degrees = rounding_degrees_tmp) == "interval" |
get_type(data[, l2], rounding_degrees = rounding_degrees_tmp) == "roundedcont"){
data[, l2] <- NA
}
n_mis_j <- sum(is.na(data[, l2]))
imp_hmi[[l2]] <- as.data.frame(matrix(NA, nrow = n_mis_j, ncol = M,
dimnames = list(NULL, 1:M)))
for(i in 1:M){
# save only the imputed values of each variable across the different imputation runs.
# remember: alldata is a list with M elements, each element contains the whole data.frame
# which is the original dataframe, where the NAs have been replaced by imputed values.
# We want imputed_values to be a data.frame, so that we can store its colname
var_of_interest <- alldata[[i]][, l2, drop = FALSE]
imputed_values <- var_of_interest[is.na(data[, l2]), , drop = FALSE]
imp_hmi[[l2]][, i] <- imputed_values
rownames(imp_hmi[[l2]]) <- rownames(imputed_values)
}
}
nmis <- apply(is.na(data), 2, sum)
predictorMatrix <- matrix(0, nrow = ncol(data), ncol = ncol(data))
rownames(predictorMatrix) <- colnames(data)
colnames(predictorMatrix) <- colnames(data)
predictorMatrix[variables_to_impute, ] <- 1
diag(predictorMatrix) <- 0
visitSequence <- (1:ncol(data))[apply(is.na(data), 2, any)]
names(visitSequence) <- colnames(data)[visitSequence]
loggedEvents <- data.frame(it = 0, im = 0, co = 0, dep = "",
meth = "", out = "")
#Not supported yet
form <- vector("character", length = ncol(data))
post <- vector("character", length = ncol(data))
midsobj <- list(call = call, data = data,
m = M, nmis = nmis, imp = imp_hmi, method = method,
predictorMatrix = predictorMatrix, visitSequence = visitSequence,
form = form, post = post, seed = NA, iteration = maxit,
lastSeedValue = .Random.seed,
chainMean = my_chainMean,
chainVar = my_chainVar,
loggedEvents = loggedEvents)
oldClass(midsobj) <- "mids"
# If the user wants to use mice as a pooling function, and not hmi_pool,
# we conduct the pooling right away using the model_formula given by the user
# ! Note: here we add something not found in mids-objects from mice !
if(pool_with_mice & !is.null(model_formula_org)){
if(fe$clID_varname == ""){ # if no cluster ID was found, run a single level model
midsobj$pooling <- mice::pool(with(data = midsobj,
expr = stats::lm(formula = formula(format(model_formula_org)))))
}else{ # otherwise a multilevel model
midsobj$pooling <- mice::pool(with(data = midsobj,
expr = lme4::lmer(formula = format(model_formula_org))))
}
}
#In addition to classical mids objects we add the chains of the gibbs-samplings from MCMCglmm.
midsobj$gibbs <- gibbs
midsobj$gibbs_para <- c(nitt, burnin, thin)
return(midsobj)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.