# consLettersUtils: Conservation Letters Utilities
#
# Documented in getMeanImp

# =============================================================================
# getMeanImp
# earlycapistran@comunidad.unam.mx - August 2020
# =============================================================================

#' Mean imputed values for a univariate time series
#'
#' This function takes a multiply imputed dataset ('mids' object), groups
#' imputed values by the independent variable and calculates the mean across
#' all 'm' imputed datasets for univariate time series data.
#'
#' Imputed values for 'yVarName' are paired, in row order, with the values of
#' 'xVarName' in the rows of 'data' where 'yVarName' is missing. 'data' should
#' therefore be the dataset from which 'mids' was generated.
#'
#' @param mids A 'mids' object generated by 'mice'
#' @param data Original univariate time series with missing values
#' @param xVarName A character string with the name of the predictor variable
#' in the original dataset
#' @param yVarName A character string with the name of the response variable in
#' 'mids' object
#' @return A data frame with a column for mean response variables grouped by the
#' corresponding predictor variable value for which they were imputed. The
#' columns are named after 'xVarName' and 'yVarName'.
#' @export
#'
#' @usage
#' getMeanImp(mids, data, xVarName, yVarName)
#'
#' @importFrom tidyr pivot_longer
#' @importFrom dplyr filter
#' @importFrom dplyr group_by
#' @importFrom dplyr summarise
#' @importFrom dplyr select
#' @importFrom magrittr %>%
#' @importFrom mice is.mids

# To run this function, you must have 'dplyr', 'tidyr', 'magrittr' and 'mice'
# installed
getMeanImp <- function(mids, data, xVarName, yVarName) {
  # Bind the column names used inside dplyr/tidyr verbs so that R CMD check
  # does not report them as undefined global variables
  x <- y <- NULL

  # ---- Argument validation ----
  if (!is.mids(mids)) {
    stop("The data must have class 'mids'")
  }
  if (!is.character(xVarName) || length(xVarName) != 1L || is.na(xVarName)) {
    stop("'xVarName' must be a character string")
  }
  if (!is.character(yVarName) || length(yVarName) != 1L || is.na(yVarName)) {
    stop("'yVarName' must be a character string")
  }
  if (!is.data.frame(data)) {
    stop("'data' must be a data frame")
  }
  if (identical(xVarName, yVarName)) {
    stop("'xVarName' and 'yVarName' must name different variables")
  }
  if (!xVarName %in% names(data)) {
    stop("'xVarName' (\"", xVarName, "\") is not a column of 'data'")
  }
  if (!yVarName %in% names(data)) {
    stop("'yVarName' (\"", yVarName, "\") is not a column of 'data'")
  }

  # Imputed values for the response: one row per imputed observation and one
  # column per imputed dataset
  imp_values <- mids$imp[[yVarName]]
  if (is.null(imp_values)) {
    stop("'mids' contains no imputations for 'yVarName' (\"", yVarName, "\")")
  }

  # Rows of the original data for which the response is missing
  missing_rows <- is.na(data[[yVarName]])

  # Imputed rows are paired with predictor values by position, so the two
  # must have the same number of rows
  if (nrow(imp_values) != sum(missing_rows)) {
    stop(
      "The number of imputed values for '", yVarName, "' in 'mids' (",
      nrow(imp_values), ") does not match the number of missing values in ",
      "'data' (", sum(missing_rows), ")"
    )
  }

  # Predictor values corresponding to the missing responses. Extracted with
  # base subsetting so that no external character vector is passed to
  # tidyselect.
  pred_value <- data.frame(
    x = data[[xVarName]][missing_rows],
    stringsAsFactors = FALSE
  )

  # Bind imputations with their predictor values and pivot into a long table
  # with one row per (predictor value, imputed dataset) pair
  imp_long <- cbind(imp_values, pred_value) %>%
    tidyr::pivot_longer(
      cols = -x,
      names_to = "imputedDataset",
      values_to = "y"
    )

  # Mean imputed value for each predictor value across all imputed datasets
  imp_means <- imp_long %>%
    dplyr::group_by(x) %>%
    dplyr::summarise(impMean = mean(y))

  # Restore the caller's variable names on the result
  colnames(imp_means) <- c(xVarName, yVarName)
  imp_means
}