# R/imi.R
# In sdap: Statistical Data Preparation Indicators

# Documented in imi

#' Imputation Impact (IMI)
#'
#' The aggregated imputation impact for variable groups measures the effect of
#' imputations on the distribution of categorical variables (exclusive
#' response groups).
#'
#' With weights \eqn{w_i}, structural missingness indicators \eqn{b_{ij}},
#' imputation indicators \eqn{g_{ij}} and data values \eqn{y_{ij}} (missing
#' values in the data are counted as 0), the indicator is computed over the
#' selected observations \eqn{i} and the \eqn{p} selected variables \eqn{j} as
#' \deqn{\sqrt{\frac{1}{p \sum_i w_i} \sum_j
#'   \frac{\left(\sum_i w_i (1 - b_{ij}) g_{ij} y_{ij}\right)^2}
#'        {\sum_i w_i (1 - b_{ij}) y_{ij}}}}
#' A variable whose denominator is zero contributes 0 to the sum.
#'
#' @author Beat Hulliger - Juan Berdugo
#' @param data (mandatory): A dataframe containing the data to be processed.
#' @param bij (optional): A matrix containing the structural missingness
#'   indicators. bij can be calculated using the function
#'   \code{\link[sdap]{smind}}. If the argument bij is missing, the indicator
#'   is calculated without considering a missingness indicators matrix (i.e.
#'   as if every entry of bij were 0).
#' @param gij (mandatory): A matrix containing the imputation indicators for a
#'   given dataframe. gij can be calculated using the function
#'   \code{\link[sdap]{impind}}.
#' @param obsi (optional): A vector with the observations (rows of data, bij
#'   and gij) to be processed. If the argument is missing, all observations
#'   are processed.
#' @param varj (optional): A vector with the variables (column numbers) to be
#'   considered for the calculation. If the argument varj is missing, all
#'   variables are considered for the indicator.
#' @param weight (optional): A vector of weights, one per selected
#'   observation, to be considered when calculating the indicator. A single
#'   value is used for every selected observation. Default weight is 1.
#' @return A list with the following elements: the variables used
#'   (variables), the observations used (observations) and the indicator IMI
#'   (imi).
#' @export
imi <- function(data, bij, gij, obsi = seq_len(nrow(gij)),
                varj = seq_len(ncol(gij)), weight) {
  # gij has to be checked before obsi or varj are touched: their defaults are
  # evaluated lazily from gij and would otherwise fail with an unrelated
  # "argument is missing" error.
  if (missing(gij)) {
    stop("Missing gij!", call. = FALSE)
  }
  if (missing(data)) {
    stop("Missing data!", call. = FALSE)
  }

  # Without structural missingness indicators every cell counts as
  # "not structurally missing", which makes the factor (1 - bij) equal to 1.
  if (missing(bij)) {
    bij <- matrix(0, nrow = nrow(gij), ncol = ncol(gij))
  }

  # Both indicator matrices must have exactly the dimensions of the data.
  # (Comparing the average of their dimensions with the data would let
  # mismatches that happen to cancel out slip through.)
  size_data <- as.double(dim(data))
  sizes_match <- identical(as.double(dim(bij)), size_data) &&
    identical(as.double(dim(gij)), size_data)
  if (!sizes_match) {
    stop(
      "The sizes of the datasets do not match. ",
      "Please recalculate bij and/or gij and/or r1ij.",
      call. = FALSE
    )
  }
  message("Datasets sizes ok")

  # drop = FALSE keeps the matrix shape when a single row or column is
  # selected, so colSums() below always sees a two-dimensional object.
  yij <- as.matrix(data[obsi, varj, drop = FALSE])
  storage.mode(yij) <- "double"
  yij[is.na(yij)] <- 0

  b_sel <- as.matrix(bij[obsi, varj, drop = FALSE])
  g_sel <- as.matrix(gij[obsi, varj, drop = FALSE])

  n <- nrow(yij)
  p <- ncol(yij)

  if (missing(weight)) {
    weight <- rep(1, n)
  } else if (length(weight) == 1L) {
    # Expand a scalar so that sum(weight) below is the total weight of all
    # selected observations, consistent with the default of rep(1, n).
    weight <- rep(weight, n)
  }

  # weight is recycled down the columns, i.e. one weight per observation.
  denominator <- colSums(weight * (1 - b_sel) * yij)
  numerator <- colSums(weight * (1 - b_sel) * g_sel * yij)^2

  # A variable without any weighted mass would give 0 / 0; it contributes 0.
  ratio <- numerator / denominator
  ratio[which(denominator == 0)] <- 0

  imi_value <- sqrt(sum(ratio)) / sqrt(sum(weight) * p)

  list(variables = varj, observations = obsi, imi = imi_value)
}