# R/imir.R
# In sdap: Statistical Data Preparation Indicators
#
# Documented in imir

#' Imputation Indicator for Responded Items IMIR
#'
#' Version of imi() for responded items. The aggregated imputation impact for
#' variable groups allows measuring the effect of imputations of values with
#' original response on the distribution of categorical variables.
#'
#' For every selected variable j the function computes
#' \code{(sum_i w_i r1_ij (1 - b_ij) g_ij y_ij)^2 / sum_i w_i (1 - b_ij) y_ij},
#' sums these ratios over the variables, divides by
#' \code{sum_i w_i sum_j (1 - b_ij)} and returns the square root. Missing
#' values in \code{data} are treated as 0. A variable whose denominator is
#' exactly zero contributes 0 to the sum instead of \code{NaN}.
#'
#' @author Beat Hulliger - Juan Berdugo
#' @param data (mandatory): A dataframe containing the data to be processed.
#' @param r1ij (mandatory): A matrix containing the response indicators for a
#'   given dataframe.
#' @param bij (optional): A matrix containing the structural missingness
#'   indicators. bij can be calculated using the function
#'   \code{\link[sdap]{smind}}. If the argument bij is missing, the indicator
#'   is calculated as if no item were structurally missing (all entries 0).
#' @param gij (mandatory): A matrix containing the imputation indicators for a
#'   given dataframe. gij can be calculated using the function
#'   \code{\link[sdap]{impind}}.
#' @param obsi (optional): A vector with the observations (rows) to be
#'   processed. If the argument obsi is missing, all rows of gij are processed.
#' @param varj (optional): A vector with the variables (column numbers) to be
#'   considered for the calculation. If the argument varj is missing, all
#'   columns of gij are considered for the indicator.
#' @param weight (optional): A vector of weights to be considered when
#'   calculating the indicator. It is multiplied row-wise with the selected
#'   observations, so it is expected to hold one weight per element of obsi.
#'   If no weight vector is given, every observation gets weight 1.
#' @return A list with the following elements: variables (variables),
#'   observations (observations), Indicator IMIR (imir).
#' @export
imir <- function(data, r1ij, bij, gij, obsi = seq_len(nrow(gij)),
                 varj = seq_len(ncol(gij)), weight) {
  n <- length(obsi)

  if (missing(weight)) {
    weight <- rep(1, n)
  }

  # bij is optional: without it no item is treated as structurally missing.
  if (missing(bij)) {
    bij <- array(0, dim = dim(gij), dimnames = dimnames(gij))
  }

  # drop = FALSE keeps every selection two-dimensional, so a single variable
  # or a single observation does not collapse to a plain vector (which would
  # break rowSums() below).
  yij <- as.matrix(data[obsi, varj, drop = FALSE])
  storage.mode(yij) <- "double"
  yij[is.na(yij)] <- 0

  responded <- as.matrix(r1ij[obsi, varj, drop = FALSE])
  applicable <- 1 - as.matrix(bij[obsi, varj, drop = FALSE])
  imputed <- as.matrix(gij[obsi, varj, drop = FALSE])

  # weight has one entry per row; R recycles it down the columns, which
  # amounts to weighting each observation.
  denominator <- colSums(weight * applicable * yij)
  numerator <- colSums(weight * responded * applicable * imputed * yij)^2

  # A zero denominator means the variable carries no weighted mass; it
  # contributes zero rather than 0 / 0 = NaN. NA denominators still propagate.
  ratio <- numerator / denominator
  ratio[!is.na(denominator) & denominator == 0] <- 0

  normaliser <- sum(weight * rowSums(applicable))
  imir.value <- sqrt(sum(ratio) / normaliser)

  list(variables = varj, observations = obsi, imir = imir.value)
}