R/smind.R

Defines functions smind

Documented in smind

#' Structural missingness indicator
#'
#' Creates (or updates) a matrix \code{bij} which contains the structural
#' missingness indicator for each element of a filtered variable group.
#' For every row of \code{data} the values of the filtering variables are
#' compared with \code{filter_values}; the resulting 0/1 condition is written
#' into the columns of \code{bij} that correspond to the filtered variables.
#' All other columns of \code{bij} are left as they are.
#'
#' @author Beat Hulliger, Juan Berdugo
#' @param data (mandatory): A dataframe containing the data to be processed.
#' @param filtering_var (mandatory): Column numbers of the variables which determine the filter.
#' @param filtered_var (mandatory): Column numbers of the variables which are filtered.
#' @param filter_values (mandatory): Values to be checked against the filtering variables. e.g. c("",NA,1,-1, -2).
#' @param positive (optional): When positive = 0 (default) the filtered variables are considered structurally missing if \strong{all} of the filtering variables have a value among the filter values. When positive = 1, the filtered variables are considered structurally missing if \strong{any} of the filtering variables has a value among the filter values.
#' @param bij (optional): A preexisting structural missingness indicator matrix. It is only reused when its dimensions are identical to those of \code{data}; otherwise it is replaced by a new all-zero matrix.
#' @return An integer matrix bij with the same dimensions as \code{data}. The columns given by \code{filtered_var} contain 1 where the filter condition holds for the row and 0 otherwise.
#' @examples
#' d <- data.frame(owner = c(1, 2, 1), rent = c(NA, 500, NA))
#' smind(d, filtering_var = 1, filtered_var = 2, filter_values = 1)
#' @export
smind <- function(data, filtering_var, filtered_var, filter_values,
                  positive = 0, bij) {
  sizedata <- dim(data)
  if (length(sizedata) != 2L) {
    stop("Argument data must be two-dimensional (a data frame or matrix).",
         call. = FALSE)
  }
  n <- sizedata[1]
  p <- sizedata[2]

  p_filtering_var <- length(filtering_var)
  p_filtered_var <- length(filtered_var)

  # ---- Argument validation (all failures are signalled with stop()) ----

  # Without any filtering variable the "all match" rule would be vacuously
  # true for every row, so an empty selection is rejected.
  if (p_filtering_var == 0L) {
    stop("At least one filtering variable must be provided.", call. = FALSE)
  }

  # Check if the dataset has enough columns
  if (p <= p_filtering_var) {
    stop(
      paste("Dataset should contain at least", p_filtering_var, "variables.",
            "Only ", p, "Variable(s) could be found."),
      call. = FALSE
    )
  }

  # Check if the dataset contains the provided columns. Only numeric
  # selections are range-checked here; other index types are left to R's own
  # subsetting rules.
  out_of_range <- function(idx) {
    is.numeric(idx) && any(is.na(idx) | idx < 1 | idx > p)
  }
  if (p_filtered_var > p ||
      out_of_range(filtering_var) || out_of_range(filtered_var)) {
    stop("Variable(s) not found in the provided dataset.", call. = FALSE)
  }

  if (length(positive) != 1L || is.na(positive) ||
      !(positive == 0 || positive == 1)) {
    stop("Invalid Argument - - Positive - -  only 1 or 0 is accepted.",
         call. = FALSE)
  }

  # ---- Create the bij matrix; an existing, matching one is reused ----

  # All-FALSE matrix with the dimensions and dimnames of data.
  empty_indicator <- function() {
    out <- is.na(data)
    out[] <- FALSE
    out
  }

  if (missing(bij)) {
    bij <- empty_indicator()
    cat("Argument bij not found. A new bij matrix has been created.\n")
  } else if (!identical(sizedata, dim(bij))) {
    bij <- empty_indicator()
    cat("Previously existing bij was deleted since the dataset does not match the size of bij.\n")
  }

  # ---- Filter condition ----

  # hit[i, j] is TRUE when row i of the j-th filtering variable holds one of
  # the filter values (match() also matches NA against NA).
  hit <- matrix(FALSE, nrow = n, ncol = p_filtering_var)
  for (j in seq_len(p_filtering_var)) {
    hit[, j] <- match(data[, filtering_var[j]], filter_values, nomatch = 0) > 0
  }
  n_hits <- rowSums(hit)

  # positive == 0: all filtering variables must match;
  # positive == 1: at least one filtering variable must match.
  if (positive == 0) {
    b_condition <- as.integer(n_hits >= p_filtering_var)
  } else {
    b_condition <- as.integer(n_hits > 0)
  }

  # Place the condition in each filtered column of bij. The replacement is
  # filled column by column, so repeating the vector once per column gives
  # every filtered column the same row-wise condition.
  bij[, filtered_var] <- rep(b_condition, times = p_filtered_var)
  storage.mode(bij) <- "integer" # to make sure it is stored as integer

  cat(paste("Mean structural missingness of filtered variables: ", mean(bij[, filtered_var])))
  return(bij)
}

Try the sdap package in your browser

Any scripts or data that you put into this service are public.

sdap documentation built on May 2, 2019, 6:52 p.m.