R/data_clean.R

Defines functions handle_missing gen_at_list at_transforms at_transform

# Transform data (big wrapper)
# Would use `transform`, but the syntax for factors is annoying
at_transform <- function(var, value){

  # var: vector
  # value: character or numeric, value of variable
  #
  # return: vector with transformed variable

  if(is.factor(var)){
    factor(rep(value, length(var)), levels = levels(var))
  } else {
    rep(value, length(var))
  }

}

# Apply multiple transformations
at_transforms <- function(model_df, at_list){

  # model_df: dataframe used in model (not model.matrix)
  # at_list: list of transformations, in the format of
  #          `list("variable" = c("values"))`
  #
  # return: list of dataframes, each transformed

  # Figure out all transformations
  all_combos <- expand.grid(at_list)

  # Allocate vector to hold them
  df <- vector(mode = 'list', length  = nrow(all_combos))

  # Loop through all combinations
  for(i in seq_len(nrow(all_combos))){

    df_tmp <- model_df

    for(j in names(all_combos)){
      df_tmp[[j]] <- at_transform(var = df_tmp[[j]], value = all_combos[i, j])
    }

    df[[i]] <- df_tmp
  }

  # Give names to list
  names(df) <- apply(all_combos, 1, FUN = function(x){
    paste(names(all_combos), "=", x, collapse = ' ')
  })

  # Return
  df
}

# Generate "at" transformation list for a single variable
gen_at_list <- function(df, var_interest, at_var_interest = NULL){

  # df: dataframe of values
  # var_interest: character, variable of interest
  # at_var_interest: vector, levels of variables of interest, defaults to NULL
  #
  # Return named list of all values for variable of interest

  stopifnot(var_interest %in% names(df))

  if(is.null(at_var_interest)){
    # Get all unique values
    val_interest <- unique(df[[var_interest]])
    # order and put into list
    val_interest <- list(val_interest[order(val_interest)])
  } else {
    val_interest <- list(at_var_interest)
  }

  # Give name to list
  names(val_interest) <- var_interest

  val_interest
}

# Handle missing data and weights

handle_missing <- function(model, data, weights, nrow_orig){

  # Add weights
  if(is.null(weights)) weights <- rep(1, nrow_orig)

  # Keep completes only
  miss <- rowSums(is.na(data)) > 0 | is.na(weights)
  weights <- weights[! miss]
  data <- data[! miss, , drop = FALSE]

  # Remove any booleans
  if(all(data$`T` %in% c(TRUE, NA)))
    data$`T` <- NULL
  if(all(data$`F` %in% c(FALSE, NA)))
    data$`F` <- NULL

  # Throw warning if rows were dropped
  if(nrow(data) != nrow_orig)
    warning(sprintf('Dropping %s rows due to missing data',
                    nrow_orig - nrow(data)))

  list(data = data,
       weights = weights)

}

Try the modmarg package in your browser

Any scripts or data that you put into this service are public.

modmarg documentation built on Nov. 23, 2020, 1:07 a.m.