R/makeBinVars.R

#' makeBinVars
#'
#' Creates a set of n binary variables indicating the presence (1) or absence
#' (0) of each element of a user-defined vector of n codes, where a code of
#' interest may appear in one or more data.frame columns.
#'
#' @param codes A character vector of the codes of interest.
#' @param dfCols Either a character vector of the names of the data columns to
#'   search for `codes`, or a numeric vector of column indices.
#' @param data A data.frame containing individual claims-level data.
#' @param key Either a character vector of column names or a numeric vector of
#'   column indices identifying the unique key needed to merge the result back
#'   to the original dataset. Columns given by name are returned in the order
#'   in which they appear in `data`.
#' @param outName Name under which the result is also assigned at position 1
#'   of the search path (the global environment). "newBinVars" is used unless
#'   another string is given.
#' @return Invisibly, a data.frame holding the key column(s) followed by one
#'   0/1 column per element of `codes`, named `b_<code>`. A row is flagged 1
#'   when the code equals the value in at least one of the `dfCols` columns;
#'   missing values never count as a match. Column names pass through
#'   `as.data.frame()` and may be adjusted by it.
#' @export
makeBinVars <- function(codes, dfCols, data, key, outName = "newBinVars") {
  # Resolve a column specification (names or indices) to column indices.
  # Names are matched in the order the columns appear in `data`; names that
  # are not present are reported instead of being dropped silently.
  resolveCols <- function(spec, argName) {
    if (is.numeric(spec)) {
      return(spec)
    }
    missingCols <- setdiff(spec, names(data))
    if (length(missingCols) > 0) {
      warning(
        "makeBinVars: ", argName, " not found in data: ",
        paste(missingCols, collapse = ", "),
        call. = FALSE
      )
    }
    which(names(data) %in% spec)
  }

  colNums <- resolveCols(dfCols, "dfCols")
  keyNums <- resolveCols(key, "key")

  # Extract the key column(s) so they can be bound to the new variables;
  # `[[` yields the column vector itself
  keyList <- lapply(keyNums, function(b) data[[b]])
  names(keyList) <- names(data)[keyNums]

  # Subset once, outside the per-code loop. drop = FALSE keeps the selection
  # two-dimensional so rowSums() also works when a single column is searched.
  searchCols <- data[, colNums, drop = FALSE]

  # One 0/1 indicator per code: 1 when any searched column equals the code
  newVars <- lapply(as.list(codes), function(b) {
    ifelse(rowSums(searchCols == b, na.rm = TRUE) > 0, 1, 0)
  })

  # paste0() turns an empty `codes` into the single name "b_", which cannot be
  # assigned to an empty list, so only build names when there are variables
  if (length(newVars) > 0) {
    names(newVars) <- paste0("b_", codes)
  }

  # Key columns first, then the indicators, as one data.frame
  newVars <- as.data.frame(append(keyList, newVars))

  # Kept for backward compatibility: publish the result under `outName` at
  # position 1 of the search path
  assign(outName, newVars, pos = 1)

  invisible(newVars)
}
etesdahl/claimWranglR documentation built on May 16, 2019, 8:55 a.m.