R/dummy.R

Defines functions dummy

Documented in dummy

#' Dummy Variable Encoding
#'
#' Replaces each named factor column with one 0/1 indicator column per factor
#' level. The indicator columns are inserted at the position of the original
#' column, which is removed. Because the data frame is the first argument, the
#' function can be used within a pipe / dplyr chain.
#'
#' New columns are named `<col>_<level>`. They are built with `data.frame()`
#' using its default name checking, so levels that are not syntactically
#' valid names are adjusted (for example a space becomes a dot).
#'
#' @param data A data frame.
#' @param col Character vector with the name(s) of the factor column(s) to be
#'   transformed into dummy variables. Each name must match exactly one column
#'   of `data`, and that column must be a factor.
#' @return The data frame with every column named in `col` replaced by its
#'   dummy variable columns.
#' @examples
#' df <- data.frame(id = 1:3, grp = factor(c("a", "b", "a")))
#' dummy(df, "grp")
#' @export
dummy <- function(data, col) {
  for (col_name in col) {
    idx <- which(names(data) == col_name)

    # Fail early with a message that names the offending column instead of
    # letting `[[` raise an opaque subscript error.
    if (length(idx) == 0L) {
      stop("column '", col_name, "' not found in `data`", call. = FALSE)
    }
    if (length(idx) > 1L) {
      stop("column name '", col_name, "' matches more than one column of `data`",
           call. = FALSE)
    }

    v <- data[[idx]]
    if (!is.factor(v)) {
      stop("column '", col_name, "' must be a factor", call. = FALSE)
    }

    # One row per observation, one column per level; a two-column index
    # matrix (row, level code) sets the matching cell of each row to 1.
    m <- matrix(0, nrow = nrow(data), ncol = nlevels(v))
    m[cbind(seq_along(v), as.integer(v))] <- 1
    colnames(m) <- paste(col_name, levels(v), sep = "_")
    r <- data.frame(m)

    # Splice the indicator columns in where the factor column used to be,
    # keeping the columns before and after it in their original order.
    if (idx > 1L) {
      r <- cbind(data[seq_len(idx - 1L)], r)
    }
    if (idx < ncol(data)) {
      r <- cbind(r, data[(idx + 1L):ncol(data)])
    }
    data <- r
  }
  data
}
blazickjoe/DataScienceLibrary documentation built on Nov. 5, 2019, 2:26 p.m.