R/define_var_attribute.R

#' Define a tibble attribute variable
#' @export

define_var_attribute <- function(data
                               ,id
                               ,value
                               ,jitter = Sys.getenv("OLIVER_REPLICA_JITTER")){

  # need to force the data object into a data frame so I can play with it in matrix notation
  # in the lapply call below
  data <- data %>% as_data_frame()

  # calculate the mean of the available data
  mean_value <- lapply(data[,value], mean, na.rm = TRUE)

  # set names for the df
  dots <- setNames(list(lazyeval::interp(~ x
                               ,x = as.name(value)))
                   ,value)

  # initialize the attr table
  attribute <- select_(data
                       ,id
                       ,value) %>%
    as_data_frame() %>%
    mutate_(.
            ,.dots = dots) %>%
    rename_(., .dots = setNames(value, "attr_values"))

  # if TRUE, apply jitter for logical values
  if (lapply(data[,value], class) == 'logical') {

    attribute <- attribute %>%
      mutate(attr_values = if(jitter){runif(n())} else attr_values
             ,attr_values = ifelse(attr_values > mean_value, TRUE, FALSE))

  # if TRUE, apply jitter for integer and double values
  } else if (any(lapply(data[,value], class) == 'integer'
                     ,lapply(data[,value], class) == 'double')) {
    attribute <- attribute %>%
      mutate(attr_values = if(jitter){attr_values + rbinom(n = n()
                                                           ,size = round(as.numeric(mean_value)
                                                                         ,digits = 0)
                                                           ,prob = runif(1))
        } else attr_values)
  }

  return(attribute)

}
mienkoja/oliveR documentation built on May 6, 2019, 6:01 p.m.