# Source file: R/load_retailrocket_data.R

# Defines functions: load_retailrocket_data

#' Load the retailrocket dataset into memory
#'
#' Reads the event log, the two item property files and the category tree
#' from a folder and returns them as a named list. The two item property
#' files are stacked row-wise into a single table. All property rows are
#' returned; no filtering on property name is applied.
#'
#' @param filepath character(1), path to data folder on local disc. A trailing
#'   path separator is optional.
#'
#' @return named list of data.table with elements `events`, `properties` and
#'   `category`
#' @export
#'
#' @import data.table magrittr
#'
#' @examples
#'\dontrun{
#'load_retailrocket_data("~/CloudStation/Projekte/r_projects/GoIT/retailrocket-recommender-system-dataset/")
#'}
load_retailrocket_data <- function(filepath) {
  if (!is.character(filepath) || length(filepath) != 1L || is.na(filepath)) {
    stop(
      "`filepath` must be a single, non-missing character string.",
      call. = FALSE
    )
  }

  ## Those are the filenames of the kaggle dataset
  filename.event <- "events.csv"
  filename.properties <- c(
    "item_properties_part1.csv",
    "item_properties_part2.csv"
  )
  filename.category <- "category_tree.csv"

  ## Accept the folder with or without a trailing separator. An empty string
  ## is left untouched so that file names stay relative to the working
  ## directory.
  if (nzchar(filepath) && !grepl("[/\\\\]$", filepath)) {
    filepath <- paste0(filepath, "/")
  }

  path.event <- paste0(filepath, filename.event)
  path.properties <- paste0(filepath, filename.properties)
  path.category <- paste0(filepath, filename.category)

  ## Fail early with one message naming every missing file. URL-like
  ## prefixes are skipped here because they cannot be checked with
  ## file.exists(); they are passed to fread() unchanged.
  if (!grepl("^[A-Za-z][A-Za-z0-9+.-]*://", filepath)) {
    paths.all <- c(path.event, path.properties, path.category)
    paths.missing <- paths.all[!file.exists(paths.all)]
    if (length(paths.missing) > 0L) {
      stop(
        "Cannot find dataset file(s): ",
        paste(paths.missing, collapse = ", "),
        call. = FALSE
      )
    }
  }

  ## Using data.table's fread function to import data efficiently
  events <- fread(path.event)

  ## The property files are read one by one and stacked into one table.
  ## All properties are kept. A filter on the two properties of interest is
  ## currently disabled:
  ## properties[property %chin% c("available", "categoryid")]
  properties <- rbindlist(lapply(path.properties, fread))

  ## Bundle data to less detailed categories
  category <- fread(path.category)

  ## return data as list
  list(
    events = events,
    properties = properties,
    category = category
  )
}



# item_cat <- properties[property == "categoryid", .(category = value), by = .(itemid)]
# setkey(item_cat, itemid)
# item_cat[events]
# mifek/bancommender documentation built on Nov. 25, 2019, 11:40 a.m.