# Source file: inst/user2020_tutorials/episode2_branches/scripts/pull_data.R

# Load the raw house-sales CSV and prepare it for later steps.
#
# Reads "./data/raw-house-data.csv" (relative to the current working
# directory), derives categorical `year`, `month` and `weekday` columns from
# the `date` column, drops the `date`, `id` and `zipcode` columns, and stores
# the resulting data.table as `self$dt`.
#
# Args:
#   self: object that receives the prepared table as `self$dt`
#         (presumably the Metaflow step object -- confirm against the caller).
#
# Returns:
#   Invisibly, the prepared data.table (the value of the final assignment).
#
# Emits a warning (without aborting) when some `date` values do not match the
# expected "%Y%m%dT000000" layout, because those rows get NA in the derived
# columns.
pull_house_data <- function(self) {
  suppressPackageStartupMessages(library(data.table))
  dt <- fread("./data/raw-house-data.csv")

  # Parse the date strings; `[[` avoids the partial name matching of `$`.
  raw_dates <- dt[["date"]]
  sale_dates <- as.Date(raw_dates, format = "%Y%m%dT000000")

  # as.Date() silently yields NA for strings that do not fit the format;
  # surface that instead of letting NA factor values slip through unnoticed.
  unparsed <- is.na(sale_dates) & !is.na(raw_dates)
  if (any(unparsed)) {
    warning(
      sprintf(
        "%d value(s) in column 'date' did not match '%%Y%%m%%dT000000' and were set to NA",
        sum(unparsed)
      ),
      call. = FALSE
    )
  }

  # Add the categorical date parts by reference (no copy of the table).
  # NOTE: months() and weekdays() return names in the current locale, so the
  # factor levels depend on the LC_TIME setting of the running session.
  set(
    dt,
    j = c("year", "month", "weekday"),
    value = list(
      as.factor(year(sale_dates)),
      as.factor(months(sale_dates)),
      as.factor(weekdays(sale_dates))
    )
  )

  # Remove the date, id and zipcode columns. zipcode is dropped without being
  # used, so it is no longer cast to character beforehand.
  dt[, c("date", "id", "zipcode") := NULL]

  # saving as metaflow artifact (earlier versions wrote
  # "./data/house_price.csv" and returned the table instead)
  self$dt <- dt
}
# Netflix/user2020-metaflow-tutorial documentation built on Aug. 8, 2020, 12:03 a.m.