# R/fill_time_gaps.R

#' Fill gaps in grouped time series
#'
#' Builds, for every group, one row per step of a regular date sequence
#' between that group's first and last date, and left-joins `data` onto that
#' grid. Dates that were absent from `data` therefore appear as rows whose
#' remaining columns are `NA`.
#'
#' @param data A data frame containing the column(s) named by `group` and the
#'   column named by `date`.
#' @param group Character vector naming the grouping column(s).
#' @param date Name (string) of the date column in `data`.
#' @param ranges Optional data frame with one row per group giving the date
#'   range to fill. It must contain the `group` column(s) and the columns
#'   named by `min_date` and `max_date`. When omitted, the range of each
#'   group is computed from `data`.
#' @param min_date,max_date Names of the columns in `ranges` that hold the
#'   first and last date of each group. Only used when `ranges` is supplied.
#' @param interval Step unit of the date sequence; passed to `seq()` as
#'   `"+1 <interval>"` (e.g. `"day"`, `"week"`, `"month"`).
#' @param fillNA Currently unused; kept so existing calls keep working.
#' @return A data frame with the `date` column, the `group` column(s) and the
#'   remaining columns of `data`, one row per group and date step.
#' @export
fillTimeGaps <- function(data, group = "user_id", date = "date", ranges,
                         min_date = "min_date", max_date = "max_date",
                         interval = "day", fillNA = TRUE) {
  # sqldf needs the SQLite driver and the pure-R gsubfn engine. The options
  # are restored on exit so the caller's session is left untouched.
  old_opts <- options(
    scipen = 999, digits = 5, sqldf.driver = "SQLite", gsubfn.engine = "R"
  )
  on.exit(options(old_opts), add = TRUE)

  if (missing(ranges)) {
    missing_cols <- setdiff(c(group, date), names(data))
    if (length(missing_cols) > 0L) {
      stop(
        "`data` is missing column(s): ",
        paste(missing_cols, collapse = ", "),
        call. = FALSE
      )
    }
    # `!!date` injects the column name before evaluation, so a column in
    # `data` that happens to be called like a local variable cannot shadow it.
    ranges <- dplyr::ungroup(
      dplyr::summarize(
        dplyr::group_by_at(data, group),
        min_date = min(.data[[!!date]], na.rm = TRUE),
        max_date = max(.data[[!!date]], na.rm = TRUE)
      )
    )
  } else {
    colnames(ranges)[colnames(ranges) == min_date] <- "min_date"
    colnames(ranges)[colnames(ranges) == max_date] <- "max_date"
  }

  # One global sequence covering every group; the SQL join below trims it to
  # each group's own [min_date, max_date] window.
  # NOTE: the local names `ranges` and `dates` are referenced as table names
  # inside the SQL query and must not be renamed.
  dates <- stats::setNames(
    data.frame(
      seq(
        min(ranges$min_date, na.rm = TRUE),
        max(ranges$max_date, na.rm = TRUE),
        by = sprintf("+1 %s", interval)
      )
    ),
    date
  )

  # Collapse the group columns into a single select list so that several
  # grouping columns yield one query rather than one query per column.
  group_select <- paste0("r.", group, collapse = ", ")
  query <- sprintf(
    "
    SELECT d.%1$s, %2$s
    FROM ranges r
    LEFT JOIN dates d ON
      d.%1$s >= r.min_date AND
      d.%1$s <= r.max_date
    ",
    date, group_select
  )
  date_ranges <- sqldf::sqldf(query)

  # Same columns a natural join would pick, stated explicitly to avoid the
  # "Joining, by = ..." message.
  join_cols <- intersect(c(date, group), names(data))
  if (length(join_cols) == 0L) {
    stop(
      "`data` shares no `date` or `group` column with the date grid",
      call. = FALSE
    )
  }

  dplyr::left_join(date_ranges, data, by = join_cols)
}
# gogonzo/oddsandsods documentation built on May 12, 2019, 1:35 a.m.