R/dateadd.R

Defines functions datepart asDate datediff dateadd

Documented in asDate dateadd datediff datepart

# Copyright 2024 DARWIN EU®
#
# This file is part of CDMConnector
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#' Add days or years to a date in a dplyr query
#'
#' This function must be "unquoted" using the "bang bang" operator (!!). See example.
#'
#' @param date The name of a date column in the database table as a character string
#' @param number The number of units to add. Can be a positive or negative whole number.
#' @param interval The units to add. Must be either "day" (default) or "year"
#'
#' @return Platform specific SQL that can be used in a dplyr query.
#' @export
#' @importFrom rlang !!
#'
#' @examples
#' \dontrun{
#' con <- DBI::dbConnect(duckdb::duckdb())
#' date_tbl <- dplyr::copy_to(con, data.frame(date1 = as.Date("1999-01-01")),
#'                            name = "tmpdate", overwrite = TRUE, temporary = TRUE)
#'
#' df <- date_tbl %>%
#'   dplyr::mutate(date2 = !!dateadd("date1", 1, interval = "year")) %>%
#'   dplyr::collect()
#'
#' DBI::dbDisconnect(con, shutdown = TRUE)
#' }
dateadd <- function(date, number, interval = "day") {
  checkmate::assertCharacter(interval, len = 1)
  checkmate::assertSubset(interval, choices = c("day", "year"))
  checkmate::assertCharacter(date, len = 1)
  if(!(checkmate::testCharacter(number, len = 1) || checkmate::testIntegerish(number, len = 1))) {
    rlang::abort("`number` must a character string with a column name or a number.")
  }

  dot <- get(".", envir = parent.frame())
  db <- CDMConnector::dbms(dot$src$con)

  if (db %in% c("oracle", "snowflake")) {
    date <- as.character(DBI::dbQuoteIdentifier(dot$src$con, date))
    if (is.character(number)) {
      number <- as.character(DBI::dbQuoteIdentifier(dot$src$con, number))
    }
  }

  if (db %in% c("spark", "oracle") && interval == "year") {
    # spark and oracle sql requires number of days in dateadd
    if (is.numeric(number)) {
      number <- floor(number*365.25)
    } else {
      number <- paste(number, "* 365.25")
    }
  }

  sql <- switch (db,
                 "redshift" = glue::glue("DATEADD({interval}, {number}, {date})"),
                 "oracle" = glue::glue("({date} + NUMTODSINTERVAL({number}, 'day'))"),
                 "postgresql" = glue::glue("({date} + {number}*INTERVAL'1 {interval}')"),
                 "sql server" = glue::glue("DATEADD({interval}, {number}, {date})"),
                 "spark" = glue::glue("date_add({date}, {number})"),
                 "duckdb" = glue::glue("({date} + {number}*INTERVAL'1 {interval}')"),
                 "sqlite" = glue::glue("CAST(STRFTIME('%s', DATETIME({date}, 'unixepoch', ({number})||' {interval}s')) AS REAL)"),
                 "bigquery" = glue::glue("DATE_ADD({date}, INTERVAL {number} {toupper(interval)})"),
                 "snowflake" = glue::glue('DATEADD({interval}, {number}, {date})'),
                 rlang::abort(glue::glue("Connection type {paste(class(dot$src$con), collapse = ', ')} is not supported!"))
  )
  dbplyr::sql(as.character(sql))
}


#' Compute the difference between two days
#'
#' This function must be "unquoted" using the "bang bang" operator (!!). See example.
#'
#' @param start The name of the start date column in the database as a string.
#' @param end The name of the end date column in the database as a string.
#' @param interval The units to use for difference calculation. Must be either "day" (default) or "year".
#'
#' @return Platform specific SQL that can be used in a dplyr query.
#' @export
#' @importFrom rlang !!
#'
#' @examples
#' \dontrun{
#' con <- DBI::dbConnect(duckdb::duckdb())
#' date_tbl <- dplyr::copy_to(con, data.frame(date1 = as.Date("1999-01-01")),
#'                            name = "tmpdate", overwrite = TRUE, temporary = TRUE)
#'
#' df <- date_tbl %>%
#'   dplyr::mutate(date2 = !!dateadd("date1", 1, interval = "year")) %>%
#'   dplyr::mutate(dif_years = !!datediff("date1", "date2", interval = "year")) %>%
#'   dplyr::collect()
#'
#' DBI::dbDisconnect(con, shutdown = TRUE)
#' }
datediff <- function(start, end, interval = "day") {
  checkmate::assertCharacter(interval, len = 1)
  checkmate::assertSubset(interval, choices = c("day", "month", "year"))
  checkmate::assertCharacter(start, len = 1)
  checkmate::assertCharacter(end, len = 1)

  dot <- get(".", envir = parent.frame())
  db <- CDMConnector::dbms(dot$src$con)


  if (interval == "day") {
    if (db == "oracle") {
      start <- as.character(DBI::dbQuoteIdentifier(dot$src$con, start))
      end <- as.character(DBI::dbQuoteIdentifier(dot$src$con, end))
    }

    sql <- switch (
      db,
      "redshift" = glue::glue("DATEDIFF(day, {start}, {end})"),
      "oracle" = glue::glue("CEIL(CAST({end} AS DATE) - CAST({start} AS DATE))"),
      "postgresql" = glue::glue("(CAST({end} AS DATE) - CAST({start} AS DATE))"),
      "sql server" = glue::glue("DATEDIFF(day, {start}, {end})"),
      "spark" = glue::glue("datediff({end},{start})"),
      "duckdb" = glue::glue("datediff('day', {start}, {end})"),
      "sqlite" = glue::glue("(JULIANDAY(end, 'unixepoch') - JULIANDAY(start, 'unixepoch'))"),
      "bigquery" = glue::glue("DATE_DIFF({end}, {start}, DAY)"),
      "snowflake" = glue::glue('DATEDIFF(day, "{start}", "{end}")'),
      rlang::abort(glue::glue("Connection type {paste(class(dot$src$con), collapse = ', ')} is not supported!"))
    )
  } else {
    # datepart will quote oracle names
    dayStart   <- datepart(start, "day",   db)
    monthStart <- datepart(start, "month", db)
    yearStart  <- datepart(start, "year",  db)
    dayEnd     <- datepart(end,   "day",   db)
    monthEnd   <- datepart(end,   "month", db)
    yearEnd    <- datepart(end,   "year",  db)
    if (interval == "month") {
      sql <- glue::glue(
        "FLOOR(({yearEnd} * 1200 + {monthEnd} * 100 + {dayEnd} -
         ({yearStart} * 1200 + {monthStart} * 100 + {dayStart})) / 100)"
      )
    } else {
      sql <- glue::glue(
        "FLOOR(({yearEnd} * 10000 + {monthEnd} * 100 + {dayEnd} -
         ({yearStart} * 10000 + {monthStart} * 100 + {dayStart})) / 10000)"
      )
    }
  }

  dbplyr::sql(as.character(sql))
}

#' as.Date dbplyr translation wrapper
#'
#' This is a workaround for using as.Date inside dplyr verbs against a database
#' backend. This function should only be used inside dplyr verbs where the first
#' argument is a database table reference. `asDate` must be unquoted with !! inside
#' dplyr verbs (see example).
#'
#' @param x an R expression
#'
#' @export
#' @examples
#' \dontrun{
#' con <- DBI::dbConnect(odbc::odbc(), "Oracle")
#' date_tbl <- dplyr::copy_to(con,
#'                            data.frame(y = 2000L, m = 10L, d = 10L),
#'                            name = "tmp",
#'                            temporary = TRUE)
#'
#' df <- date_tbl %>%
#'   dplyr::mutate(date_from_parts = !!asDate(paste0(
#'     .data$y, "/",
#'     .data$m, "/",
#'     .data$d
#'   ))) %>%
#'   dplyr::collect()
#' }
asDate <- function(x) {
  lifecycle::deprecate_soft("1.4.1", "CDMConnector::asDate()", "as.Date()")
  x_quo <- rlang::enquo(x)
  .data <- get(".", envir = parent.frame())
  dialect <- CDMConnector::dbms(.data$src$con)

  if (dialect == "oracle") {
    x <- dbplyr::partial_eval(x_quo, data = .data)
    x <- dbplyr::translate_sql(!!x, con = .data$src$con)
    x <- glue::glue("TO_DATE({x}, 'YYYY-MM-DD')")
    return(dplyr::sql(x))
  } else if (dialect == "spark") {
    x <- dbplyr::partial_eval(x_quo, data = .data)
    x <- dbplyr::translate_sql(!!x, con = .data$src$con)
    x <- glue::glue("TO_DATE({x})")
    return(dplyr::sql(x))
  } else {
    return(rlang::expr(as.Date(!!x_quo)))
  }
}

#' Extract the day, month or year of a date in a dplyr pipeline
#'
#' @param date Character string that represents to a date column.
#' @param interval Interval to extract from a date. Valid options are "year", "month", or "day".
#' @param dbms Database system, if NULL it is auto detected.
#'
#' @export
#' @examples
#' \dontrun{
#' con <- DBI::dbConnect(duckdb::duckdb(), ":memory:")
#' date_tbl <- dplyr::copy_to(con,
#'                            data.frame(birth_date = as.Date("1993-04-19")),
#'                            name = "tmp",
#'                            temporary = TRUE)
#' df <- date_tbl %>%
#'   dplyr::mutate(year = !!datepart("birth_date", "year")) %>%
#'   dplyr::mutate(month = !!datepart("birth_date", "month")) %>%
#'   dplyr::mutate(day = !!datepart("birth_date", "day")) %>%
#'   dplyr::collect()
#' DBI::dbDisconnect(con, shutdown = TRUE)
#' }
datepart <- function(date, interval = "year", dbms = NULL) {
  checkmate::assertCharacter(date, len = 1)
  checkmate::assertChoice(interval, c("year", "month", "day"))
  supported <- c("redshift", "oracle", "postgresql", "sql server", "spark", "duckdb", "sqlite", "bigquery", "snowflake")
  checkmate::assertChoice(dbms, choices = supported, null.ok = TRUE)

  if (is.null(dbms)) {
    dot <- get(".", envir = parent.frame())
    dbms <- CDMConnector::dbms(dot$src$con)
  }

  sql <- switch (dbms,
    "redshift" = "DATE_PART({interval}, {date})",
    "oracle" = 'EXTRACT({toupper(interval)} FROM "{date}")',
    "postgresql" = "EXTRACT({toupper(interval)} FROM {date})", # TODO use a more dbplyr approach to build sql
    "sql server" = "{toupper(interval)}({date})",
    "spark" = "{toupper(interval)}({date})",
    "duckdb" = "date_part('{interval}', {date})",
    "sqlite" = ifelse(interval == "year",
      "CAST(STRFTIME('%Y', {date}, 'unixepoch') AS INT)",
      "CAST(STRFTIME('%{substr(interval, 1, 1)}', {date}, 'unixepoch') AS INT)"),
    "bigquery" = "EXTRACT({toupper(interval)} from {date})",
    "snowflake" = 'DATE_PART({interval}, "{date}")'
  )
  dbplyr::sql(as.character(glue::glue(sql)))
}

Try the CDMConnector package in your browser

Any scripts or data that you put into this service are public.

CDMConnector documentation built on April 4, 2025, 4:42 a.m.