R/prophet_model_v01.R

Defines functions interpolate_missing_dates

Documented in interpolate_missing_dates

#' interpolate_missing_dates
#' @param tisefka original data with missing and outliers
#' @export

interpolate_missing_dates <- function(tisefka = NULL, target_variable = NULL, date_unit = "months"){

  if(is.null(date_unit))date_unit <- SaldaeDataExplorer::detect_date_auto(tisefka$date, n_new_dates  = NULL)
  if(date_unit %in%c("months","quarters","years")){
    tisefka$date <- tisefka$date - tisefka$date%>%lubridate::day() +1
    # tisefka$date <- as.character(tisefka$date + tisefka$date%>%lubridate::days_in_month() - 1)

  }

  start_date <- tisefka$date%>%head(1)%>%as.Date(tz = "CET")
  end_date <- tisefka$date%>%tail(1)%>%as.Date(tz = "CET")
  tisefka <- tisefka%>%
    tidyr::complete(date = seq.Date(start_date,end_date, by=date_unit))%>%
    dplyr::distinct_at(dplyr::vars(date),.keep_all = TRUE)
  ts_missings <- sum(!is.na(tisefka%>%dplyr::pull(!!target_variable)))
  if(ts_missings < 3){
    mean_na <- tisefka%>%dplyr::pull(!!target_variable)%>%mean(na.rm = TRUE)
    replace_na_list <- list()
    replace_na_list[[target_variable]] <-  mean_na
    tisefka <- tisefka %>%
      tidyr::replace_na(replace_na_list)

  }else if(ts_missings < nrow(tisefka)){
    tisefka[,target_variable] <- tisefka%>%dplyr::pull(!!target_variable)%>%SaldaeDataExplorer:::interp_na_value()
  }

  non_zero_values <- tisefka%>%dplyr::pull(!!target_variable)%>%filter_zero_values(ts_actuals = .)
  tisefka <- tisefka[non_zero_values,]
  return(tisefka)
}

#' Saldae Engine using prophet package.
#' @author Farid Azouaou
#' @param tisefka  training data (data frame with time variable as rownames)
#' @param sald_holidays  information regarding holdays(based on country)
#' @param target_variable target variable
#' @return fitted object from Prophet

prophet_fit <- function(tisefka = NULL,target_variable= NULL ,sald_holidays = NULL, ukud_unit = NULL) {
  # ..........Prepare data as required by the package .
  history <- data.frame(
    ds = tisefka$date,
    y = dplyr::pull(tisefka,!!target_variable)
  )
  daily_seasonality <- weekly_seasonality <- "auto"

  if(ukud_unit == "months")daily_seasonality <- weekly_seasonality <- FALSE
  if(ukud_unit == "weeks")weekly_seasonality <- FALSE

  # .............................. model fitting step...........
  prophet_object <- prophet::prophet(df = history, fit = TRUE, holidays = sald_holidays, weekly.seasonality = weekly_seasonality,
                                     daily.seasonality	= daily_seasonality
)
  #------------------------------------------------------------
  return(prophet_object)
}


generate_future_dates <- function(tisefka = NULL, ukud_unit = NULL, asurif_zdat = 18 ){
  tisefka <- tisefka%>%
    dplyr::arrange(date)

  if(ukud_unit %in% c("months","quarters","years"))tisefka$date <- tisefka$date - tisefka$date%>%lubridate::day() + 1




  forecast_dates <- data.frame(ds = seq.Date(from = as.Date(tail(tisefka$date,1)), length.out = asurif_zdat + 1, by = ukud_unit)[-1])
  future <-tisefka%>%dplyr::select(date)%>%dplyr::rename(ds=date)%>%
    dplyr::bind_rows(forecast_dates)
  # future <- prophet::make_future_dataframe(fcast_object, periods = asurif_zdat, freq = prphet_freq, include_history = TRUE)
  return(future)
}

#' Saldae Engine using prophet package.
#' @author Farid Azouaou
#' @param prophet_object  fitted object from Prophet
#' @param ukud_unit  time units
#' @param asurif_zdat  steps ahead (forecast horizon)
#' @return forecast object (fitted value, forecast ,actuals , upper and lower bounds)

prophet_forecast_f <- function(tisefka = NULL,prophet_object = NULL, asurif_zdat = 10, ukud_unit = NULL) {

  future <- generate_future_dates(tisefka = tisefka, ukud_unit = ukud_unit,asurif_zdat = asurif_zdat)
  # ................. forecast prediction......................
  attr(future$ds, "tzone") <- "CET"
  attr(prophet_object$history$ds, "tzone") <- "CET"
  prophet_forecast <- predict(prophet_object, future)
  # .................. generate plot of the forecast object
  prophet_output <- list()
  prophet_output$method <- "thaink2_main"
  prophet_output$mean <- tail(prophet_forecast$yhat, asurif_zdat)


  prophet_output$upper <- tail(prophet_forecast$yhat_upper, asurif_zdat)
  prophet_output$lower <- tail(prophet_forecast$yhat_lower, asurif_zdat)


  #-----------------------------------------------------
  prophet_output$x <- prophet_object$history$y

  future <- future%>%
    tail(-nrow(tisefka))
  prophet_output$forecast_dates <- as.Date(head(future$ds, asurif_zdat), tz = "CET")
  if(!(ukud_unit %in%c("months","quarters","years","days"))){
    prophet_output$forecast_dates <- as.POSIXct(head(future$ds, asurif_zdat), tz = "CET")
  }

  prophet_output$fitted <- prophet_forecast$trend
  class(prophet_output) <- "forecast"
  return(prophet_output)
}


filter_zero_values <- function(ts_actuals = NULL){
  return(!cumsum(abs(ts_actuals))  == 0)
}

#' Saldae Engine(main) using prophet package.
#' @author Farid Azouaou
#' @param tisefka  training data (data frame with time variable as rownames)
#' @param sald_holidays  information regarding holdays(based on country)
#' @param asurif_zdat  steps ahead (forecast horizon)
#' @param target_variable target variable
#' @return forecast object.
#' @export

Saldae_prophet <- function(tisefka = NULL,actuals_time_periode = NULL , ukud_unit = NULL,sald_holidays = NULL, asurif_zdat = 10, min_points = 18) {
  if(is.null(ukud_unit))ukud_unit <- SaldaeDataExplorer::detect_date_auto(time_vect = tisefka$date,n_new_dates = NULL)

  original_variable <- colnames(tisefka)[2]
  training_variable <- base::ifelse("Corrected"%in%colnames(tisefka),"Corrected",original_variable)

  tisefka_test <- NULL
  if(!is.null(actuals_time_periode)){
    actuals_time_periode_lower <- actuals_time_periode[1]
    actuals_time_periode_upper <- actuals_time_periode[2]
    tisefka <- interpolate_missing_dates(tisefka = tisefka, date_unit = ukud_unit, target_variable = training_variable)
    tisefka_test <- tisefka%>%dplyr::filter(date > !!actuals_time_periode_upper)
    tisefka <- tisefka%>%dplyr::filter(date >= !!actuals_time_periode_lower & date <= !!actuals_time_periode_upper)
    if(nrow(tisefka_test)>0){
      asurif_zdat <- max(nrow(tisefka_test),asurif_zdat)
    }
  }

  if( tisefka%>%nrow()< min_points){
    prophet_output <- tisefka%>%
      handle_short_ts(tisefka = ., ukud_unit = ukud_unit , min_points = min_points, fcast_horizon = asurif_zdat)
  }else{
    prophet_object <- prophet_fit(tisefka = tisefka,target_variable = training_variable ,sald_holidays = sald_holidays,ukud_unit = ukud_unit)
    prophet_output <- prophet_forecast_f(tisefka = tisefka, prophet_object = prophet_object, ukud_unit = ukud_unit, asurif_zdat = asurif_zdat)
  }
  prophet_output$ts_original <- tisefka[,c("date",original_variable)]
  prophet_output$ts_test <- tisefka_test
  return(prophet_output)
}



###########################################################
#' Saldae Engine(main) using prophet package.
#' @author Farid Azouaou
#' @param tisefka_list  training data (list of tibble objects (target variable corrected data date))
#' @param sald_holidays  information regarding holdays(based on country)
#' @param asurif_zdat  steps ahead (forecast horizon)
#' @param actuals_time_periode time period to use for training
#' @param target_variables target variables
#' @return forecast object.

Saldae_prophet_multiple <- function(tisefka_list= NULL,actuals_time_periode = NULL, ukud_unit = NULL,sald_holidays = NULL, asurif_zdat = 10){
  SA_fcast_object <- purrr::map(.x = tisefka_list,~ Saldae_prophet(tisefka = .x,ukud_unit = ukud_unit,actuals_time_periode = actuals_time_periode,sald_holidays = sald_holidays,asurif_zdat = asurif_zdat))
  return(SA_fcast_object)
}
Aqvayli06/SaldaeForecasting documentation built on Sept. 9, 2023, 7:21 p.m.