Dtwin: Re-usable functions for City Dtwin

Documented in LogRegresssion Predict.interval

#' @title Make time series logistic regression from dataframe
#' @description create regression for typical dtwin datasets
#' @usage LogRegresssion (df)
#' @param df dataframe which have to include the next columns: location (the name of set), indicator (the name of kpi), year, value
#' @return dataframe
#' @importFrom readxl read_excel
#' @importFrom dplyr %>%
#' @importFrom dplyr mutate
#' @importFrom dplyr select
#' @importFrom dplyr arrange
#' @importFrom stringr str_replace
#' @export
#' @examples
#' LogRegresssion (df)

LogRegresssion <- function(df){
  # df <- read_stn_csv("3. Data/","sd_sample") %>% select(location, indicator, year, value = fact) %>% drop_na(value) %>% print()

  path <- "3. Data/temp/"
  dir.create(path)
  common_Reg_name <- paste0(path, "temp (in).csv")

  Dtwin::write_stn_csv(df %>%
                         select(year, value, Indicator = indicator, location) %>%
                         arrange(location, Indicator, year) %>%
                         drop_na(value),
                       file = common_Reg_name, dec = ",")

  procFile <- "5.Java/DT.0595. Universal Regression v.2.jar"

  pr<-paste("\"",procFile,"\"", sep = "")
  dt<-paste("\"",common_Reg_name,"\"", sep = "")
  javaProc<-paste("java -jar ", pr, sep = "")
  cmd<-paste(javaProc,dt, sep = " ") # Расчетный модуль с универсальной регрессией
  system(cmd)

  ds <- Dtwin::read_stn_csv(path = path, pattern = "Free4R", type = "csv" )
  file.remove(common_Reg_name)
  file.remove(str_replace(common_Reg_name, "in", "Free4R out"))

  return(ds)

}

#' @title Make prediction intervals for the time series
#' @description prepare high & low values for time series
#' @usage Predict.interval (df, loc, ind, kpi, periods, interval.width)
#' @param df dataframe which have to include the next columns: scenario, year, value
#' @param loc name of location where we create interval
#' @param ind name of industry where we create interval
#' @param kpi name of indicator which we create interval
#' @param periods the number of months
#' @param interval.width the size from 0.7 till 0.99
#' @return dataframe
#' @importFrom readxl read_excel
#' @importFrom dplyr %>%
#' @importFrom dplyr mutate
#' @importFrom dplyr select
#' @importFrom dplyr arrange
#' @importFrom dplyr filter
#' @importFrom stringr str_sub
#' @importFrom prophet make_future_dataframe
#' @importFrom stats predict
#' @importFrom tidyr pivot_longer
#' @export
#' @examples
#' num_loc <- distinct(data_10, location) %>% arrange(location) %>% rowid_to_column() %>% mutate(total = nrow(.))
#' num_ind <- distinct(data_10, industry) %>% arrange(industry   ) %>% rowid_to_column() %>% mutate(total = nrow(.))
#' num_kpi <- distinct(data_10, indicator) %>% arrange(indicator) %>% rowid_to_column() %>% mutate(total = nrow(.))
#' data_1 <- data_10 %>%
#'  nest_by(location,indicator,industry) %>%
#'  mutate(data = list(Prediction_interval(data,
#'                                         loc = num_loc[grepl(location , num_loc$location  ),],
#'                                         ind = num_ind[grepl(industry , num_ind$industry  ),],
#'                                         kpi = num_kpi[grepl(indicator, num_kpi$indicator ),]))) %>%
#'  unnest(data) %>% ungroup() %>%
#'  mutate(year = as.integer(year),
#'         value = ifelse (value<0,0,value))
#'
# Функция посроения доверитеьных интервалов  ####
Predict.interval <- function(df, loc, ind, kpi, periods = 100, interval.width = 0.95){

  cat(paste0("\014",
             loc$location , ": ", loc$rowid  , " из ", loc$total , ", ",
             ind$industry , ": ", ind$rowid , " из ", ind$total, ", ",
             kpi$indicator, ": ", kpi$rowid, " из ", kpi$total, ", ", " \n\r"))

  df <- df %>%
    mutate(ds = ifelse(scenario == 'use',
                       paste0(year,'-01-01'),
                       ifelse(scenario == 'high',
                              paste0(year,'-05-01'),
                              paste0(year,'-09-01')))) %>%
    select (ds,y = value)

  t1 <- prophet(df, interval.width = interval.width, uncertainty.samples = 1000)
  t2 <- make_future_dataframe(t1, periods = periods)
  t3 <- predict(t1,t2)
  t4 <- t3 %>% select(ds, low = yhat_lower, high = yhat_upper) %>%
    filter(str_sub(ds, end = 7, start = 6) == '01', str_sub(ds,end = 10,start = 9) == '01') %>%
    pivot_longer(cols = low:high, names_to = 'scenario') %>%
    mutate(year = str_sub(ds,end = 4)) %>%
    mutate(year = as.integer(year)) %>%
    select(-ds)

  return(t4)
}