R/functions.R

Defines functions plot_its_poisson plot_its_lm its_zero_inflated_poisson its_poisson its_lm its_lm_fourier its_lm_wo_seas its_zero_inflated_fourier its_zero_inflated its_poisson_fourier its_poisson_wo_seas

Documented in its_lm its_lm_fourier its_lm_wo_seas its_poisson its_poisson_fourier its_poisson_wo_seas its_zero_inflated its_zero_inflated_fourier its_zero_inflated_poisson plot_its_lm plot_its_poisson

#' ITS analysis for count outcomes with no seasonality adjustment
#'
#' \code{its_poisson_wo_seas} fits a Poisson regression model to an ITS, and returns the model, the summary of the model (including the relative risk), and the original data together with the model predictions.
#'
#' @param data The data frame corresponding to the supplied formula, existing of at least 2 variables: (1) the count outcome, and (2) a vector of time points
#' @param form A formula with the response on the left, followed by the ~ operator, and the covariates on the right, separated by + operators. The formula should not contain an offest term.
#' @param offset_name either a string indicating the name of the offset column in the data, or NULL. Default value is NULL
#' @param time_name A string giving the name of the time variable. The time variable may or may not be supplied as a covariate in the formula
#' @param intervention_start_ind Numeric - a number between 1 and nrow(data)-1 stating the time point of the start of the intervention
#' @param over_dispersion Logical - indicating whether a quasi-Poisson model should be used to account for over dispersion (TRUE), or not (FLASE). Default value is FALSE.
#' @param impact_model A string specifying the assumed impact model. Possible options include "full" corresponding to a model including both a level change and a slope change, "level" corresponding to a model including just a level change, and "slope" corresponding to a model including just a slope change. Default value is "full".
#' @param counterfactual Logical - indicating whether the model-based counterfactual values should also be returned as an additional column in the data. Default value is FALSE, in which case the counterfactual values are not returned.
#' @param print_summary Logical - indicating whethwe the entire model summary should be printed, or just the relevant effect size. Default value is FALSE in which case only the effect size, together with its 95\% CI and P-value, are printed.
#' @return The function returns a list with three elements: the fitted Poisson regression model, the summary of the model (including the relative risk), and the original data together with the model predictions.
#' @examples
#' data <- unemployed
#' form <- as.formula("unemployed ~ time")
#' intervention_start_ind <- which(data$year==2020 & data$month>2| data$year==2021)[1]
#' fit <- its_poisson_wo_seas(data=data,form=form,offset_name="labour", time_name = "time",intervention_start_ind=intervention_start_ind,over_dispersion=TRUE, impact_model = "full",counterfactual = TRUE)
#' @importFrom tibble is_tibble as_tibble
#' @importFrom rlang is_formula sym
#' @importFrom stats as.formula glm lm pnorm poisson quasipoisson ts vcov update predict var na.omit quantile
#' @importFrom forecast fourier
#' @importFrom MASS mvrnorm
#' @export
its_poisson_wo_seas <- function(data,form, offset_name=NULL,time_name, intervention_start_ind,over_dispersion=FALSE, impact_model="full", counterfactual=FALSE, print_summary=FALSE){
  pred <- predC <- indicator <- NULL
  vars <- all.vars(form)
  response <- vars[1]
  covariates <- vars[-1]
  if (!(is.data.frame(data) | is_tibble(data))){
    stop("Please make sure that data is either a data frame or a tibble")
  }
  if(!(is_formula(form))){
    stop("Please make sure that form is a formula object")
  }
  if (!(time_name %in% colnames(data))){
    stop("Please make sure that time_name belongs to colnames(data)")
  }
  if(!all(data[[time_name]]==(1:nrow(data)))){
    data[[time_name]]=1:nrow(data)
    print("time_name column was overwritten with the values 1:nrow(data)")
  }
  if (!(is.null(offset_name))){
    if (!(offset_name %in% colnames(data))){
      stop("Please make sure that offset_name belongs to colnames(data)")
    }
    if (offset_name %in% covariates | paste0("offset(log(",offset_name,"))") %in% covariates | paste0("offset(",offset_name,")") %in% covariates | paste0("log(",offset_name,")") %in% covariates){
      stop("The offset term should not be included in the formula. Please supply a formula without the offset, and add the name of the offset column using the argument offset_name.")
    }
  }
  if(!is.logical(over_dispersion)){
    stop("over_dispersion must be either TRUE or FALSE")
  }
  if (!(impact_model %in% c("full","level","slope"))){
    stop("Please enter a valid impact_model name. Possible impact models are \"full\", \"level\", and \"slope\".")
  }
  if (!(response %in% colnames(data))){
    stop("Please make sure that the response variable on the left hand side of the formula object belongs to colnames(data)")
  }
  if (!all(covariates %in% colnames(data))){
    stop("Please make sure that the covariates on the right hand side of the formula object belong to colnames(data)")
  }
  n <- nrow(data)
  if (!(intervention_start_ind>1 & intervention_start_ind<=n)){
    stop("Please make sure that intervention_start_ind is a value greater than 1 and less or equal to nrow(data). intervention_start_ind is the index ")
  }
  data$indicator <- 0
  data$indicator[intervention_start_ind:n] <- 1
  data$shifted_time <- data[[time_name]]-intervention_start_ind
  # update formula
  if (impact_model=="full"){
    if (time_name %in% covariates){
      form_update <-update(form,    as.formula(paste0("~ . + indicator + indicator:shifted_time")))
    } else {
      form_update <-update(form,    as.formula(paste0("~ . +", time_name," + indicator + indicator:shifted_time")))
    }
  } else if (impact_model=="level"){
    if (time_name %in% covariates){
      form_update <-update(form,    as.formula(paste0("~ . + indicator")))
    } else {
      form_update <-update(form,    as.formula(paste0("~ . +", time_name," + indicator")))
    }
  } else if (impact_model=="slope"){
    if (time_name %in% covariates){
      form_update <-update(form,    as.formula(paste0("~ . + indicator:shifted_time")))
    } else {
      form_update <-update(form,    as.formula(paste0("~ . +", time_name,"+ indicator:shifted_time")))
    }
  }
  if (!is.null(offset_name)){
    offset_name <- sym(offset_name)
    form_update <-update(form_update, as.formula(paste0("~ . + offset(log(",offset_name,"))")))
  }
  # fit model
  if(!isTRUE(over_dispersion)){
    set.seed(1)
    model <- glm(form_update,data,family=poisson)
  } else {
    set.seed(1)
    model <- glm(form_update,data,family=quasipoisson)
  }

  t_star <- which(data$indicator==1, arr.ind = TRUE)[1] #intervention_start_ind
  b <- (n-t_star)/2

  cov_mat <- vcov(model)
  if (impact_model=="full"){
    beta_intervention <- model$coefficients["indicator"]
    beta_interaction <- model$coefficients["indicator:shifted_time"]
    RR <- exp(beta_intervention+beta_interaction*b)
    var_lin <- cov_mat["indicator","indicator"]+b^2*cov_mat["indicator:shifted_time","indicator:shifted_time"]+b*2*cov_mat["indicator","indicator:shifted_time"]
    lin_est <- beta_intervention+b*beta_interaction
  } else if (impact_model=="level"){
    beta_intervention <- model$coefficients["indicator"]
    RR <- exp(beta_intervention)
    var_lin <- cov_mat["indicator","indicator"]
    lin_est <- beta_intervention
  }
  else if (impact_model=="slope"){
    beta_interaction <- model$coefficients["indicator:shifted_time"]
    RR <- exp(beta_interaction*b)
    var_lin <- b^2*cov_mat["indicator:shifted_time","indicator:shifted_time"]#+b*2*cov_mat["indicator","indicator:shifted_time"]
    lin_est <- b*beta_interaction
  }

  CI_lin <- c(lin_est-1.96*sqrt(var_lin),lin_est+1.96*sqrt(var_lin))
  CI_RR <- exp(CI_lin)
  TS <- lin_est/sqrt(var_lin)
  p_value <- round(2*pnorm(abs(TS),lower.tail = FALSE),2)
  ret_vec <-c(RR,CI_RR,p_value)
  names(ret_vec) <- c("RR", "2.5% CI", "97.5% CI","P-value")
  s <- summary(model)
  if (isTRUE(print_summary)){
    print(s)
  }
  print(ret_vec)
  s[["RR"]] <- ret_vec #or model[["RR"]] <- ret_vec and then return(model)

  #add predictions
  predA <- predict(model,type="response")
  data$pred <- predA#*100/data[[offset_name]]
  if (isTRUE(counterfactual)){
    # Compute counterfactual values
    new_data <- data
    new_data$indicator <- 0
    pred_counter <- predict(model,type="response",newdata = new_data)
    data$predC <- pred_counter#*100/data[[offset_name]]
    data$predC[data$indicator!=1] <- NA
  }
  data <- as_tibble(data)
  # #draw
  # Group <- levels( factor(c("Counterfactual","Fitted values")))
  # response <- sym(response)
  # p2 <- ggplot(data = data , aes(y = !!response*100/data[[offset_name]], x = dt)) +
  #   geom_point()  + ylab(y_lab) + xlab("Date") +
  #   geom_line(aes(y = predC, x = dt, color="Counterfactual",linetype = "Counterfactual"), size=1,key_glyph = draw_key_smooth)+
  #   geom_line(aes(y = pred, x = dt, color="Fitted values",linetype = "Fitted values"), size=1,key_glyph = draw_key_smooth)+
  #   scale_color_manual("", values = c("Counterfactual"="red","Fitted values"="black"),breaks = Group) +
  #   scale_linetype_manual("",values = c("Counterfactual"=2,"Fitted values"=1),breaks = Group) +
  #   theme(legend.key=element_blank())
  # p2
  return(list(model=model, model_summary=s, data=data))
}

#' ITS analysis for count outcomes with seasonal adjustment via Fourier terms
#' \code{its_poisson_fourier} fits a Poisson regression model adjusted to seasonality, and returns the model, the summary of the model (including the relative risk), and the original data together with the model predictions.
#'
#' @param data The data frame corresponding to the supplied formula, existing of at least 2 variables: (1) the count outcome, and (2) a vector of time points
#' @param form A formula with the response on the left, followed by the ~ operator, and the covariates on the right, separated by + operators. The formula should not contain an offest term.
#' @param offset_name either a string indicating the name of the offset column in the data, or NULL. Default value is NULL
#' @param time_name A string giving the name of the time variable. The time variable may or may not be supplied as a covariate in the formula
#' @param intervention_start_ind Numeric - a number between 1 and nrow(data)-1 stating the time point of the start of the intervention
#' @param over_dispersion Logical - stating whether an over-dispersed Poisson model should be used (TRUE), or not (FALSE). Default is FALSE and then a regular Poisson model is used. If TRUE then a quasi-Poisson model is used instead.
#' @param freq A positive integer describing the frequency of the time series.
#' @param keep_significant_fourier Logical - indicating whether only the significant Fourier terms should be considered. Default is TRUE and then the model is fitted twice; once to obtain the significant Fourier terms, and second time keeping only the significant Fourier terms. If FALSE, then all the Fourier terms are used.
#' @param impact_model A string specifying the assumed impact model. Possible options include "full" corresponding to a model including both a level change and a slope change, "level" corresponding to a model including just a level change, and "slope" corresponding to a model including just a slope change. Default value is "full".
#' @param counterfactual Logical - indicating whether the model-based counterfactual values should also be returned as an additional column in the data. Default value is FALSE, in which case the counterfactual values are not returned.
#' @param print_summary Logical - indicating whethwe the entire model summary should be printed, or just the relevant effect size. Default value is FALSE in which case only the effect size, together with its 95\% CI and P-value, are printed.
#' @return The function returns a list with three elements: the fitted Poisson regression model, the summary of the model (including the relative risk), and the original data together with the model predictions.
#' @examples
#' data <- unemployed
#' form <- as.formula("unemployed ~ time")
#' intervention_start_ind <- which(data$year==2020 & data$month>2| data$year==2021)[1]
#' fit <- its_poisson_fourier(data=data,form=form,offset_name="labour", time_name = "time",intervention_start_ind=intervention_start_ind,over_dispersion=TRUE, freq=12, keep_significant_fourier=TRUE, impact_model = "full",counterfactual = TRUE)
#' @importFrom tibble is_tibble as_tibble
#' @importFrom rlang is_formula sym
#' @importFrom stats as.formula glm lm pnorm poisson quasipoisson ts vcov update predict var na.omit quantile
#' @importFrom forecast fourier
#' @importFrom MASS mvrnorm
#' @export
its_poisson_fourier <- function(data, form, offset_name=NULL,time_name, intervention_start_ind,over_dispersion=FALSE,freq, keep_significant_fourier=TRUE,impact_model="full", counterfactual=FALSE, print_summary=FALSE){
  pred <- predC <- indicator <- NULL
  vars <- all.vars(form)
  response <- vars[1]
  covariates <- vars[-1]
  if (!(is.data.frame(data) | is_tibble(data))){
    stop("Please make sure that data is either a data frame or a tibble")
  }
  if(!(is_formula(form))){
    stop("Please make sure that form is a formula object")
  }
  if (!(time_name %in% colnames(data))){
    stop("Please make sure that time_name belongs to colnames(data)")
  }
  if(!all(data[[time_name]]==(1:nrow(data)))){
    data[[time_name]]=1:nrow(data)
    print("time_name column was overwritten with the values 1:nrow(data)")
  }
  if (!(is.null(offset_name))){
    if (!(offset_name %in% colnames(data))){
      stop("Please make sure that offset_name belongs to colnames(data)")
    }
    if (offset_name %in% covariates | paste0("offset(log(",offset_name,"))") %in% covariates | paste0("offset(",offset_name,")") %in% covariates | paste0("log(",offset_name,")") %in% covariates){
      stop("The offset term should not be included in the formula. Please supply a formula without the offset, and add the name of the offset column using the argument offset_name.")
    }
  }
  if(!is.logical(over_dispersion)){
    stop("over_dispersion must be either TRUE or FALSE")
  }
  if (!(impact_model %in% c("full","level","slope"))){
    stop("Please enter a valid impact_model name. Possible impact models are \"full\", \"level\", and \"slope\".")
  }

  # if (offset_name %in% covariates | paste0("offset(log(",offset_name,"))") %in% covariates | paste0("offset(",offset_name,")") %in% covariates | paste0("log(",offset_name,")") %in% covariates){
  #   stop("The offset term should not be included in the formula. Please supply a formula without the offset, and add the name of the offset column using the argument offset_name.")
  # }
  if (!(response %in% colnames(data))){
    stop("Please make sure that the response variable on the left hand side of the formula object belongs to colnames(data)")
  }
  if (!all(covariates %in% colnames(data))){
    stop("Please make sure that the covariates on the right hand side of the formula object belong to colnames(data)")
  }
  n <- nrow(data)
  if (!(intervention_start_ind>1 & intervention_start_ind<=n)){
    stop("Please make sure that intervention_start_ind is a value greater than 1 and less or equal to nrow(data). intervention_start_ind is the index ")
  }
  data$indicator <- 0
  data$indicator[intervention_start_ind:n] <- 1
  data$shifted_time <- data[[time_name]]-intervention_start_ind
  # update formula
  if (impact_model=="full"){
    if (time_name %in% covariates){
      form_update <-update(form,    as.formula(paste0("~ . + indicator + indicator:shifted_time")))
    } else {
      form_update <-update(form,    as.formula(paste0("~ . +", time_name," + indicator + indicator:shifted_time")))
    }
  } else if (impact_model=="level"){
    if (time_name %in% covariates){
      form_update <-update(form,    as.formula(paste0("~ . + indicator")))
    } else {
      form_update <-update(form,    as.formula(paste0("~ . +", time_name," + indicator")))
    }
  } else if (impact_model=="slope"){
    if (time_name %in% covariates){
      form_update <-update(form,    as.formula(paste0("~ . + indicator:shifted_time")))
    } else {
      form_update <-update(form,    as.formula(paste0("~ . +", time_name,"+ indicator:shifted_time")))
    }
  }
  if (!is.null(offset_name)){
    offset_name <- sym(offset_name)
    form_update <-update(form_update, as.formula(paste0("~ . + offset(log(",offset_name,"))")))
  }
  if(!is.logical(keep_significant_fourier)){
    stop("keep_significant_fourier must be either TRUE or FALSE")
  }
  if (!(freq>1 & freq<=n)){
    stop("Please make sure that the freq is a value greater than 1 and less or equal to nrow(data)")
  }
  t_star <- which(data$indicator==1, arr.ind = TRUE)[1]

  ts_outcome <- ts(data[[response]],frequency = freq)
  fourier_mat <- fourier(ts_outcome,K=freq/2)
  colnames(fourier_mat) <- gsub("-",".", colnames(fourier_mat))
  fourier_df <- data.frame(fourier_mat)
  colnames(fourier_df) <- colnames(fourier_mat)
  data <- cbind(data,fourier_df)
  form_update_full <-update(form_update, as.formula(paste0("~ . +", paste(colnames(fourier_mat), collapse= "+"))))
  # fit model
  if(!isTRUE(over_dispersion)){
    set.seed(1)
    model <- glm(form_update_full,data,family=poisson)
  } else {
    set.seed(1)
    model <- glm(form_update_full,data,family=quasipoisson)
  }
  if(isTRUE(keep_significant_fourier)){
    all_significant_terms <- summary(model)$coeff[,4] < 0.05
    all_fourier_terms <- all_significant_terms[!names(all_significant_terms) %in% c("(Intercept)","time","indicator","indicator:shifted_time")]
    selected_fourier <- names(all_fourier_terms)[all_fourier_terms == TRUE]
    form_update_sig <-update(form_update, as.formula(paste0("~ . +", paste(selected_fourier, collapse= "+"))))
    if(!isTRUE(over_dispersion)){
      set.seed(1)
      model <- glm(form_update_sig,data,family=poisson)
    } else {
      set.seed(1)
      model <- glm(form_update_sig,data,family=quasipoisson)
    }
  }
  b <- (n-t_star)/2
  beta_intervention <- model$coefficients["indicator"]
  beta_interaction <- model$coefficients["indicator:shifted_time"]
  RR <- exp(beta_intervention+beta_interaction*b)
  cov_mat <- vcov(model)
  if (impact_model=="full"){
    beta_intervention <- model$coefficients["indicator"]
    beta_interaction <- model$coefficients["indicator:shifted_time"]
    RR <- exp(beta_intervention+beta_interaction*b)
    var_lin <- cov_mat["indicator","indicator"]+b^2*cov_mat["indicator:shifted_time","indicator:shifted_time"]+b*2*cov_mat["indicator","indicator:shifted_time"]
    lin_est <- beta_intervention+b*beta_interaction
  } else if (impact_model=="level"){
    beta_intervention <- model$coefficients["indicator"]
    RR <- exp(beta_intervention)
    var_lin <- cov_mat["indicator","indicator"]
    lin_est <- beta_intervention
  }
  else if (impact_model=="slope"){
    beta_interaction <- model$coefficients["indicator:shifted_time"]
    RR <- exp(beta_interaction*b)
    var_lin <- b^2*cov_mat["indicator:shifted_time","indicator:shifted_time"]#+b*2*cov_mat["indicator","indicator:shifted_time"]
    lin_est <- b*beta_interaction
  }
  CI_lin <- c(lin_est-1.96*sqrt(var_lin),lin_est+1.96*sqrt(var_lin))
  CI_RR <- exp(CI_lin)
  TS <- lin_est/sqrt(var_lin)
  p_value <- round(2*pnorm(abs(TS),lower.tail = FALSE),2)
  ret_vec <-c(RR,CI_RR,p_value)
  names(ret_vec) <- c("RR", "2.5% CI", "97.5% CI","P-value")
  s <- summary(model)
  if (isTRUE(print_summary)){
    print(s)
  }
  print(ret_vec)
  s[["RR"]] <- ret_vec #or model[["RR"]] <- ret_vec and then return(model)

  #add predictions
  predA <- predict(model,type="response")
  data$pred <- predA# *100/data[[offset_name]]
  if (isTRUE(counterfactual)){
    # Compute counterfactual values
    new_data <- data
    new_data$indicator <- 0
    pred_counter <- predict(model,type="response",newdata = new_data)
    data$predC <- pred_counter#*100/data[[offset_name]]
    data$predC[data$indicator!=1] <- NA
  }
  data <- as_tibble(data)

  # #draw
  # Group <- levels( factor(c("Counterfactual","Fitted values")))
  # response <- sym(response)
  # p2 <- ggplot(data = data , aes(y = !!response*100/data[[offset_name]], x = dt)) +
  #   geom_point()  + ylab(y_lab) + xlab("Date") +
  #   geom_line(aes(y = predC, x = dt, color="Counterfactual",linetype = "Counterfactual"), size=1,key_glyph = draw_key_smooth)+
  #   geom_line(aes(y = pred, x = dt, color="Fitted values",linetype = "Fitted values"), size=1,key_glyph = draw_key_smooth)+
  #   scale_color_manual("", values = c("Counterfactual"="red","Fitted values"="black"),breaks = Group) +
  #   scale_linetype_manual("",values = c("Counterfactual"=2,"Fitted values"=1),breaks = Group) +
  #   theme(legend.key=element_blank())
  # p2
  return(list(model=model, model_summary=s, data=data))
}

#' ITS analysis for zero inflated count outcomes with no seasonality adjustment
#'
#' \code{its_zero_inflated} fits a zero-inflated Poisson regression model to an ITS, and returns the model, the summary of the model (including the relative risk), and the original data together with the model predictions.
#'
#' @param data The data frame corresponding to the supplied formula, existing of at least 2 variables: (1) the count outcome, and (2) a vector of time points
#' @param form A formula with the response on the left, followed by the ~ operator, and the covariates on the right, separated by + operators. The formula should not contain an offest term.
#' @param offset_name either a string indicating the name of the offset column in the data, or NULL. Default value is NULL
#' @param time_name A string giving the name of the time variable. The time variable may or may not be supplied as a covariate in the formula
#' @param intervention_start_ind Numeric - a number between 1 and nrow(data)-1 stating the time point of the start of the intervention
#' @param impact_model A string specifying the assumed impact model. Possible options include "full" corresponding to a model including both a level change and a slope change, "level" corresponding to a model including just a level change, and "slope" corresponding to a model including just a slope change. Default value is "full".
#' @param counterfactual Logical - indicating whether the model-based counterfactual values should also be returned as an additional column in the data. Default value is FALSE, in which case the counterfactual values are not returned.
#' @param print_summary Logical - indicating whethwe the entire model summary should be printed, or just the relevant effect size. Default value is FALSE in which case only the effect size, together with its 95\% CI and P-value, are printed.
#' @return The function returns a list with three elements: the fitted Poisson regression model, the summary of the model (including the relative risk), and the original data together with the model predictions.
#' @examples
#' data <- zero_inflated_sim_data
#' form <- as.formula("monthly_total ~ time")
#' intervention_start_ind <- which(data$Year==2020 & data$Month==3)
#' fit <- its_zero_inflated(data=data,form=form, time_name = "time",intervention_start_ind=intervention_start_ind, impact_model = "full",counterfactual = TRUE)
#' @importFrom tibble is_tibble as_tibble
#' @importFrom rlang is_formula sym
#' @importFrom stats as.formula glm lm pnorm poisson quasipoisson ts vcov update predict var na.omit quantile
#' @importFrom pscl zeroinfl
#' @importFrom MASS mvrnorm
#' @export
its_zero_inflated <- function(data,form, offset_name=NULL,time_name, intervention_start_ind, impact_model="full", counterfactual=FALSE, print_summary=FALSE){
  pred <- predC <- indicator <- NULL
  vars <- all.vars(form)
  response <- vars[1]
  covariates <- vars[-1]
  if (!(is.data.frame(data) | is_tibble(data))){
    stop("Please make sure that data is either a data frame or a tibble")
  }
  if(!(is_formula(form))){
    stop("Please make sure that form is a formula object")
  }
  if (!(time_name %in% colnames(data))){
    stop("Please make sure that time_name belongs to colnames(data)")
  }
  if(!all(data[[time_name]]==(1:nrow(data)))){
    data[[time_name]]=1:nrow(data)
    print("time_name column was overwritten with the values 1:nrow(data)")
  }
  if (!(is.null(offset_name))){
    if (!(offset_name %in% colnames(data))){
      stop("Please make sure that offset_name belongs to colnames(data)")
    }
    if (offset_name %in% covariates | paste0("offset(log(",offset_name,"))") %in% covariates | paste0("offset(",offset_name,")") %in% covariates | paste0("log(",offset_name,")") %in% covariates){
      stop("The offset term should not be included in the formula. Please supply a formula without the offset, and add the name of the offset column using the argument offset_name.")
    }
  }
  if (!(impact_model %in% c("full","level","slope"))){
    stop("Please enter a valid impact_model name. Possible impact models are \"full\", \"level\", and \"slope\".")
  }
  if (!(response %in% colnames(data))){
    stop("Please make sure that the response variable on the left hand side of the formula object belongs to colnames(data)")
  }
  if (!all(covariates %in% colnames(data))){
    stop("Please make sure that the covariates on the right hand side of the formula object belong to colnames(data)")
  }
  n <- nrow(data)
  if (!(intervention_start_ind>1 & intervention_start_ind<=n)){
    stop("Please make sure that intervention_start_ind is a value greater than 1 and less or equal to nrow(data). intervention_start_ind is the index ")
  }
  data$indicator <- 0
  data$indicator[intervention_start_ind:n] <- 1
  data$shifted_time <- data[[time_name]]-intervention_start_ind
  # update formula
  if (impact_model=="full"){
    if (time_name %in% covariates){
      form_update <-update(form,    as.formula(paste0("~ . + indicator + indicator:shifted_time | 1")))
    } else {
      form_update <-update(form,    as.formula(paste0("~ . +", time_name," + indicator + indicator:shifted_time | 1")))
    }
  } else if (impact_model=="level"){
    if (time_name %in% covariates){
      form_update <-update(form,    as.formula(paste0("~ . + indicator | 1")))
    } else {
      form_update <-update(form,    as.formula(paste0("~ . +", time_name," + indicator | 1")))
    }
  } else if (impact_model=="slope"){
    if (time_name %in% covariates){
      form_update <-update(form,    as.formula(paste0("~ . + indicator:shifted_time| 1")))
    } else {
      form_update <-update(form,    as.formula(paste0("~ . +", time_name,"+ indicator:shifted_time | 1")))
    }
  }
  if (!is.null(offset_name)){
    offset_name <- sym(offset_name)
    form_update <-update(form_update, as.formula(paste0("~ . + offset(log(",offset_name,"))")))
  }
  x <- gsub("[()]", "", form_update)
  form_update <- as.formula(paste(x[2],x[1],x[3]))
  # fit model
  set.seed(1)
  model <- zeroinfl(form_update,data,dist="poisson")
  coeff <- model$coefficients["count"]
  t_star <- which(data$indicator==1, arr.ind = TRUE)[1] #intervention_start_ind
  b <- (n-t_star)/2

  cov_mat <- vcov(model)
  if (impact_model=="full"){
    beta_intervention <- coeff$count["indicator"]
    beta_interaction <- coeff$count["indicator:shifted_time"]
    RR <- exp(beta_intervention+beta_interaction*b)
    var_lin <- cov_mat["count_indicator","count_indicator"]+b^2*cov_mat["count_indicator:shifted_time","count_indicator:shifted_time"]+b*2*cov_mat["count_indicator","count_indicator:shifted_time"]
    lin_est <- beta_intervention+b*beta_interaction
  } else if (impact_model=="level"){
    beta_intervention <- coeff$count["count_indicator"]
    RR <- exp(beta_intervention)
    var_lin <- cov_mat["count_indicator","count_indicator"]
    lin_est <- beta_intervention
  }
  else if (impact_model=="slope"){
    beta_interaction <- coeff$count["count_indicator:shifted_time"]
    RR <- exp(beta_interaction*b)
    var_lin <- b^2*cov_mat["count_indicator:shifted_time","count_indicator:shifted_time"]#+b*2*cov_mat["indicator","indicator:shifted_time"]
    lin_est <- b*beta_interaction
  }

  CI_lin <- c(lin_est-1.96*sqrt(var_lin),lin_est+1.96*sqrt(var_lin))
  CI_RR <- exp(CI_lin)
  TS <- lin_est/sqrt(var_lin)
  p_value <- round(2*pnorm(abs(TS),lower.tail = FALSE),2)
  ret_vec <-c(RR,CI_RR,p_value)
  names(ret_vec) <- c("RR", "2.5% CI", "97.5% CI","P-value")
  s <- summary(model)
  if (isTRUE(print_summary)){
    print(s)
  }
  print(ret_vec)
  s[["RR"]] <- ret_vec #or model[["RR"]] <- ret_vec and then return(model)

  #add predictions
  predA <- predict(model,type="response")
  data$pred <- predA#*100/data[[offset_name]]
  if (isTRUE(counterfactual)){
    # Compute counterfactual values
    new_data <- data
    new_data$indicator <- 0
    pred_counter <- predict(model,type="response",newdata = new_data)
    data$predC <- pred_counter#*100/data[[offset_name]]
    data$predC[data$indicator!=1] <- NA
  }
  data <- as_tibble(data)

  return(list(model=model, model_summary=s, data=data))
}


#' ITS analysis for zero inflated count outcomes with seasonal adjustment
#'
#' \code{its_zero_inflated_fourier} fits a zero-inflated Poisson regression model to an ITS, and returns the model, the summary of the model (including the relative risk), and the original data together with the model predictions.
#'
#' @param data The data frame corresponding to the supplied formula, existing of at least 2 variables: (1) the count outcome, and (2) a vector of time points
#' @param form A formula with the response on the left, followed by the ~ operator, and the covariates on the right, separated by + operators. The formula should not contain an offest term.
#' @param offset_name either a string indicating the name of the offset column in the data, or NULL. Default value is NULL
#' @param time_name A string giving the name of the time variable. The time variable may or may not be supplied as a covariate in the formula
#' @param intervention_start_ind Numeric - a number between 1 and nrow(data)-1 stating the time point of the start of the intervention
#' @param freq A positive integer describing the frequency of the time series.
#' @param impact_model A string specifying the assumed impact model. Possible options include "full" corresponding to a model including both a level change and a slope change, "level" corresponding to a model including just a level change, and "slope" corresponding to a model including just a slope change. Default value is "full".
#' @param counterfactual Logical - indicating whether the model-based counterfactual values should also be returned as an additional column in the data. Default value is FALSE, in which case the counterfactual values are not returned.
#' @param print_summary Logical - indicating whethwe the entire model summary should be printed, or just the relevant effect size. Default value is FALSE in which case only the effect size, together with its 95\% CI and P-value, are printed.
#' @return The function returns a list with three elements: the fitted Poisson regression model, the summary of the model (including the relative risk), and the original data together with the model predictions.
#' @examples
#' data <- zero_inflated_sim_data
#' form <- as.formula("monthly_total ~ time")
#' intervention_start_ind <- which(data$Year==2020 & data$Month==3)
#' fit <- its_zero_inflated_fourier(data=data,form=form, time_name = "time",intervention_start_ind=intervention_start_ind, freq=12, impact_model = "full",counterfactual = TRUE)
#' @importFrom tibble is_tibble as_tibble
#' @importFrom rlang is_formula sym
#' @importFrom stats as.formula glm lm pnorm poisson quasipoisson ts vcov update predict var na.omit quantile
#' @importFrom pscl zeroinfl
#' @importFrom MASS mvrnorm
#' @importFrom forecast fourier
#' @export
its_zero_inflated_fourier <- function(data,form, offset_name=NULL,time_name, intervention_start_ind, freq, impact_model="full", counterfactual=FALSE, print_summary=FALSE){
  pred <- predC <- indicator <- NULL
  vars <- all.vars(form)
  response <- vars[1]
  covariates <- vars[-1]
  if (!(is.data.frame(data) | is_tibble(data))){
    stop("Please make sure that data is either a data frame or a tibble")
  }
  if(!(is_formula(form))){
    stop("Please make sure that form is a formula object")
  }
  if (!(time_name %in% colnames(data))){
    stop("Please make sure that time_name belongs to colnames(data)")
  }
  if(!all(data[[time_name]]==(1:nrow(data)))){
    data[[time_name]]=1:nrow(data)
    print("time_name column was overwritten with the values 1:nrow(data)")
  }
  if (!(is.null(offset_name))){
    if (!(offset_name %in% colnames(data))){
      stop("Please make sure that offset_name belongs to colnames(data)")
    }
    if (offset_name %in% covariates | paste0("offset(log(",offset_name,"))") %in% covariates | paste0("offset(",offset_name,")") %in% covariates | paste0("log(",offset_name,")") %in% covariates){
      stop("The offset term should not be included in the formula. Please supply a formula without the offset, and add the name of the offset column using the argument offset_name.")
    }
  }
  if (!(impact_model %in% c("full","level","slope"))){
    stop("Please enter a valid impact_model name. Possible impact models are \"full\", \"level\", and \"slope\".")
  }
  if (!(response %in% colnames(data))){
    stop("Please make sure that the response variable on the left hand side of the formula object belongs to colnames(data)")
  }
  if (!all(covariates %in% colnames(data))){
    stop("Please make sure that the covariates on the right hand side of the formula object belong to colnames(data)")
  }
  n <- nrow(data)
  if (!(intervention_start_ind>1 & intervention_start_ind<=n)){
    stop("Please make sure that intervention_start_ind is a value greater than 1 and less or equal to nrow(data). intervention_start_ind is the index ")
  }
  data$indicator <- 0
  data$indicator[intervention_start_ind:n] <- 1
  data$shifted_time <- data[[time_name]]-intervention_start_ind

  # update formula
  if (!(freq>1 & freq<=n)){
    stop("Please make sure that the freq is a value greater than 1 and less or equal to nrow(data)")
  }
  ts_outcome <- ts(data[[response]],frequency = freq)
  fourier_mat <- fourier(ts_outcome,K=freq/2)
  colnames(fourier_mat) <- gsub("-",".", colnames(fourier_mat))
  fourier_df <- data.frame(fourier_mat)
  colnames(fourier_df) <- colnames(fourier_mat)
  data <- cbind(data,fourier_df)
  form <-update(form, as.formula(paste0("~ . +", paste(colnames(fourier_mat), collapse= "+"))))
  if (impact_model=="full"){
    if (time_name %in% covariates){
      form_update <-update(form,    as.formula(paste0("~ . + indicator + indicator:shifted_time | 1")))
    } else {
      form_update <-update(form,    as.formula(paste0("~ . +", time_name," + indicator + indicator:shifted_time | 1")))
    }
  } else if (impact_model=="level"){
    if (time_name %in% covariates){
      form_update <-update(form,    as.formula(paste0("~ . + indicator | 1")))
    } else {
      form_update <-update(form,    as.formula(paste0("~ . +", time_name," + indicator | 1")))
    }
  } else if (impact_model=="slope"){
    if (time_name %in% covariates){
      form_update <-update(form,    as.formula(paste0("~ . + indicator:shifted_time| 1")))
    } else {
      form_update <-update(form,    as.formula(paste0("~ . +", time_name,"+ indicator:shifted_time | 1")))
    }
  }
  if (!is.null(offset_name)){
    offset_name <- sym(offset_name)
    form_update <-update(form_update, as.formula(paste0("~ . + offset(log(",offset_name,"))")))
  }

  t_star <- which(data$indicator==1, arr.ind = TRUE)[1]

  x <- gsub("[()]", "", form_update)
  form_update <- as.formula(paste(x[2],x[1],x[3]))
  # fit model
  set.seed(1)
  model <- zeroinfl(form_update,data,dist="poisson")
  coeff <- model$coefficients["count"]
  t_star <- which(data$indicator==1, arr.ind = TRUE)[1] #intervention_start_ind
  b <- (n-t_star)/2

  cov_mat <- vcov(model)
  if (impact_model=="full"){
    beta_intervention <- coeff$count["indicator"]
    beta_interaction <- coeff$count["indicator:shifted_time"]
    RR <- exp(beta_intervention+beta_interaction*b)
    var_lin <- cov_mat["count_indicator","count_indicator"]+b^2*cov_mat["count_indicator:shifted_time","count_indicator:shifted_time"]+b*2*cov_mat["count_indicator","count_indicator:shifted_time"]
    lin_est <- beta_intervention+b*beta_interaction
  } else if (impact_model=="level"){
    beta_intervention <- coeff$count["count_indicator"]
    RR <- exp(beta_intervention)
    var_lin <- cov_mat["count_indicator","count_indicator"]
    lin_est <- beta_intervention
  }
  else if (impact_model=="slope"){
    beta_interaction <- coeff$count["count_indicator:shifted_time"]
    RR <- exp(beta_interaction*b)
    var_lin <- b^2*cov_mat["count_indicator:shifted_time","count_indicator:shifted_time"]#+b*2*cov_mat["indicator","indicator:shifted_time"]
    lin_est <- b*beta_interaction
  }

  CI_lin <- c(lin_est-1.96*sqrt(var_lin),lin_est+1.96*sqrt(var_lin))
  CI_RR <- exp(CI_lin)
  TS <- lin_est/sqrt(var_lin)
  p_value <- round(2*pnorm(abs(TS),lower.tail = FALSE),2)
  ret_vec <-c(RR,CI_RR,p_value)
  names(ret_vec) <- c("RR", "2.5% CI", "97.5% CI","P-value")
  s <- summary(model)
  if (isTRUE(print_summary)){
    print(s)
  }
  print(ret_vec)
  s[["RR"]] <- ret_vec #or model[["RR"]] <- ret_vec and then return(model)

  #add predictions
  predA <- predict(model,type="response")
  data$pred <- predA#*100/data[[offset_name]]
  if (isTRUE(counterfactual)){
    # Compute counterfactual values
    new_data <- data
    new_data$indicator <- 0
    pred_counter <- predict(model,type="response",newdata = new_data)
    data$predC <- pred_counter#*100/data[[offset_name]]
    data$predC[data$indicator!=1] <- NA
  }
  data <- as_tibble(data)

  return(list(model=model, model_summary=s, data=data))
}

#' ITS analysis for continuous outcomes with no seasonality
#' \code{its_lm_wo_seas} fits a linear regression model to an ITS, and returns the model, the summary of the model (including the mean difference and Cohen's d), and the original data together with the model predictions.
#'
#' @param data The data frame corresponding to the supplied formula, existing of at least 2 variables: (1) the count outcome, and (2) a vector of time points
#' @param form A formula with the response on the left, followed by the ~ operator, and the covariates on the right, separated by + operators. The formula should not contain an offest term.
#' @param time_name A string giving the name of the time variable. The time variable may or may not be supplied as a covariate in the formula
#' @param intervention_start_ind Numeric - a number between 1 and nrow(data)-1 stating the time point of the start of the intervention
#' @param impact_model A string specifying the assumed impact model. Possible options include "full" corresponding to a model including both a level change and a slope change, "level" corresponding to a model including just a level change, and "slope" corresponding to a model including just a slope change. Default value is "full".
#' @param counterfactual Logical - indicating whether the model-based counterfactual values should also be returned as an additional column in the data. Default value is FALSE, in which case the counterfactual values are not returned.
#' @param print_summary Logical - indicating whethwe the entire model summary should be printed, or just the relevant effect size. Default value is FALSE in which case only the effect size, together with its 95\% CI and P-value, are printed.
#' @return The function returns a list with three elements: the fitted linear regression model, the summary of the model (including the mean difference and Cohen's d), and the original data together with the model predictions.
#' @examples
#' data <- unemployed
#' form <- as.formula("percent ~ time")
#' intervention_start_ind <- which(data$year==2020 & data$month>2| data$year==2021)[1]
#' fit <- its_lm_wo_seas(data=data,form=form,time_name = "time",intervention_start_ind=intervention_start_ind, impact_model = "full",counterfactual = TRUE)
#' @importFrom tibble is_tibble as_tibble
#' @importFrom rlang is_formula sym
#' @importFrom stats as.formula glm lm pnorm ts vcov update predict var na.omit quantile
#' @importFrom forecast fourier
#' @importFrom MASS mvrnorm
#' @export
its_lm_wo_seas <- function(data,form,time_name, intervention_start_ind,impact_model="full", counterfactual=FALSE, print_summary=FALSE){
  pred <- predC <- indicator <- NULL
  if (!(is.data.frame(data) | is_tibble(data))){
    stop("Please make sure that data is either a data frame or a tibble")
  }
  if(!(is_formula(form))){
    stop("Please make sure that form is a formula object")
  }
  if (!(time_name %in% colnames(data))){
    stop("Please make sure that time_name belongs to colnames(data)")
  }
  if(!all(data[[time_name]]==(1:nrow(data)))){
    data[[time_name]]=1:nrow(data)
    print("time_name column was overwritten with the values 1:nrow(data)")
  }
  if (!(impact_model %in% c("full","level","slope"))){
    stop("Please enter a valid impact_model name. Possible impact models are \"full\", \"level\", and \"slope\".")
  }
  vars <- all.vars(form)
  response <- vars[1]
  covariates <- vars[-1]
  if (!(response %in% colnames(data))){
    stop("Please make sure that the response variable on the left hand side of the formula object belongs to colnames(data)")
  }
  if (!all(covariates %in% colnames(data))){
    stop("Please make sure that the covariates on the right hand side of the formula object belong to colnames(data)")
  }
  n <- nrow(data)
  if (!(intervention_start_ind>1 & intervention_start_ind<=n)){
    stop("Please make sure that intervention_start_ind is a value greater than 1 and less or equal to nrow(data). intervention_start_ind is the index ")
  }
  data$indicator <- 0
  data$indicator[intervention_start_ind:n] <- 1
  data$shifted_time <- data[[time_name]]-intervention_start_ind
  # update formula
  if (impact_model=="full"){
    if (time_name %in% covariates){
      form_update <-update(form,    as.formula(paste0("~ . + indicator + indicator:shifted_time")))
    } else {
      form_update <-update(form,    as.formula(paste0("~ . +", time_name," + indicator + indicator:shifted_time")))
    }
  } else if (impact_model=="level"){
    if (time_name %in% covariates){
      form_update <-update(form,    as.formula(paste0("~ . + indicator")))
    } else {
      form_update <-update(form,    as.formula(paste0("~ . +", time_name," + indicator")))
    }
  } else if (impact_model=="slope"){
    if (time_name %in% covariates){
      form_update <-update(form,    as.formula(paste0("~ . + indicator:shifted_time")))
    } else {
      form_update <-update(form,    as.formula(paste0("~ . +", time_name,"+ indicator:shifted_time")))
    }
  }

  # fit model
  model <- lm(form_update,data)
  cov_mat <- vcov(model)

  #Mean difference
  t_star <- which(data$indicator==1, arr.ind = TRUE)[1]
  b <- (n-t_star)/2
  if (impact_model=="full"){
    beta_intervention <- model$coefficients["indicator"]
    beta_interaction <- model$coefficients["indicator:shifted_time"]
    mean_diff <- beta_intervention+beta_interaction*b
    var_lin <- cov_mat["indicator","indicator"]+b^2*cov_mat["indicator:shifted_time","indicator:shifted_time"]+b*2*cov_mat["indicator","indicator:shifted_time"]
  } else if (impact_model=="level"){
    beta_intervention <- model$coefficients["indicator"]
    mean_diff <- beta_intervention
    var_lin <- cov_mat["indicator","indicator"]
  } else if (impact_model=="slope"){
    beta_interaction <- model$coefficients["indicator:shifted_time"]
    mean_diff <- beta_interaction*b
    var_lin <- b^2*cov_mat["indicator:shifted_time","indicator:shifted_time"]#+b*2*cov_mat["indicator","indicator:shifted_time"]
  }

  CI <- c(mean_diff-1.96*sqrt(var_lin),mean_diff+1.96*sqrt(var_lin))
  TS <- mean_diff/sqrt(var_lin)
  p_value <- round(2*pnorm(abs(TS),lower.tail = FALSE),2)
  ret_vec <-c(mean_diff, CI ,p_value)
  names(ret_vec) <- c("Mean difference", "2.5% CI", "97.5% CI","P-value")
  summar <- summary(model)
  # print(summar)
  # print(ret_vec)
  summar[["Mean difference"]] <- ret_vec #or model[["RR"]] <- ret_vec and then return(model)

  #add predictions and compute effect size
  predA <- predict(model,type="response")
  data$pred <- predA
  intervention_predictions <- data$pred[data$indicator==1]
  s1<- var(intervention_predictions)
  # Compute counterfactual values
  new_data <- data
  new_data$indicator <- 0
  pred_counter <- predict(model,type="response",newdata = new_data)
  data$predC <- pred_counter
  data$predC[data$indicator!=1] <- NA
  data <- as_tibble(data)
  counterfacual_predictions <- na.omit(data$predC)
  s2 <- var(counterfacual_predictions)
  s <- sqrt((s1+s2)/2) #pooled
  # s <- sqrt(var(data[[response]][data$indicator==1])) #SD of observed values for the intervention period
  # s <- sqrt(var(intervention_predictions-counterfacual_predictions)) #SD of difference
  # s <- sqrt(s2) #SD of control
  cohen_d <- mean_diff/s

  #bootstrap for CI and p-value
  #CI
  betas <- model$coefficients
  n_boot_samples <- 2000
  cohen_d_mat <- matrix(NA,n_boot_samples,2)
  model_copy <- model
  model_copy_null <- model
  for (i in 1:n_boot_samples){
    set.seed(i)
    sampled_beta <- mvrnorm(n=1, mu=betas, Sigma=cov_mat )
    sampled_beta_null <- sampled_beta
    if (impact_model=="full"){
      sampled_beta_null[c("indicator","indicator:shifted_time")] <- sampled_beta[c("indicator","indicator:shifted_time")]-betas[c("indicator","indicator:shifted_time")]
    } else if (impact_model=="level"){
      sampled_beta_null["indicator"] <- sampled_beta["indicator"]-betas["indicator"]
    } else if (impact_model=="slope"){
      sampled_beta_null["indicator:shifted_time"] <- sampled_beta["indicator:shifted_time"]-betas["indicator:shifted_time"]
    }
    model_copy$coefficients <- sampled_beta
    fitted_values <- predict(model_copy,type="response")[intervention_start_ind:n]
    counterfactuals <-  predict(model_copy,type="response",newdata = new_data)[intervention_start_ind:n]
    var_fitted <- var(fitted_values)
    var_counter <- var(counterfactuals)
    Sp <- sqrt((var_fitted+var_counter)/2)
    if (impact_model=="full"){
      MD <- sampled_beta["indicator"]+b*sampled_beta["indicator:shifted_time"]
    } else if (impact_model=="level"){
      MD <- sampled_beta["indicator"]
    } else if (impact_model=="slope"){
      MD <- b*sampled_beta["indicator:shifted_time"]
    }
    d <- MD/Sp
    cohen_d_mat[i,1] <- d
    model_copy$coefficients <- sampled_beta_null
    fitted_values <- predict(model_copy,type="response")[intervention_start_ind:n]
    counterfactuals <-  predict(model_copy,type="response",newdata = new_data)[intervention_start_ind:n]
    var_fitted <- var(fitted_values)
    var_counter <- var(counterfactuals)
    Sp <- sqrt((var_fitted+var_counter)/2)
    if (impact_model=="full"){
      MD <- sampled_beta_null["indicator"]+b*sampled_beta_null["indicator:shifted_time"]
    } else if (impact_model=="level"){
      MD <- sampled_beta_null["indicator"]
    } else if (impact_model=="slope"){
      MD <- b*sampled_beta_null["indicator:shifted_time"]
    }
    d <- MD/Sp
    cohen_d_mat[i,2] <- d
  }
  CI_low <- quantile(cohen_d_mat[,1],0.025)
  CI_up <- quantile(cohen_d_mat[,1],0.975)
  p_value_d <- mean(abs(cohen_d_mat[,2])>abs(cohen_d))

  # #P-value
  # null_betas <- betas
  # if (impact_model=="full"){
  #   null_betas[c("indicator","indicator:shifted_time")] <- c(0,0)
  # } else if (impact_model=="level"){
  #   null_betas["indicator"] <- 0
  # } else if (impact_model=="slope"){
  #   null_betas["indicator:shifted_time"] <- 0
  # }
  # cohen_d_vector <- matrix(NA,n_boot_samples,1)
  # for (i in 1:n_boot_samples){
  #   sampled_null_beta <- mvrnorm(n=1, mu=null_betas, Sigma=cov_mat )
  #   model_copy$coefficients <- sampled_null_beta
  #   fitted_values <- predict(model_copy,type="response")[intervention_start_ind:n]
  #   counterfactuals <-  predict(model_copy,type="response",newdata = new_data)[intervention_start_ind:n]
  #   var_fitted <- var(fitted_values)
  #   var_counter <- var(counterfactuals)
  #   Sp <- sqrt((var_fitted+var_counter)/2)
  #   if (impact_model=="full"){
  #     MD <- sampled_null_beta["indicator"]+b*sampled_null_beta["indicator:shifted_time"]
  #   } else if (impact_model=="level"){
  #     MD <- sampled_null_beta["indicator"]
  #   }
  #   else if (impact_model=="slope"){
  #     MD <- b*sampled_null_beta["indicator:shifted_time"]
  #   }
  #   d <- MD/Sp
  #   cohen_d_vector[i] <- d
  # }
  # p_value_d <- mean(abs(cohen_d_vector)>abs(cohen_d))
  ret_vec2 <-c(cohen_d, CI_low, CI_up ,p_value_d)
  names(ret_vec2) <- c("Cohen's d", "2.5% CI", "97.5% CI","P-value")
  if (isTRUE(print_summary)){
    print(summar)
    print(ret_vec)
  }
  print(ret_vec2)
  summar[["Cohen's d"]] <- ret_vec2

  if (!(isTRUE(counterfactual))){
    data$predC <- NULL
  }
  # N <- 2*length(intervention_predictions)-2
  # J <- gamma(N/2) / (sqrt(N/2) * gamma((N-1)/2))
  # bias_corrected_d <- J*cohen_d
  # sig_d <- sqrt((8+bias_corrected_d^2)/(4*length(intervention_predictions)))
  # lower <- bias_corrected_d-1.96*sig_d
  # upper <- bias_corrected_d+1.96*sig_d
  # TS_d <- bias_corrected_d/sig_d
  # p_value_d <- round(2*pnorm(abs(TS_d),lower.tail = FALSE),2)



  # #draw
  # Group <- levels( factor(c("Counterfactual","Fitted values")))
  # response <- sym(response)
  # p2 <- ggplot(data = data , aes(y = !!response, x = dt)) +
  #   geom_point()  + ylab(y_lab) + xlab("Date") +
  #   geom_line(aes(y = predC, x = dt, color="Counterfactual",linetype = "Counterfactual"), size=1,key_glyph = draw_key_smooth)+
  #   geom_line(aes(y = pred, x = dt, color="Fitted values",linetype = "Fitted values"), size=1,key_glyph = draw_key_smooth)+
  #   scale_color_manual("", values = c("Counterfactual"="red","Fitted values"="black"),breaks = Group) +
  #   scale_linetype_manual("",values = c("Counterfactual"=2,"Fitted values"=1),breaks = Group) +
  #   theme(legend.key=element_blank())
  # p2
  return(list(model=model, model_summary=summar, data=data))
}

#' ITS analysis for continuous outcomes with seasonal adjustments via Fourier terms.
#'
#' \code{its_lm_fourier} fits a linear regression model asjusted to seasonality, and returns the model, the summary of the model (including the mean difference and Cohen's d), and the original data together with the model predictions.
#'
#' @param data The data frame corresponding to the supplied formula, existing of at least 2 variables: (1) the count outcome, and (2) a vector of time points
#' @param form A formula with the response on the left, followed by the ~ operator, and the covariates on the right, separated by + operators. The formula should not contain an offest term.
#' @param time_name A string giving the name of the time variable. The time variable may or may not be supplied as a covariate in the formula
#' @param intervention_start_ind Numeric - a number between 1 and nrow(data)-1 stating the time point of the start of the intervention
#' @param freq A positive integer describing the frequency of the time series.
#' @param keep_significant_fourier Logical - indicating whether only the significant Fourier terms should be considered. Default is TRUE and then the model is fitted twice; once to obtain the significant Fourier terms, and second time keeping only the significant Fourier terms. If FALSE, then all the Fourier terms are used.
#' @param impact_model A string specifying the assumed impact model. Possible options include "full" corresponding to a model including both a level change and a slope change, "level" corresponding to a model including just a level change, and "slope" corresponding to a model including just a slope change. Default value is "full".
#' @param counterfactual Logical - indicating whether the model-based counterfactual values should also be returned as an additional column in the data. Default value is FALSE, in which case the counterfactual values are not returned.
#' @param print_summary Logical - indicating whethwe the entire model summary should be printed, or just the relevant effect size. Default value is FALSE in which case only the effect size, together with its 95\% CI and P-value, are printed.
#' @return The function returns a list with three elements: the fitted linear regression model, the summary of the model (including the mean difference and Cohen's d), and the original data together with the model predictions.
#' @examples
#' data <- unemployed
#' form <- as.formula("percent ~ time")
#' intervention_start_ind <- which(data$year==2020 & data$month>2| data$year==2021)[1]
#' fit <- its_lm_fourier(data=data,form=form,time_name = "time",intervention_start_ind=intervention_start_ind,freq=12, keep_significant_fourier=TRUE, impact_model = "full",counterfactual = TRUE)
#' @importFrom tibble is_tibble as_tibble
#' @importFrom rlang is_formula sym
#' @importFrom stats as.formula glm lm pnorm ts vcov update predict var na.omit quantile
#' @importFrom forecast fourier
#' @importFrom MASS mvrnorm
#' @export
its_lm_fourier <- function(data, form, time_name, intervention_start_ind,freq, keep_significant_fourier=TRUE,impact_model="full", counterfactual=FALSE, print_summary=FALSE){
  pred <- predC <- indicator <- NULL
  if (!(is.data.frame(data) | is_tibble(data))){
    stop("Please make sure that data is either a data frame or a tibble")
  }
  if(!(is_formula(form))){
    stop("Please make sure that form is a formula object")
  }
  if (!(time_name %in% colnames(data))){
    stop("Please make sure that time_name belongs to colnames(data)")
  }
  if(!all(data[[time_name]]==(1:nrow(data)))){
    data[[time_name]]=1:nrow(data)
    print("time_name column was overwritten with the values 1:nrow(data)")
  }
  if (!(impact_model %in% c("full","level","slope"))){
    stop("Please enter a valid impact_model name. Possible impact models are \"full\", \"level\", and \"slope\".")
  }
  vars <- all.vars(form)
  response <- vars[1]
  covariates <- vars[-1]
  if (!(response %in% colnames(data))){
    stop("Please make sure that the response variable on the left hand side of the formula object belongs to colnames(data)")
  }
  if (!all(covariates %in% colnames(data))){
    stop("Please make sure that the covariates on the right hand side of the formula object belong to colnames(data)")
  }
  n <- nrow(data)
  if (!(intervention_start_ind>1 & intervention_start_ind<=n)){
    stop("Please make sure that intervention_start_ind is a value greater than 1 and less or equal to nrow(data). intervention_start_ind is the index ")
  }
  data$indicator <- 0
  data$indicator[intervention_start_ind:n] <- 1
  data$shifted_time <- data[[time_name]]-intervention_start_ind
  # update formula
  if (impact_model=="full"){
    if (time_name %in% covariates){
      form_update <-update(form,    as.formula(paste0("~ . + indicator + indicator:shifted_time")))
    } else {
      form_update <-update(form,    as.formula(paste0("~ . +", time_name," + indicator + indicator:shifted_time")))
    }
  } else if (impact_model=="level"){
    if (time_name %in% covariates){
      form_update <-update(form,    as.formula(paste0("~ . + indicator")))
    } else {
      form_update <-update(form,    as.formula(paste0("~ . +", time_name," + indicator")))
    }
  } else if (impact_model=="slope"){
    if (time_name %in% covariates){
      form_update <-update(form,    as.formula(paste0("~ . + indicator:shifted_time")))
    } else {
      form_update <-update(form,    as.formula(paste0("~ . +", time_name,"+ indicator:shifted_time")))
    }
  }
  if(!is.logical(keep_significant_fourier)){
    stop("keep_significant_fourier must be either TRUE or FALSE")
  }
  if (!(freq>1 & freq<=n)){
    stop("Please make sure that the freq is a value greater than 1 and less or equal to nrow(data)")
  }
  t_star <- which(data$indicator==1, arr.ind = TRUE)[1]
  #estimate Fourier terms based only on pre-intervention data
  ts_outcome <- ts(data[[response]],frequency = freq)
  fourier_mat <- fourier(ts_outcome,K=freq/2)
  colnames(fourier_mat) <- gsub("-",".", colnames(fourier_mat))
  fourier_df <- data.frame(fourier_mat)
  colnames(fourier_df) <- colnames(fourier_mat)
  data <- cbind(data,fourier_df)
  form_update_full <-update(form_update, as.formula(paste0("~ . +", paste(colnames(fourier_mat), collapse= "+"))))
  # fit model
  model <- lm(form_update_full,data)
  if(isTRUE(keep_significant_fourier)){
    all_significant_terms <- summary(model)$coeff[,4] < 0.05
    all_fourier_terms <- all_significant_terms[!names(all_significant_terms) %in% c("(Intercept)","time","indicator","indicator:shifted_time")]
    selected_fourier <- names(all_fourier_terms)[all_fourier_terms == TRUE]
    form_update_sig <-update(form_update, as.formula(paste0("~ . +", paste(selected_fourier, collapse= "+"))))
    model <- lm(form_update_sig,data)
  }
  cov_mat <- vcov(model)
  #Mean difference
  b <- (n-t_star)/2
  if (impact_model=="full"){
    beta_intervention <- model$coefficients["indicator"]
    beta_interaction <- model$coefficients["indicator:shifted_time"]
    mean_diff <- beta_intervention+beta_interaction*b
    var_lin <- cov_mat["indicator","indicator"]+b^2*cov_mat["indicator:shifted_time","indicator:shifted_time"]+b*2*cov_mat["indicator","indicator:shifted_time"]
  } else if (impact_model=="level"){
    beta_intervention <- model$coefficients["indicator"]
    mean_diff <- beta_intervention
    var_lin <- cov_mat["indicator","indicator"]
  } else if (impact_model=="slope"){
    beta_interaction <- model$coefficients["indicator:shifted_time"]
    mean_diff <- beta_interaction*b
    var_lin <- b^2*cov_mat["indicator:shifted_time","indicator:shifted_time"]#+b*2*cov_mat["indicator","indicator:shifted_time"]
  }
  # b <- (n-t_star)/2
  # beta_intervention <- model$coefficients["indicator"]
  # beta_interaction <- model$coefficients["indicator:shifted_time"]
  # mean_diff <- beta_intervention+beta_interaction*b
  # cov_mat <- vcov(model)
  # var_lin <- cov_mat["indicator","indicator"]+b^2*cov_mat["indicator:shifted_time","indicator:shifted_time"]+b*2*cov_mat["indicator","indicator:shifted_time"]
  # #lin_est <- beta_intervention+b*beta_interaction
  CI <- c(mean_diff-1.96*sqrt(var_lin),mean_diff+1.96*sqrt(var_lin))
  TS <- mean_diff/sqrt(var_lin)
  p_value <- round(2*pnorm(abs(TS),lower.tail = FALSE),2)
  ret_vec <-c(mean_diff, CI ,p_value)
  names(ret_vec) <- c("Mean difference", "2.5% CI", "97.5% CI","P-value")
  summar <- summary(model)
  # print(summar)
  # print(ret_vec)
  summar[["Mean difference"]] <- ret_vec #or model[["RR"]] <- ret_vec and then return(model)

  #add predictions and compute effect size
  predA <- predict(model,type="response")
  data$pred <- predA
  intervention_predictions <- data$pred[data$indicator==1]
  s1<- var(intervention_predictions)
  # Compute counterfactual values
  new_data <- data
  new_data$indicator <- 0
  pred_counter <- predict(model,type="response",newdata = new_data)
  data$predC <- pred_counter
  data$predC[data$indicator!=1] <- NA
  data <- as_tibble(data)
  counterfacual_predictions <- na.omit(data$predC)
  s2 <- var(counterfacual_predictions)
  s <- sqrt((s1+s2)/2)
  # s <- sqrt(var(data[[response]][data$indicator==1]))
  # s <- sqrt(var(intervention_predictions-counterfacual_predictions))
  # s <- sqrt(s2)
  cohen_d <- mean_diff/s

  #bootstrap for CI and p-value
  #CI
  betas <- model$coefficients
  n_boot_samples <- 2000
  cohen_d_mat <- matrix(NA,n_boot_samples,2)
  model_copy <- model
  model_copy_null <- model
  for (i in 1:n_boot_samples){
    set.seed(i)
    sampled_beta <- mvrnorm(n=1, mu=betas, Sigma=cov_mat )
    sampled_beta_null <- sampled_beta
    if (impact_model=="full"){
      sampled_beta_null[c("indicator","indicator:shifted_time")] <- sampled_beta[c("indicator","indicator:shifted_time")]-betas[c("indicator","indicator:shifted_time")]
    } else if (impact_model=="level"){
      sampled_beta_null["indicator"] <- sampled_beta["indicator"]-betas["indicator"]
    } else if (impact_model=="slope"){
      sampled_beta_null["indicator:shifted_time"] <- sampled_beta["indicator:shifted_time"]-betas["indicator:shifted_time"]
    }
    model_copy$coefficients <- sampled_beta
    fitted_values <- predict(model_copy,type="response")[intervention_start_ind:n]
    counterfactuals <-  predict(model_copy,type="response",newdata = new_data)[intervention_start_ind:n]
    var_fitted <- var(fitted_values)
    var_counter <- var(counterfactuals)
    Sp <- sqrt((var_fitted+var_counter)/2)
    if (impact_model=="full"){
      MD <- sampled_beta["indicator"]+b*sampled_beta["indicator:shifted_time"]
    } else if (impact_model=="level"){
      MD <- sampled_beta["indicator"]
    } else if (impact_model=="slope"){
      MD <- b*sampled_beta["indicator:shifted_time"]
    }
    d <- MD/Sp
    cohen_d_mat[i,1] <- d
    model_copy$coefficients <- sampled_beta_null
    fitted_values <- predict(model_copy,type="response")[intervention_start_ind:n]
    counterfactuals <-  predict(model_copy,type="response",newdata = new_data)[intervention_start_ind:n]
    var_fitted <- var(fitted_values)
    var_counter <- var(counterfactuals)
    Sp <- sqrt((var_fitted+var_counter)/2)
    if (impact_model=="full"){
      MD <- sampled_beta_null["indicator"]+b*sampled_beta_null["indicator:shifted_time"]
    } else if (impact_model=="level"){
      MD <- sampled_beta_null["indicator"]
    } else if (impact_model=="slope"){
      MD <- b*sampled_beta_null["indicator:shifted_time"]
    }
    d <- MD/Sp
    cohen_d_mat[i,2] <- d
  }
  CI_low <- quantile(cohen_d_mat[,1],0.025)
  CI_up <- quantile(cohen_d_mat[,1],0.975)
  p_value_d <- mean(abs(cohen_d_mat[,2])>abs(cohen_d))

  ret_vec2 <-c(cohen_d, CI_low, CI_up ,p_value_d)
  names(ret_vec2) <- c("Cohen's d", "2.5% CI", "97.5% CI","P-value")
  if (isTRUE(print_summary)){
    print(summar)
    print(ret_vec)
  }
  print(ret_vec2)
  summar[["Cohen's d"]] <- ret_vec2

  # N <- 2*length(intervention_predictions)-2
  # J <- gamma(N/2) / (sqrt(N/2) * gamma((N-1)/2))
  # bias_corrected_d <- J*cohen_d
  # sig_d <- sqrt((8+bias_corrected_d^2)/(4*length(intervention_predictions)))
  # lower <- bias_corrected_d-1.96*sig_d
  # upper <- bias_corrected_d+1.96*sig_d
  # TS_d <- bias_corrected_d/sig_d
  # p_value_d <- round(2*pnorm(abs(TS_d),lower.tail = FALSE),2)
  #
  # ret_vec2 <-c(bias_corrected_d, lower, upper ,p_value_d)
  # names(ret_vec2) <- c("Adjusted Cohen's d", "2.5% CI", "97.5% CI","P-value")
  # print(summar)
  # print(ret_vec)
  # print(ret_vec2)
  # summar[["Cohen's d"]] <- ret_vec2

  if (!(isTRUE(counterfactual))){
    data$predC <- NULL
  }
  # #draw
  # Group <- levels( factor(c("Counterfactual","Fitted values")))
  # response <- sym(response)
  # p2 <- ggplot(data = data , aes(y = !!response, x = dt)) +
  #   geom_point()  + ylab(y_lab) + xlab("Date") +
  #   geom_line(aes(y = predC, x = dt, color="Counterfactual",linetype = "Counterfactual"), size=1,key_glyph = draw_key_smooth)+
  #   geom_line(aes(y = pred, x = dt, color="Fitted values",linetype = "Fitted values"), size=1,key_glyph = draw_key_smooth)+
  #   scale_color_manual("", values = c("Counterfactual"="red","Fitted values"="black"),breaks = Group) +
  #   scale_linetype_manual("",values = c("Counterfactual"=2,"Fitted values"=1),breaks = Group) +
  #   theme(legend.key=element_blank())
  # p2
  return(list(model=model, model_summary=summar, data=data))
}


#' ITS analysis for continuous outcomes
#'
#' \code{its_lm} fits a linear regression model to an ITS, and returns the model, the summary of the model (including the mean difference and Cohen's d), and the original data together with the model predictions.
#'
#' @param data The data frame corresponding to the supplied formula, existing of at least 2 variables: (1) the count outcome, and (2) a vector of time points
#' @param form A formula with the response on the left, followed by the ~ operator, and the covariates on the right, separated by + operators. The formula should not contain an offest term.
#' @param time_name A string giving the name of the time variable. The time variable may or may not be supplied as a covariate in the formula
#' @param intervention_start_ind Numeric - a number between 1 and nrow(data)-1 stating the time point of the start of the intervention
#' @param freq A positive integer describing the frequency of the time series.
#' @param seasonality A string specifying whether seasonality should be considered. Possible options include "none" corresponding to no seasonal adjustment, "full" corresponding to using freq-1 Fourier terms to model the seasonal component, and "significant" indicating whether only the significant Fourier terms should be considered in the seasonal adjustment. Default value is "none".
#' @param impact_model A string specifying the assumed impact model. Possible options include "full" corresponding to a model including both a level change and a slope change, "level" corresponding to a model including just a level change, and "slope" corresponding to a model including just a slope change. Default value is "full".
#' @param counterfactual Logical - indicating whether the model-based counterfactual values should also be returned as an additional column in the data. Default value is FALSE, in which case the counterfactual values are not returned.
#' @param print_summary Logical - indicating whethwe the entire model summary should be printed, or just the relevant effect size. Default value is FALSE in which case only the effect size, together with its 95\% CI and P-value, are printed.
#' @return The function returns a list with three elements: the fitted linear regression model, the summary of the model (including the mean difference and Cohen's d), and the original data together with the model predictions.
#' @examples
#' data <- unemployed
#' form <- as.formula("percent ~ time")
#' intervention_start_ind <- which(data$year==2020 & data$month>2| data$year==2021)[1]
#' fit <- its_lm(data=data,form=form,time_name = "time",intervention_start_ind=intervention_start_ind,freq=12, seasonality= "none", impact_model = "full",counterfactual = TRUE)
#' @importFrom tibble is_tibble as_tibble
#' @importFrom rlang is_formula sym
#' @importFrom stats as.formula glm lm pnorm ts vcov update predict var na.omit quantile
#' @importFrom forecast fourier
#' @importFrom MASS mvrnorm
#' @export
its_lm <- function(data, form, time_name, intervention_start_ind,freq, seasonality= "none",impact_model="full", counterfactual=FALSE,print_summary=FALSE){
  pred <- predC <- indicator <- NULL
  if (seasonality=="none"){
    out <- its_lm_wo_seas(data, form, time_name, intervention_start_ind,impact_model, counterfactual, print_summary)
  } else if (seasonality=="full"){
    keep_significant_fourier=FALSE
    out <- its_lm_fourier(data, form, time_name, intervention_start_ind,freq,keep_significant_fourier, impact_model, counterfactual, print_summary)
  } else if (seasonality=="significant"){
    keep_significant_fourier=TRUE
    out <- its_lm_fourier(data, form, time_name, intervention_start_ind,freq,keep_significant_fourier, impact_model, counterfactual, print_summary)
  } else {
    stop("Please enter a valid seasonality argument. Possible seasonality arguments are \"none\", \"full\", and \"significant\".")
  }
  return(out)
}

#' ITS analysis for count outcomes
#'
#' \code{its_poisson} fits a Poisson regression model to an ITS, and returns the model, the summary of the model (including the relative risk), and the original data together with the model predictions.
#'
#' @param data The data frame corresponding to the supplied formula, existing of at least 2 variables: (1) the count outcome, and (2) a vector of time points
#' @param form A formula with the response on the left, followed by the ~ operator, and the covariates on the right, separated by + operators. The formula should not contain an offest term.
#' @param offset_name either a string indicating the name of the offset column in the data, or NULL. Default value is NULL
#' @param time_name A string giving the name of the time variable. The time variable may or may not be supplied as a covariate in the formula
#' @param intervention_start_ind Numeric - a number between 1 and nrow(data)-1 stating the time point of the start of the intervention
#' @param over_dispersion Logical - indicating whether a quasi-Poisson model should be used to account for over dispersion (TRUE), or not (FLASE). Default value is FALSE.
#' @param freq A positive integer describing the frequency of the time series.
#' @param seasonality A string specifying whether seasonality should be considered. Possible options include "none" corresponding to no seasonal adjustment, "full" corresponding to using freq-1 Fourier terms to model the seasonal component, and "significant" indicating whether only the significant Fourier terms should be considered in the seasonal adjustment. Default value is "none".
#' @param impact_model A string specifying the assumed impact model. Possible options include "full" corresponding to a model including both a level change and a slope change, "level" corresponding to a model including just a level change, and "slope" corresponding to a model including just a slope change. Default value is "full".
#' @param counterfactual Logical - indicating whether the model-based counterfactual values should also be returned as an additional column in the data. Default value is FALSE, in which case the counterfactual values are not returned.
#' @param print_summary Logical - indicating whethwe the entire model summary should be printed, or just the relevant effect size. Default value is FALSE in which case only the effect size, together with its 95\% CI and P-value, are printed.
#' @return The function returns a list with three elements: the fitted Poisson regression model, the summary of the model (including the relative risk), and the original data together with the model predictions.
#' @examples
#' data <- unemployed
#' form <- as.formula("unemployed ~ time")
#' intervention_start_ind <- which(data$year==2020 & data$month>2| data$year==2021)[1]
#' fit <- its_poisson(data=data,form=form,offset_name="labour", time_name = "time",intervention_start_ind=intervention_start_ind,over_dispersion=TRUE, freq=12, seasonality= "none", impact_model = "full",counterfactual = TRUE)
#' @importFrom tibble is_tibble as_tibble
#' @importFrom rlang is_formula sym
#' @importFrom stats as.formula glm lm pnorm poisson quasipoisson ts vcov update predict var na.omit quantile
#' @importFrom forecast fourier
#' @importFrom MASS mvrnorm
#' @export
its_poisson <- function(data, form, offset_name=NULL,time_name, intervention_start_ind,over_dispersion=FALSE, freq, seasonality= "none",impact_model="full", counterfactual=FALSE,print_summary=FALSE){
  pred <- predC <- indicator <- NULL
  if (seasonality=="none"){
    out <- its_poisson_wo_seas(data, form, offset_name,time_name, intervention_start_ind,over_dispersion, impact_model, counterfactual, print_summary)
  } else if (seasonality=="full"){
    keep_significant_fourier=FALSE
    out <- its_poisson_fourier(data, form, offset_name,time_name, intervention_start_ind,over_dispersion, freq, keep_significant_fourier,impact_model, counterfactual, print_summary)
  } else if (seasonality=="significant"){
    keep_significant_fourier=TRUE
    out <- its_poisson_fourier(data, form, offset_name,time_name, intervention_start_ind,over_dispersion, freq, keep_significant_fourier,impact_model, counterfactual, print_summary)
  } else {
    stop("Please enter a valid seasonality argument. Possible seasonality arguments are \"none\", \"full\", and \"significant\".")
  }
  return(out)
}

#' ITS analysis for count outcomes with excess zeros
#'
#' \code{its_zero_inflated_poisson} fits a zero-inflated Poisson regression model to an ITS, and returns the model, the summary of the model (including the relative risk), and the original data together with the model predictions.
#'
#' @param data The data frame corresponding to the supplied formula, existing of at least 2 variables: (1) the count outcome, and (2) a vector of time points
#' @param form A formula with the response on the left, followed by the ~ operator, and the covariates on the right, separated by + operators. The formula should not contain an offest term.
#' @param offset_name either a string indicating the name of the offset column in the data, or NULL. Default value is NULL
#' @param time_name A string giving the name of the time variable. The time variable may or may not be supplied as a covariate in the formula
#' @param intervention_start_ind Numeric - a number between 1 and nrow(data)-1 stating the time point of the start of the intervention
#' @param freq A positive integer describing the frequency of the time series.
#' @param seasonality Logical - indicating whether seasonal adjustment via Fourier terms should be used. Default value is FALSE, in which case seasonal adjustment is not considered.
#' @param impact_model A string specifying the assumed impact model. Possible options include "full" corresponding to a model including both a level change and a slope change, "level" corresponding to a model including just a level change, and "slope" corresponding to a model including just a slope change. Default value is "full".
#' @param counterfactual Logical - indicating whether the model-based counterfactual values should also be returned as an additional column in the data. Default value is FALSE, in which case the counterfactual values are not returned.
#' @param print_summary Logical - indicating whethwe the entire model summary should be printed, or just the relevant effect size. Default value is FALSE in which case only the effect size, together with its 95\% CI and P-value, are printed.
#' @return The function returns a list with three elements: the fitted Poisson regression model, the summary of the model (including the relative risk), and the original data together with the model predictions.
#' @examples
#' data <- zero_inflated_sim_data
#' form <- as.formula("monthly_total ~ time")
#' intervention_start_ind <- which(data$Year==2020 & data$Month==3)
#' fit <- its_zero_inflated_poisson(data=data,form=form, time_name = "time",intervention_start_ind=intervention_start_ind, freq=12, seasonality=TRUE, impact_model = "full",counterfactual = TRUE)
#' @importFrom tibble is_tibble as_tibble
#' @importFrom rlang is_formula sym
#' @importFrom stats as.formula glm lm pnorm poisson quasipoisson ts vcov update predict var na.omit quantile
#' @importFrom forecast fourier
#' @importFrom MASS mvrnorm
#' @importFrom pscl zeroinfl
#' @export
its_zero_inflated_poisson <- function(data, form, offset_name=NULL,time_name, intervention_start_ind, freq, seasonality= FALSE,impact_model="full", counterfactual=FALSE,print_summary=FALSE){
  pred <- predC <- indicator <- NULL
  if (!isTRUE(seasonality)){
    out <- its_zero_inflated(data, form, offset_name,time_name, intervention_start_ind, impact_model, counterfactual, print_summary)
  } else {
    out <- its_zero_inflated_fourier(data, form, offset_name,time_name, intervention_start_ind, freq,impact_model, counterfactual, print_summary)
  }
  return(out)
}


#' Plot ITS fitted values
#'
#' \code{plot_its_lm} uses ggplot2 to plot the model-based fitted values, together with a scatterplot of the observed time series
#'
#' @param data The data frame corresponding to the supplied formula, existing of at least 2 variables: (1) the count outcome, and (2) a vector of time points
#' @param intervention_start_ind Numeric - a number between 1 and nrow(data)-1 stating the time point of the start of the intervention
#' @param y_lab a string with the y axis label for the predictions
#' @param response The name of the response variable to be plotted in the scatterplot
#' @param date_name A string giving the name of the date column. The date column must be a Date object.
#' @return a ggplot object including a scatterplot of the time series, the predictions line, and the counterfactual predictions in red (if available.
#' @examples
#' data <- unemployed
#' form <- as.formula("percent ~ time")
#' intervention_start_ind <- which(data$year==2020 & data$month>2| data$year==2021)[1]
#' fit <- its_lm(data=data,form=form, time_name = "time",intervention_start_ind=intervention_start_ind, freq=12, seasonality= "none", impact_model = "full",counterfactual = TRUE)
#' new_data <- fit$data
#' y_lab <- "Unemployment percent"
#' response <- "percent"
#' p <- plot_its_lm(new_data,intervention_start_ind,y_lab,response, "dt")
#' @importFrom lubridate is.Date
#' @importFrom ggplot2 ggplot aes geom_point ylab xlab geom_line draw_key_smooth geom_segment scale_color_manual scale_linetype_manual theme element_blank theme_bw
#' @export
plot_its_lm <- function(data,intervention_start_ind,y_lab,response,date_name){
  pred <- predC <- indicator <- NULL
  if (!(is.data.frame(data) | is_tibble(data))){
    stop("Please make sure that data is either a data frame or a tibble")
  }
  if (!(response %in% colnames(data))){
    stop("Please make sure that the response variable belongs to colnames(data)")
  }
  if (!(date_name %in% colnames(data))){
    stop("Please make sure that the date_name column belongs to colnames(data)")
  } else if (!(is.Date(data[[date_name]]))){
    stop("Please make sure that the date_name column is a Date object")
  }
  n <- nrow(data)
  if (!(intervention_start_ind>1 & intervention_start_ind<=n)){
    stop("Please make sure that intervention_start_ind is a value greater than 1 and less or equal to nrow(data). intervention_start_ind is the index ")
  }
  if (!("pred" %in% colnames(data))){
    stop("The data needs to include the column of predictions. Please use as input the data output of the function its_lm()")
  }
  if (!(is.character(y_lab))){
    y_lab <- ""
    print("y_lab is not a string and thus will not by used")
  }
  response <- sym(response)
  xdot <- data[[date_name]][intervention_start_ind]
  date_name <- sym(date_name)
  data_pre <- data[1:(intervention_start_ind-1),]
  data_post <- data[intervention_start_ind:nrow(data),]
  if ("predC" %in% colnames(data)){
    Group <- levels( factor(c("Counterfactual","Fitted values")))
    p <- ggplot(data = data , aes(y = !!response, x = !!date_name)) +
      geom_point()  + ylab(y_lab) + xlab("Date") +
      geom_line(aes(y = predC, x = !!date_name, color="Counterfactual",linetype = "Counterfactual"), size=1,key_glyph = draw_key_smooth,na.rm=TRUE)+
      geom_line(data = data_pre, aes(y = pred, x = !!date_name, color="Fitted values",linetype = "Fitted values"), size=1,key_glyph = draw_key_smooth)+
      geom_line(data = data_post, aes(y = pred, x = !!date_name, color="Fitted values",linetype = "Fitted values"), size=1,key_glyph = draw_key_smooth)+
      geom_segment(data = data, aes(x=xdot,xend=xdot,y=pred[intervention_start_ind-1],yend=pred[intervention_start_ind]), linetype=3,size=1) +
      scale_color_manual("", values = c("Counterfactual"="red","Fitted values"="black"),breaks = Group) +
      scale_linetype_manual("",values = c("Counterfactual"=2,"Fitted values"=1),breaks = Group) +
      theme(legend.key=element_blank()) +theme_bw()
  } else {
    p <- ggplot(data = data , aes(y = !!response, x = !!date_name)) +
      geom_point()  + ylab(y_lab) + xlab("Date") +
      #geom_line(aes(y = pred, x = !!date_name), size=1,key_glyph = draw_key_smooth,na.rm=TRUE)+
      geom_line(data = data_pre, aes(y = pred, x = !!date_name), size=1,key_glyph = draw_key_smooth)+
      geom_line(data = data_post, aes(y = pred, x = !!date_name), size=1,key_glyph = draw_key_smooth)+
      geom_segment(data = data, aes(x=xdot,xend=xdot,y=pred[intervention_start_ind-1],yend=pred[intervention_start_ind]), linetype=3,size=1) +
      theme(legend.key=element_blank()) +theme_bw()
  }
  return(p)
}

#' Plot ITS fitted values
#'
#' \code{plot_its_poisson} uses ggplot2 to plot the model-based fitted values, together with a scatterplot of the observed time series
#'
#' @param data The data frame corresponding to the supplied formula, existing of at least 2 variables: (1) the count outcome, and (2) a vector of time points
#' @param intervention_start_ind Numeric - a number between 1 and nrow(data)-1 stating the time point of the start of the intervention
#' @param y_lab a string with the y axis label for the predictions
#' @param response The name of the response variable to be plotted in the scatterplot
#' @param offset_name either a string indicating the name of the offset column used in the ITS Poisson model, or NULL. Default value is NULL. When offfset_name exists, the function plots the response rate, instead of the count itself.
#' @param date_name A string giving the name of the date column. The date column must be a Date object.
#' @return a ggplot object including a scatterplot of the time series, the predictions line, and the counterfactual predictions in red (if available).
#' @examples
#' data <- unemployed
#' form <- as.formula("unemployed ~ time")
#' intervention_start_ind <- which(data$year==2020 & data$month>2| data$year==2021)[1]
#' fit <- its_poisson(data=data,form=form,offset_name="labour", time_name = "time",intervention_start_ind=intervention_start_ind,over_dispersion=TRUE, freq=12, seasonality= "none", impact_model = "full",counterfactual = TRUE)
#' new_data <- fit$data
#' y_lab <- "Unemployment percent"
#' response <- "percent"
#' p <- plot_its_poisson(new_data,intervention_start_ind,y_lab,response, offset_name = "labour", "dt")
#' @importFrom lubridate is.Date
#' @importFrom ggplot2 ggplot aes geom_point ylab xlab geom_line draw_key_smooth geom_segment scale_color_manual scale_linetype_manual theme element_blank theme_bw
#' @export
plot_its_poisson <- function(data,intervention_start_ind,y_lab,response, offset_name = NULL, date_name){
  pred <- predC <- indicator <- NULL
  if (!(is.data.frame(data) | is_tibble(data))){
    stop("Please make sure that data is either a data frame or a tibble")
  }
  if (!(response %in% colnames(data))){
    stop("Please make sure that the response variable belongs to colnames(data)")
  }
  if (!(date_name %in% colnames(data))){
    stop("Please make sure that the date_name column belongs to colnames(data)")
  } else if (!(is.Date(data[[date_name]]))){
    stop("Please make sure that the date_name column is a Date object")
  }
  n <- nrow(data)
  if (!(intervention_start_ind>1 & intervention_start_ind<=n)){
    stop("Please make sure that intervention_start_ind is a value greater than 1 and less or equal to nrow(data). intervention_start_ind is the index ")
  }
  if (!("pred" %in% colnames(data))){
    stop("The data needs to include the column of predictions. Please use as input the data output of the function its_lm()")
  }
  if (!(is.character(y_lab))){
    y_lab <- ""
    print("y_lab is not a string and thus will not by used")
  }
  if (!(is.null(offset_name))){
    if (!(offset_name %in% colnames(data))){
      stop("Please make sure that offset_name belongs to colnames(data)")
    } else {
      data$pred <- data$pred*100/data[[offset_name]]
      data[[response]] <- data[[response]]*100/data[[offset_name]]
      if ("predC" %in% colnames(data)){
        data$predC <- data$predC*100/data[[offset_name]]
      }
    }
  }
  response <- sym(response)
  xdot <- data[[date_name]][intervention_start_ind]
  date_name <- sym(date_name)
  data_pre <- data[1:(intervention_start_ind-1),]
  data_post <- data[intervention_start_ind:nrow(data),]
  if ("predC" %in% colnames(data)){
    Group <- levels( factor(c("Counterfactual","Fitted values")))
    p <- ggplot(data = data , aes(y = !!response, x = !!date_name)) +
      geom_point()  + ylab(y_lab) + xlab("Date") +
      geom_line(aes(y = predC, x = !!date_name, color="Counterfactual",linetype = "Counterfactual"), size=1,key_glyph = draw_key_smooth,na.rm=TRUE)+
      geom_line(data = data_pre, aes(y = pred, x = !!date_name, color="Fitted values",linetype = "Fitted values"), size=1,key_glyph = draw_key_smooth)+
      geom_line(data = data_post, aes(y = pred, x = !!date_name, color="Fitted values",linetype = "Fitted values"), size=1,key_glyph = draw_key_smooth)+
      geom_segment(data = data, aes(x=xdot,xend=xdot,y=pred[intervention_start_ind-1],yend=pred[intervention_start_ind]), linetype=3,size=1) +
      scale_color_manual("", values = c("Counterfactual"="red","Fitted values"="black"),breaks = Group) +
      scale_linetype_manual("",values = c("Counterfactual"=2,"Fitted values"=1),breaks = Group) +
      theme(legend.key=element_blank()) +theme_bw()
  } else {
    p <- ggplot(data = data , aes(y = !!response, x = !!date_name)) +
      geom_point()  + ylab(y_lab) + xlab("Date") +
      #geom_line(aes(y = pred, x = !!date_name), size=1,key_glyph = draw_key_smooth,na.rm=TRUE)+
      geom_line(data = data_pre, aes(y = pred, x = !!date_name), size=1,key_glyph = draw_key_smooth)+
      geom_line(data = data_post, aes(y = pred, x = !!date_name), size=1,key_glyph = draw_key_smooth)+
      geom_segment(data = data, aes(x=xdot,xend=xdot,y=pred[intervention_start_ind-1],yend=pred[intervention_start_ind]), linetype=3,size=1) +
      theme(legend.key=element_blank()) +theme_bw()
  }
  return(p)
}
Yael-Travis-Lumer/its2es documentation built on Oct. 31, 2022, 8:05 a.m.