rkHMM:

#' Fit Regime Switching Model on a Data.Frame
#'
#' TODO(Ricky Kotecha): 2018/02/23 How do you do a see? Should the export be false?
#'
#' @param time_series
#' @param number_of_states
#' @param em_iterations
#' @param initial_probabilities
#' @param verbose
#' @param random.start
#'
#' @return
#' @export
#'
#' @examples
fit_regime_model <- function(time_series,
                             number_of_states = 2,
                             em_iterations = 1000,
                             initial_probabilities = c(1, rep(0, number_of_states - 1)),
                             verbose = FALSE,
                             random.start = TRUE) {
  input_time_series <- time_series

  if (!class(time_series) %in% c("matrix", "data.frame")) {
    stop("This function has not been tested for non matrix and dataframe inputs")
  }

  if (any(is.na(time_series))) {
    stop("Time series has na values")
  }

  input_indices <- colnames(time_series)

  invisible(
    plyr::llply(
      input_time_series,
      function(column) {
        if (!is.numeric(column)) {
          stop("All columns must be of class numeric")
        }
      })
  )

  if (ncol(time_series) == 1) {
    if (class(time_series) == "matrix") {
      stop("single variable only works on data frames for the time being")
    }


    formulae <- plyr::alply(
      .data = colnames(time_series),
      .margins = 1,
      .fun = function(x) {
        as.formula(paste(x, " ~ 1"))
      }
    )
    #formulae = as.formula("time_series ~ 1")
    model <- depmixS4::depmix(
      formulae,
      family = list(gaussian()),
      nstates = number_of_states,
      data = time_series,
      instart = initial_probabilities
    )

    fitted_model <- depmixS4::fit(model, verbose = verbose, emcontrol = depmixS4::em.control(maxit = em_iterations))


    states <- plyr::ldply(fitted_model@response, function(x) ( data.frame(x[[1]]@parameters) ))
    states$state <- 1:nrow(states)

    time_series_fit <- depmixS4::posterior(fitted_model)
    time_series_fit_all <- cbind(time_series_fit, time_series)

    #reorder states
    states$old_state <- states$state
    states <- dplyr::arrange(states, sd)
    states$state <- 1:nrow(states)

    time_series_fit_all <- dplyr::rename(time_series_fit_all, old_state = state)

    time_series_fit_all$order <- 1:nrow(time_series_fit_all)
    time_series_fit_all_ordered <- merge(
      time_series_fit_all,
      states,
      by.x = "old_state",
      by.y = "old_state"
    )
    time_series_fit_all_ordered <- dplyr::arrange(time_series_fit_all_ordered, order)

    stopifnot(nrow(time_series_fit_all_ordered) == nrow(time_series_fit_all))

    # reorder the S* probabilities
    df <- data.frame(current_state = 1:number_of_states)
    df <- merge(df, states, by.x = "current_state", by.y = "old_state")
    df <- dplyr::arrange(df, current_state)
    names(time_series_fit_all_ordered)[names(time_series_fit_all_ordered) %in%
                                         paste("S", df$current_state, sep = "")] <-
      paste("S", df$state, sep = "")
    time_series_fit_all_ordered$order <- NULL
    time_series_fit_all_ordered$old_state <- NULL

    actual_sd <- time_series_fit_all_ordered %>%
      dplyr::group_by(state) %>%
      dplyr::summarise_at(.vars = c(names(time_series)), sd)
    names(actual_sd)[2] <- "actual_sd"

    expected_sd <- time_series_fit_all_ordered %>%
      dplyr::group_by(state) %>%
      dplyr::summarise(
        expected_sd = dplyr::first(sd)
      )
    state_sds <- dplyr::left_join(actual_sd, expected_sd, "state")

    if (!all(order(state_sds$actual_sd) == order(state_sds$expected_sd)) ) {
      warning("state vols don't match actual vol", state_sds)
    }

    states <- merge(states, state_sds, by.x = "state", by.y = "state")

    transition_matrix <- t(fitted_model@trDens[1, , ])
    states <- dplyr::arrange(states, state)
    transition_matrix <- transition_matrix[states$old_state, states$old_state]


    fitted_model <- list(
      fitted_model = fitted_model,
      transition_matrix = transition_matrix,
      states = states,
      time_series = time_series_fit_all_ordered
    )
  } else {
    fitted_model <- fit_multivariable(
      time_series,
      n_states = number_of_states,
      verbose,
      init_p = initial_probabilities,
      random.start
    )
  }

  # if there was an effective in the time series add it back in
  if ("effective_date" %in% names(input_time_series)) {
    fitted_model$time_series$effective_date <- date_idx
  }
  fitted_model$input_indices <- input_indices

  class(fitted_model) <- c("rk_regime_model", class(fitted_model))
  fitted_model
}