# R/broom_wrapper.R
#
# Defines functions: augment_kmeans, model_info, augment_rowwise_data,
# augment_rowwise, tidy_rowwise, glance_rowwise, calc_conf_mat

# Build a confusion matrix in long format with columns
# actual_value / predicted_value / count.
# Rows with NA predictions are dropped, and combinations of actual/predicted
# values that never occur are filled in with a count of 0.
calc_conf_mat <- function(actual, predicted) {
  df <- data.frame(
    actual_value = actual,
    predicted_value = predicted
  ) %>%
    dplyr::filter(!is.na(predicted_value))

  # get count if it's classification
  # dplyr::n() (qualified, consistent with the rest of this file) and
  # .groups = "drop" replace the group_by/summarize/ungroup trio; the result
  # is identical but without the grouped intermediate.
  df <- df %>%
    dplyr::group_by(actual_value, predicted_value) %>%
    dplyr::summarize(count = dplyr::n(), .groups = "drop")

  if (is.logical(df$actual_value) || is.logical(df$predicted_value)) { # For logical, make them factors to fill with 0.
    df <- df %>%
      dplyr::mutate(actual_value=factor(actual_value,levels=c("TRUE","FALSE")), predicted_value=factor(predicted_value,levels=c("TRUE","FALSE")))
  }
  # Fill in missing actual/predicted combinations with count 0.
  df <- df %>%
    tidyr::complete(actual_value, predicted_value, fill=list(count=0))
  df
}

# For backward compatibilities for broom's rowwise tidier, which was dropped at broom 0.7.0.
#' @export
glance_rowwise <- function(df, model, ...) {
  # Equivalent of: summarize(df, broom::glance(!!rlang::enquo(model), ...))
  model_col <- tidyselect::vars_select(names(df), !! rlang::enquo(model))
  group_cols <- grouped_by(df)
  glance_one <- function(m) {
    # Dropping group columns from each per-model result is a workaround to
    # avoid column name conflicts at unnest.
    res <- broom::glance(m, ...)
    res[group_cols] <- NULL
    res
  }
  ret <- df %>%
    dplyr::ungroup() %>%
    dplyr::mutate(.res = purrr::map(!!rlang::sym(model_col), glance_one)) %>%
    dplyr::select(!!!rlang::syms(group_cols), .res) %>%
    tidyr::unnest(.res)
  # Restore the caller's grouping on the flattened result.
  if (length(group_cols) > 0) {
    ret <- dplyr::group_by(ret, !!!rlang::syms(group_cols))
  }
  ret
}

#' @export
tidy_rowwise <- function(df, model, ...) {
  # summarize(df, broom::tidy(!!rlang::enquo(model), ...))
  # was the originally intended code, but it gave an error like the following
  # in the group_by case:
  #
  #   x subscript out of bounds
  #   Input `..1` is `broom::tidy(model, ...)`.
  #
  # So we do the equivalent with purrr::map, which is more stable as of now.
  model_col <- tidyselect::vars_select(names(df), !! rlang::enquo(model))
  group_cols <- grouped_by(df)
  tidy_one <- function(m) {
    # Dropping group columns from each per-model result is a workaround to
    # avoid column name conflicts at unnest. Without it, test_stats_wrapper.R
    # fails. TODO: We should fix do_on_each_group instead of this.
    res <- broom::tidy(m, ...)
    res[group_cols] <- NULL
    res
  }
  ret <- df %>%
    dplyr::ungroup() %>%
    dplyr::mutate(.res = purrr::map(!!rlang::sym(model_col), tidy_one)) %>%
    dplyr::select(!!!rlang::syms(group_cols), .res) %>%
    tidyr::unnest(.res)
  # Restore the caller's grouping on the flattened result.
  if (length(group_cols) > 0) {
    ret <- dplyr::group_by(ret, !!!rlang::syms(group_cols))
  }
  # Bring the Note column to the last. Without this, if a summary row with only
  # the Note column happens to be the one from the first group, it appears as
  # the first column after the group column.
  if (length(colnames(ret)) > 0) { # Only when there actually are columns, to avoid error.
    ret <- ret %>% dplyr::relocate(any_of(c("Note")), .after = last_col())
  }
  ret
}

#' @export
augment_rowwise <- function(df, model, ...) {
  # Equivalent of: summarize(df, broom::augment(!!rlang::enquo(model), ...))
  model_col <- tidyselect::vars_select(names(df), !! rlang::enquo(model))
  group_cols <- grouped_by(df)
  augment_one <- function(m) {
    # Dropping group columns from each per-model result is a workaround to
    # avoid column name conflicts at unnest.
    res <- broom::augment(m, ...)
    res[group_cols] <- NULL
    res
  }
  ret <- df %>%
    dplyr::ungroup() %>%
    dplyr::mutate(.res = purrr::map(!!rlang::sym(model_col), augment_one)) %>%
    dplyr::select(!!!rlang::syms(group_cols), .res) %>%
    tidyr::unnest(.res)
  # Restore the caller's grouping on the flattened result.
  if (length(group_cols) > 0) {
    ret <- dplyr::group_by(ret, !!!rlang::syms(group_cols))
  }
  ret
}

# augment_rowwise with data argument.
# Used only from augment_kmeans for now.
augment_rowwise_data <- function(df, model, data, ...) {
  # Equivalent of: summarize(df, broom::augment(!!rlang::enquo(model), ...))
  model_col <- tidyselect::vars_select(names(df), !! rlang::enquo(model))
  data_col <- tidyselect::vars_select(names(df), !! rlang::enquo(data))
  group_cols <- grouped_by(df)
  augment_one <- function(m, d) {
    # Dropping group columns from each per-model result is a workaround to
    # avoid column name conflicts at unnest.
    res <- broom::augment(m, data = d, ...)
    res[group_cols] <- NULL
    res
  }
  ret <- df %>%
    dplyr::ungroup() %>%
    dplyr::mutate(.res = purrr::map2(!!rlang::sym(model_col), !!rlang::sym(data_col), augment_one)) %>%
    dplyr::select(!!!rlang::syms(group_cols), .res) %>%
    tidyr::unnest(.res)
  # Restore the caller's grouping on the flattened result.
  if (length(group_cols) > 0) {
    ret <- dplyr::group_by(ret, !!!rlang::syms(group_cols))
  }
  ret
}

#' tidy/glance/augment wrapper
#' @export
model_info <- function(df, model, output = "summary", ...) {
  model_col <- tidyselect::vars_select(names(df), !! rlang::enquo(model))
  model_sym <- rlang::sym(model_col)
  # switch() matches character EXPR exactly, so this dispatch is equivalent to
  # the original if/else-if chain; the unnamed default arm handles bad input.
  switch(output,
    summary   = glance_rowwise(df, !!model_sym, ...),
    variables = tidy_rowwise(df, !!model_sym, ...),
    data      = augment_rowwise(df, !!model_sym, ...),
    stop('output argument has to be "summary", "variables", or "data".')
  )
}

# Backward-compatible aliases: each model-type-specific tidier is just the
# generic rowwise implementation above, kept so older callers keep working.
#' glance for lm
#' @export
glance_lm <- glance_rowwise

#' glance for glm
#' @export
glance_glm <- glance_rowwise

#' glance for kmeans
#' @export
glance_kmeans <- glance_rowwise

#' tidy for lm
#' @export
tidy_lm <- tidy_rowwise
#' tidy for glm
#' @export
tidy_glm <- tidy_rowwise
#' tidy for kmeans
#' @export
tidy_kmeans <- tidy_rowwise

#' augment for lm
#' @export
augment_lm <- augment_rowwise
#' augment for glm
#' @export
augment_glm <- augment_rowwise


#' augment for kmeans
#' Adds a cluster assignment column to the source data of each kmeans model row.
#' @param df Model data frame with a model column and a data column.
#' @param model Column holding the kmeans model objects.
#' @param data Column holding the source data to augment.
#' @export
augment_kmeans <- function(df, model, data){
  validate_empty_data(df)

  model_col <- col_name(substitute(model))
  data_col <- col_name(substitute(data))
  if(!model_col %in% colnames(df)){
    # Fixed: sep was previously passed to stop() instead of paste();
    # paste's default sep is already " ".
    stop(paste(model_col, "is not in column names"))
  }
  if(!data_col %in% colnames(df)){
    stop(paste(data_col, "is not in column names"))
  }
  if (is.null(attr(df$model[[1]], "subject_colname"))) { # Distinguish between columns case and skv case.
    # use do.call to evaluate data_col from a variable. # TODO: Does this reason still make sense?
    ret <- do.call(augment_rowwise_data, list(df, model_col, data=data_col))
    # cluster column is factor labeled "1", "2"..., so convert it to integer to avoid confusion.
    # (This was previously a no-op self-assignment, which skipped the conversion.)
    # Going through as.character uses the labels rather than the level indices.
    ret[[ncol(ret)]] <- as.integer(as.character(ret[[ncol(ret)]]))
  }
  else {
    loadNamespace("dplyr")
    # bind .cluster column refering subject names
    grouped_col <- grouped_by(df)
    cluster_col <- avoid_conflict(grouped_col, ".cluster")

    # Look up each subject's cluster from the model and attach it to the data.
    augment_each <- function(df_each){
      source_data <- df_each[[data_col]]
      kmeans_obj <- df_each[[model_col]]
      if(!is.data.frame(source_data)){
        source_data <- source_data[[1]]
        kmeans_obj <- kmeans_obj[[1]]
      }

      subject_colname <- attr(kmeans_obj, "subject_colname")
      index <- as.factor(source_data[[subject_colname]])
      source_data[[cluster_col]] <- kmeans_obj$cluster[index]
      source_data
    }

    # TODO: dplyr::do_ is deprecated; consider migrating to group_modify.
    ret <- df %>%
      dplyr::do_(.dots=setNames(list(~augment_each(.)), model_col)) %>%
      dplyr::ungroup() %>%
      unnest_with_drop(!!rlang::sym(model_col))
  }
  # update .cluster to cluster or cluster.new if it exists
  colnames(ret)[[ncol(ret)]] <- avoid_conflict(colnames(ret), "cluster")
  ret
}

#' apply data frame with model to a data frame
#' @param df Data frame to predict
#' @param model_df Data frame that has model
#' @param conf_int Confidence level for conf_low/conf_high columns (added when
#'   .fitted and .se.fit are present in the augment result).
#' @param ... Additional argument to be passed to broom::augment
#' @export
add_prediction <- function(df, model_df, conf_int = 0.95, ...){
  if (any(class(model_df$model[[1]]) %in% c("coxph_exploratory", "ranger_survival_exploratory"))) { # For now this is only for Cox regression Analytics View model.
    return(add_prediction2(df, model_df, conf_int = conf_int, ...))
  }

  validate_empty_data(df)

  # parsing arguments of add_prediction and getting optional argument for augment in ...
  cll <- match.call()
  aug_args <- expand_args(cll, exclude = c("df", "model_df"))

  # model_df should not be rowwise grouped here. TODO: Should this be done here, or should we do this when model_df is created, for example in build_model?
  model_df <- model_df %>% dplyr::ungroup()

  # For now predict only with the first model even if the model_df has multiple rows.
  model_df <- model_df %>% dplyr::slice(1:1)

  # validate data frame based on meta info
  model_meta <- model_df[[".model_metadata"]]
  # This is only for Analytics Step.
  # Analytics View does not add .model_metadata column. The logic here is too rough for redoing Analytics View's preprocessing.
  if(!is.null(model_meta)){
    types <- model_meta[[1]]$types
    if(!is.null(types)){
      validate_data(types, df)
    }
    # turn character predictor columns into factors
    # with the same levels as training data.
    flevels <- model_meta[[1]]$flevels
    # Fixed: this guard used to re-check types, which skipped factorization
    # when flevels existed without types (and ran it on NULL flevels otherwise).
    if(!is.null(flevels)){
      df <- factorize_data(flevels, df)
    }
  }

  # Run broom::augment for the (first) model against df, unnesting the result.
  get_result <- function(model_df, df, aug_args, with_response){
    # Use formula to support expanded aug_args (especially for type.predict for logistic regression)
    # because ... can't be passed to a function inside mutate directly.
    model_class_name <- class(model_df$model[[1]])[1]
    if (!exists(paste0('augment.', model_class_name))) {
      stop("EXP-ANA-4 :: [] :: This is not a prediction model.")
    }
    aug_fml <- if(aug_args == ""){
      "broom::augment(m, newdata = df)"
    } else {
      paste0("broom::augment(m, newdata = df, ", aug_args, ")")
    }
    ret <- tryCatch({
      model_df %>%
        # result of aug_fml will be stored in .text_index column
        # .test_index is used because model_df has it and won't be used here
        dplyr::mutate(.test_index=purrr::map(model, function(m){eval(parse(text=aug_fml))})) 
    }, error = function(e) {
      if (!is.null(e$parent)) { # With dplyr 1.0.8, the original error inside mutate is in e$parent. Re-throw that error.
        if (!is.null(e$parent$parent)) { # With dplyr 1.0.10, the original error inside mutate is in e$parent$parent. Re-throw that error.
          stop(e$parent$parent)
        }
        else {
          stop(e$parent)
        }
      }
      else {
        stop(e)
      }
    })

    ret <- if(with_response) {
      ret %>%
        dplyr::mutate(.test_index = purrr::map2(.test_index, model, function(d, m){
          # add predicted_response to the result data frame
          add_response(d, m, "predicted_response")
        }))
    } else {
      ret
    }

    # Avoid name conflict error between group column and columns inside .test_index at unnest.
    # We used to add .group to the group column, but that caused error at a caller dealing with this function's output.
    # So, we are using tidyr_legacy function that changes the column inside .test_index rather than the group column
    # in case of conflict for now.
    ret$source.data <- NULL
    ret$model <- NULL
    ret$.model_metadata <- NULL
    ret <- ret %>% tidyr::unnest(.test_index, names_repair=tidyr::tidyr_legacy) 
    ret
  }

  # if type.predict argument is not indicated in this function
  # and models have $family$linkinv (basically, glm models have it),
  # both fitted link value column and response value column should appear in the result
  # vapply (instead of lapply) so any() receives a logical vector rather than
  # relying on coercion of a list.
  with_response <- !("type.predict" %in% names(cll)) &&
                   any(vapply(model_df$model, function(s) {
                     # For Analytics View (glm_exploratory), response is added inside augment, to keep this code model-agnostic.
                     # This piece of model-specific code here should go away, if we migrate from model steps to analytics view models.
                     "glm_exploratory" %nin% class(s) && "family" %in% names(s) && !is.null(s$family$linkinv)
                   }, logical(1)))

  ret <- tryCatch({
    get_result(model_df, df, aug_args, with_response)
  }, error = function(e){
    if (grepl("arguments imply differing number of rows: ", e$message)) {
      # In this case, df has categories that aren't in model.
      # For example, a model that was created by "category" column which has "a", "b"
      # causes an error if df has "category" column which has "c".

      filtered_data <- df

      for(model in model_df[["model"]]){
        # remove rows that have categories that aren't in model
        # otherwise, broom::augment causes an error
        for (cname in colnames(model$model)) {
          # just filter categorical (not numeric) columns
          if (!is.numeric(model$model[[cname]])){
            filtered_data <- filtered_data[filtered_data[[cname]] %in% model$model[[cname]], ]
          }
        }
      }

      if (nrow(filtered_data) == 0) {
        stop("not enough information to predict in data frame")
      }

      get_result(model_df, filtered_data, aug_args, with_response)
    } else {
      stop(e$message)
    }
  })
  ret <- add_confint(ret, conf_int)
  # For Analytics View for GLM (including logistic regression), rename .fitted to linear_predictor.
  if ("glm_exploratory" %in% class(model_df$model[[1]])) {
    colnames(ret)[colnames(ret) == ".fitted"] <- avoid_conflict(colnames(ret), "linear_predictor")
  }
  else {
    colnames(ret)[colnames(ret) == ".fitted"] <- avoid_conflict(colnames(ret), "predicted_value")
  }
  colnames(ret)[colnames(ret) == ".se.fit"] <- avoid_conflict(colnames(ret), "standard_error")
  colnames(ret)[colnames(ret) == ".resid"] <- avoid_conflict(colnames(ret), "residual")
  # For Analytics View for GLM binomial (including logistic regression), rename predicted_response to predicted_probability.
  # Since we keep the output column name from augment predicted_response and adjust the column name in prediction_binary for training/test,
  # we do the same kind of layering here for new data prediction too, and do the renaming here at the caller of the augment function,
  # rather than inside the augment function.
  # For prediction by Analytics Step, we also use prediction_binary, and the column renaming is done there.
  if ("glm_exploratory" %in% class(model_df$model[[1]]) && model_df$model[[1]]$family$family == "binomial") {
    colnames(ret)[colnames(ret) == "predicted_response"] <- avoid_conflict(colnames(ret), "predicted_probability")
  }

  ret
}

#' Assign a cluster number to each row of source_data using kmeans models in df.
#' @export
assign_cluster <- function(df, source_data){
  validate_empty_data(df)

  df_cnames <- colnames(df)
  # columns that are not model related are regarded as grouping column
  grouping_cols <- df_cnames[!df_cnames %in% c("model", "source.data")]

  # remove na from selected column (column names from cluster centers)
  # without this, augment throws an error with different number of data
  # NOTE(review): center column names are taken from the first model only;
  # presumably all groups' models share the same columns — confirm.
  center_cnames <- colnames(df[["model"]][[1]]$centers)
  for (center_cname in center_cnames) {
    source_data <- source_data[!is.na(source_data[[center_cname]]), ]
  }

  # Nest the source data into one row per group (or a single row when there
  # are no shared grouping columns) so it can be paired with each model.
  source <- if(any(colnames(source_data) %in% grouping_cols)){
    # nest the source data by each group
    source_data %>%
      dplyr::group_by(!!!rlang::syms(grouping_cols)) %>%
      tidyr::nest()
  } else {
    # nest without grouping.
    source_data %>%
      tidyr::nest()
  }

  # drop unnecessary columns
  df <- dplyr::select(df, !!!c("model", grouping_cols))
  # augment data by each row
  # ungroup is needed because grouping is reseted by bind_cols

  joined <- if(length(grouping_cols) > 0) {
    # need this because do and nest has different row order,
    # so grouping_col should be used for index
    dplyr::inner_join(df, source, by = grouping_cols)
  } else {
    dplyr::bind_cols(df, source)
  }

  # Run broom::augment per row to attach the .cluster column, then flatten.
  ret <- joined %>%
    dplyr::ungroup() %>%
    dplyr::mutate(augmented = purrr::map2(model, data, function(model, data) {
      broom::augment(model, data = data)
    })) %>% unnest_with_drop(augmented)

  # change factor to numeric
  ret[[".cluster"]] <- as.numeric(ret[[".cluster"]])
  colnames(ret)[colnames(ret) == ".cluster"] <- avoid_conflict(colnames(ret), "cluster")
  ret
}

#' tidy wrapper for kmeans
#' Returns one row per cluster with center coordinates ("Center <col>"),
#' Size, and Withinss.
#' @export
cluster_info <- function(df){
  validate_empty_data(df)

  # Return the pipeline directly: the previous `ret <- ...` as the last
  # expression made the return value invisible.
  df %>%
    dplyr::ungroup() %>%
    dplyr::mutate(model = purrr::map(model, function(m){
      # Per-cluster summary pulled from the kmeans object.
      info <- data.frame(
        Size = m$size,
        Withinss = m$withinss
      )
      centers <- m$centers %>%
        as.data.frame()
      colnames(centers) <- paste("Center", colnames(centers))
      centers %>%
        dplyr::bind_cols(info)
    })) %>%
    unnest_with_drop(model)
}

#' glance wrapper for kmeans
#' @export
kmeans_info <- function(df){
  validate_empty_data(df)

  ret <- glance_rowwise(df, model)
  # Map broom's glance column names to human-readable names in one pass.
  name_map <- c(
    totss = "Total Sum of Squares",
    tot.withinss = "Total Sum of Squares within Clusters",
    betweenss = "Total Sum of Squares between Clusters",
    iter = "Number of Iterations"
  )
  matched <- colnames(ret) %in% names(name_map)
  colnames(ret)[matched] <- name_map[colnames(ret)[matched]]
  ret
}

#' augment using source data (source.data column) and test index (.test_index column).
#' The data frames in source.data are in the safe column names (e.g. c1, c2, ...).
#' The augmenter for each type of models is expected to map the column names back to the original.
#' @param df Data frame that has model and .test_index.
#' @param data "training" or "test" or "newdata". Which source data should be used.
#' @param ... Additional argument to be passed to broom::augment
#' @export
prediction <- function(df, data = "training", data_frame = NULL, conf_int = 0.95, pretty.name = FALSE, ...){
  validate_empty_data(df)

  df_cnames <- colnames(df)

  # columns other than "source.data", ".test_index" and "model" should be regarded as grouping columns
  # this should be kept after running prediction
  grouping_cols <- df_cnames[!df_cnames %in% c("source.data", ".test_index", "model", ".model_metadata")]


  if (!data %in% c("test", "training", "newdata", "training_and_test")) {
    # Not mentioning training_and_test since it is used only by Analytics View.
    stop('data argument must be "test", "training" or "newdata"')
  }

  if (!all(c("source.data", ".test_index", "model") %in% colnames(df))) {
    stop('input is not model data frame')
  }

  ret <- if(data == "newdata") {
    if(is.null(data_frame)) {
      stop("Please indicate data_frame")
    }
    data_frame <- data_frame %>% dplyr::ungroup() # Ignore grouping on the new data to run prediction on.
    add_prediction(data_frame, df, conf_int = conf_int, ...)
  } else {
    # parsing arguments of prediction and getting optional arguemnt for augment in ...
    cll <- match.call()
    aug_args <- expand_args(cll, exclude = c("df", "data", "data_frame", "conf_int"))

    # if type.predict argument is not indicated in this function
    # and models have $family$linkinv (basically, glm models have it),
    # both fitted link value column and response value column should appear in the result

    with_response <- !("type.predict" %in% names(cll)) &&
                       any(lapply(df$model, function(s) { "family" %in% names(s) })) &&
                       any(lapply(df$model, function(s) { !is.null(s$family$linkinv) }))

    ret <- if(data == "test"){
      if (!is.null(df$model[[1]]$prediction_test)) { # Check if model already has test prediction result. # TODO: Make this model agnostic.
                                                     # This is typically the case for Analytics Views.
        # Augment test part of data with prediction embedded in the model.
        # This is typically for Summary tab of Analytics View on Test Mode.

        # Use formula to support expanded aug_args (especially for type.predict for logistic regression)
        # because ... can't be passed to a function inside mutate directly.
        # If test is FALSE, this uses data as an argument and if not, uses newdata as an argument.
        aug_fml_test <- if(aug_args == ""){
          "broom::augment(m, data = df, data_type='test')"
        } else {
          paste0("broom::augment(m, data = df, data_type='test', ", aug_args, ")")
        }
        augmented <- df %>%
          dplyr::ungroup() %>%
          # Filter out error classes we are using for conveying error info to Summary table.
          # Note that dplyr::filter("error" %nin% class(model)) does not work since class(model) evaluates as "list".
          dplyr::filter(purrr::flatten_lgl(purrr::map(model, function(x){"error" %nin% class(x)}))) %>%  # Since this errors out under rowwise, should be done after ungroup().
          dplyr::mutate(source.data = purrr::map2(source.data, .test_index, function(df, index){
            # Keep data in test_index for test data
            safe_slice(df, index, remove = FALSE)
          })) %>%
          dplyr::select(-.test_index) %>%
          # evaluate the formula of augment and "data" column will have it
          dplyr::mutate(source.data=purrr::map2(model, source.data, function(m,df){eval(parse(text=aug_fml_test))})) 
        augmented <- augmented %>%
          dplyr::ungroup()

        if (with_response){
          augmented <- augmented %>%
            dplyr::mutate(source.data = purrr::map2(source.data, model, add_response))
        }

        augmented %>%
          dplyr::select(-model) %>%
          unnest_with_drop(source.data)
      }
      else { # test prediction result is not in the model. Need to predict from test data.
        # augment by test data

        # check if there is test data
        test_sizes <- vapply(df[[".test_index"]], function(test){
          length(test)
        }, FUN.VALUE = 0)

        if(all(test_sizes==0)){
          stop("no test data found")
        }

        # Use formula to support expanded aug_args (especially for type.predict for logistic regression)
        # because ... can't be passed to a function inside mutate directly.
        # If test is TRUE, this uses newdata as an argument and if not, uses data as an argument.
        aug_fml <- if(aug_args == ""){
          "broom::augment(m, newdata = df)"
        } else {
          paste0("broom::augment(m, newdata = df, ", aug_args, ")")
        }
        data_to_augment <- df %>%
          dplyr::ungroup() %>%
          dplyr::filter(purrr::flatten_lgl(purrr::map(model, function(x){"error" %nin% class(x)}))) %>%  # Since this errors out under rowwise, should be done after ungroup().
          dplyr::mutate(source.data = purrr::map2(source.data, .test_index, function(df, index){
            # keep data only in test_index
            safe_slice(df, index)
          })) %>%
          dplyr::select(-.test_index)

        if(".model_metadata" %in% colnames(data_to_augment)){
          data_to_augment <- data_to_augment %>% dplyr::select(-`.model_metadata`)
        }

        augmented <- tryCatch({
          data_to_augment %>%
            # evaluate the formula of augment and "data" column will have it
            dplyr::mutate(source.data=purrr::map2(model, source.data, function(m,df){eval(parse(text=aug_fml))})) 
        }, error = function(e){
          if (grepl("arguments imply differing number of rows: ", e$message)) {
            data_to_augment %>%
              dplyr::mutate(source.data = purrr::map2(source.data, model, function(df, model){
                # remove rows that have categories that aren't in training data
                # otherwise, broom::augment causes an error
                filtered_data <- df
                for (cname in colnames(model$model)) {
                  filtered_data <- filtered_data[filtered_data[[cname]] %in% model$model[[cname]], ]
                }

                if(nrow(filtered_data) == 0){
                  stop("no data found that can be predicted by the model")
                }

                filtered_data
              })) %>%
              # evaluate the formula of augment and "data" column will have it
              dplyr::mutate(source.data=purrr::map2(model, source.data, function(m,df){eval(parse(text=aug_fml))}))
          } else {
            stop(e$message)
          }
        })

        augmented <- augmented %>% dplyr::ungroup()

        if (with_response){
          augmented <- augmented %>%
            dplyr::mutate(source.data = purrr::map2(source.data, model, add_response))
        }

        ret <- augmented %>%
          dplyr::select(-model) %>%
          unnest_with_drop(source.data)
      }

    } else if (data == "training") {
      # augment by training data

      # Use formula to support expanded aug_args (especially for type.predict for logistic regression)
      # because ... can't be passed to a function inside mutate directly.
      if ("coxph" %in% class(df$model[[1]]) || # For cox model, prediction results are always included.
          !is.null(df$model[[1]]$prediction_training) || # Check if model already has training prediction result. # TODO: Make this model agnostic.
          class(df$model[[1]])[[1]] %in% c("lm_exploratory", "glm_exploratory")) { # For lm/glm, we retrieve training prediction inside vanilla lm/glm model.
        aug_fml <- if(aug_args == ""){
          "broom::augment(m, data = df)"
        } else {
          paste0("broom::augment(m, data = df, ", aug_args, ")")
        }
      }
      else { # If not, predict as a new data.
        aug_fml <- if(aug_args == ""){
          "broom::augment(m, newdata = df)"
        } else {
          paste0("broom::augment(m, newdata = df, ", aug_args, ")")
        }
      }
      # From broom 0.7.0 augment.lm and augment.glm, if data argument is different from the one inside the model due to, for example, NA rows, error like following is raised.
      #
      # Assigned data `predict(x, newdata, type = type.predict) %>% unname()` must be compatible with existing data.
      #
      # To avoid it, do not specify data argument. Let's do this only for glm since, for example, ranger needs it to get actual prediction on training data instead of prediction on OOB data.
      # Another option for workaround was to use newdata argument. But result from it does not have metrics like residuals, standardised_residuals, hat, residual_standard_deviation, or cooks_distance.
      # Drawback of no argument is, the result misses columns in the original data that are not used for the model.
      if (class(df$model[[1]])[[1]] %in% c("glm", "glm_exploratory_0", "lm", "lm_exploratory_0")) {
        aug_fml <- if(aug_args == ""){
          "broom::augment(m)"
        } else {
          paste0("broom::augment(m, ", aug_args, ")")
        }
      }
      augmented <- df %>%
        dplyr::ungroup() %>%
        dplyr::filter(purrr::flatten_lgl(purrr::map(model, function(x){"error" %nin% class(x)}))) %>%  # Since this errors out under rowwise, should be done after ungroup().
        dplyr::mutate(source.data = purrr::map2(source.data, .test_index, function(df, index){
          # remove data in test_index
          safe_slice(df, index, remove = TRUE)
        })) %>%
        dplyr::select(-.test_index) %>%
        # evaluate the formula of augment and "data" column will have it
        dplyr::mutate(source.data=purrr::map2(model, source.data, function(m, df){eval(parse(text=aug_fml))}))

      if (with_response){
        augmented <- augmented %>%
          dplyr::mutate(source.data = purrr::map2(source.data, model, add_response))
      }

      augmented %>%
        dplyr::select(-model) %>%
        unnest_with_drop(source.data)

    } else if (data == "training_and_test") {
      # Augment data that includes both training part and test part of data with predictions embedded in the model.
      # This is for Analytics View on Test Mode.

      # Use formula to support expanded aug_args (especially for type.predict for logistic regression)
      # because ... can't be passed to a function inside mutate directly.
      # If test is FALSE, this uses data as an argument and if not, uses newdata as an argument.
      aug_fml_training <- if(aug_args == ""){
        "broom::augment(m, data = df)"
      } else {
        paste0("broom::augment(m, data = df, ", aug_args, ")")
      }
      aug_fml_test <- if(aug_args == ""){
        "broom::augment(m, data = df, data_type = 'test')"
      } else {
        paste0("broom::augment(m, data = df, data_type = 'test', ", aug_args, ")")
      }
      augmented <- df %>%
        dplyr::ungroup() %>%
        dplyr::filter(purrr::flatten_lgl(purrr::map(model, function(x){"error" %nin% class(x)}))) %>%  # Since this errors out under rowwise, should be done after ungroup().
        dplyr::mutate(source.data.training = purrr::map2(source.data, .test_index, function(df, index){
          # Remove data in test_index for training data
          safe_slice(df, index, remove = TRUE)
        }), source.data.test = purrr::map2(source.data, .test_index, function(df, index){
          # Keep data in test_index for test data
          safe_slice(df, index, remove = FALSE)
        })) %>%
        dplyr::select(-.test_index) %>%
        # evaluate the formula of augment and "data" column will have it
        dplyr::mutate(source.data.training = purrr::map2(model, source.data.training, function(m, df){eval(parse(text=aug_fml_training))}),
                      source.data.test = purrr::map2(model, source.data.test, function(m, df){eval(parse(text=aug_fml_test))}))
      augmented <- augmented %>%
        dplyr::ungroup() %>% # ungroup is necessary here to get expected df1, df2 value in the next line.
        dplyr::mutate(source.data = purrr::map2(source.data.training, source.data.test, function(df1, df2){
          df1 <- df1 %>% dplyr::mutate(is_test_data=FALSE)
          df2 <- df2 %>% dplyr::mutate(is_test_data=TRUE)
          bind_rows_safe(df1, df2)
        })) %>%
        dplyr::select(-source.data.training, -source.data.test) %>%
        dplyr::ungroup()

      if (with_response){
        augmented <- augmented %>%
          dplyr::mutate(source.data = purrr::map2(source.data, model, add_response))
      }

      augmented <- augmented %>%
        dplyr::select(-model) %>%
        unnest_with_drop(source.data)

      # Since is_test_data can go to strange position if there are columns that exists only in certain groups,
      # move it to the last explicitly.
      augmented <- augmented %>% select(-is_test_data, everything(), is_test_data)
      augmented

    }
  }

  # add confidence interval if conf_int is not null and there are .fitted and .se.fit
  if (!is.null(conf_int) & ".se.fit" %in% colnames(ret) & ".fitted" %in% colnames(ret)) {
    if (conf_int < 0 | conf_int > 1){
      stop("conf_int must be between 0 and 1")
    }
    conf_low_colname <- avoid_conflict(colnames(ret), "conf_low")
    conf_high_colname <- avoid_conflict(colnames(ret), "conf_high")
    lower <- (1-conf_int)/2
    higher <- 1-lower
    ret[[conf_low_colname]] <- get_confint(ret[[".fitted"]], ret[[".se.fit"]], conf_int = lower)
    ret[[conf_high_colname]] <- get_confint(ret[[".fitted"]], ret[[".se.fit"]], conf_int = higher)

    # Bring conf_low and conf_high right after predicted_value, instead of .se.fit column.
    ret <- ret %>% dplyr::relocate(any_of(c(!!conf_low_colname, !!conf_high_colname, ".se.fit")), .after=.fitted)
  }

  colnames(ret)[colnames(ret) == ".fitted"] <- avoid_conflict(colnames(ret), "predicted_value")
  colnames(ret)[colnames(ret) == ".se.fit"] <- avoid_conflict(colnames(ret), "standard_error")
  colnames(ret)[colnames(ret) == ".resid"] <- avoid_conflict(colnames(ret), "residuals")
  colnames(ret)[colnames(ret) == ".hat"] <- avoid_conflict(colnames(ret), "hat")
  colnames(ret)[colnames(ret) == ".sigma"] <- avoid_conflict(colnames(ret), "residual_standard_deviation")
  colnames(ret)[colnames(ret) == ".cooksd"] <- avoid_conflict(colnames(ret), "cooks_distance")
  colnames(ret)[colnames(ret) == ".std.resid"] <- avoid_conflict(colnames(ret), "standardised_residuals")

  if (pretty.name) {
    if (!is.null(ret$predicted_response) && !is.null(ret$predicted_value)) {
      # If there are both predicted_response and predicted_value, predicted_value there most likely is linear predictor from glm.
      colnames(ret)[colnames(ret) == "predicted_value"] <- avoid_conflict(colnames(ret), "Linear Predictor")
      colnames(ret)[colnames(ret) == "predicted_response"] <- avoid_conflict(colnames(ret), "Predicted Value")
    }
    else { # If not, whichever that exists should become Predicted Value
      colnames(ret)[colnames(ret) == "predicted_value"] <- avoid_conflict(colnames(ret), "Predicted Value")
      colnames(ret)[colnames(ret) == "predicted_response"] <- avoid_conflict(colnames(ret), "Predicted Value")
    }
    colnames(ret)[colnames(ret) == "predicted_label"] <- avoid_conflict(colnames(ret), "Predicted Label")
    colnames(ret)[colnames(ret) == "predicted_probability"] <- avoid_conflict(colnames(ret), "Predicted Probability") # For ranger etc. we handle classifications with this function, as opposed to in prediction_binary().
    colnames(ret)[colnames(ret) == "standard_error"] <- avoid_conflict(colnames(ret), "Standard Error")
    colnames(ret)[colnames(ret) == "residuals"] <- avoid_conflict(colnames(ret), "Residuals")
    colnames(ret)[colnames(ret) == "hat"] <- avoid_conflict(colnames(ret), "Hat")
    colnames(ret)[colnames(ret) == "residual_standard_deviation"] <- avoid_conflict(colnames(ret), "Estimated Residual SD")
    colnames(ret)[colnames(ret) == "cooks_distance"] <- avoid_conflict(colnames(ret), "Cook's Distance")
    colnames(ret)[colnames(ret) == "standardised_residuals"] <- avoid_conflict(colnames(ret), "Standardised Residuals")
    colnames(ret)[colnames(ret) == "conf_low"] <- avoid_conflict(colnames(ret), "Conf Low")
    colnames(ret)[colnames(ret) == "conf_high"] <- avoid_conflict(colnames(ret), "Conf High")
    colnames(ret)[colnames(ret) == "is_test_data"] <- avoid_conflict(colnames(ret), "Test Data")
    ret <- ret %>% dplyr::rename_with(function(x){gsub("predicted_probability_","Predicted Probability for ", x)}, starts_with("predicted_probability_")) # For multiclass classification.

    # Show Residuals, Standardized Residuals, Estimated Residual SD next to each other.
    if (all(c("Residuals", "Estimated Residual SD") %in% colnames(ret))) {
      ret <- ret %>% dplyr::relocate(`Estimated Residual SD`, .after=`Residuals`)
    }
    if (all(c("Residuals", "Standardised Residuals") %in% colnames(ret))) {
      ret <- ret %>% dplyr::relocate(`Standardised Residuals`, .after=`Residuals`)
    }
  }

  dplyr::group_by(ret, !!!rlang::syms(grouping_cols))
}

# Simplified prediction() that works only with new Analytics View model df. This should be kept model-agnostic.
# This one does not use .test_index and source.data columns.
prediction2 <- function(df, data_frame = NULL, conf_int = 0.95, pretty.name=FALSE, ...){
  validate_empty_data(df)

  # Survival time used at prediction time, if any. Carried over to the output
  # for the post-processing for time-dependent ROC (see end of this function).
  pred_survival_time <- attr(df, "pred_survival_time")

  df_cnames <- colnames(df)

  # columns other than "model" should be regarded as grouping columns
  # this should be kept after running prediction
  grouping_cols <- df_cnames[!df_cnames %in% c("model")]

  # parsing arguments of prediction and getting optional argument for augment in ...
  cll <- match.call()
  aug_args <- expand_args(cll, exclude = c("df", "data", "data_frame", "conf_int"))

  # Augment data that includes both training part and test part of data with predictions embedded in the model.
  # This is for Analytics View on Test Mode.

  # Use formula (text to be eval/parsed) to support expanded aug_args (especially for
  # type.predict for logistic regression) because ... can't be passed to a function
  # inside mutate directly. The training call augments the training data embedded in
  # the model; the test call passes data_type = 'test' to augment the test portion.
  aug_fml_training <- if(aug_args == ""){
    "broom::augment(m)"
  } else {
    paste0("broom::augment(m, ", aug_args, ")")
  }
  aug_fml_test <- if(aug_args == ""){
    "broom::augment(m, data_type = 'test')"
  } else {
    paste0("broom::augment(m, data_type = 'test', ", aug_args, ")")
  }
  augmented <- df %>%
    dplyr::ungroup() %>%
    dplyr::filter(purrr::flatten_lgl(purrr::map(model, function(x){"error" %nin% class(x)}))) %>%  # Since this errors out under rowwise, should be done after ungroup().
    # evaluate the formula of augment and "data" column will have it
    dplyr::mutate(source.data.training = purrr::map(model, function(m){eval(parse(text=aug_fml_training))}),
                  source.data.test = purrr::map(model, function(m){eval(parse(text=aug_fml_test))}))
  # Combine the training and test augmented data per model row, tagging each row
  # with is_test_data so the two parts can be told apart after unnest.
  augmented <- augmented %>%
    dplyr::ungroup() %>% # ungroup is necessary here to get expected df1, df2 value in the next line.
    dplyr::mutate(source.data = purrr::map2(source.data.training, source.data.test, function(df1, df2){
      df1 <- df1 %>% dplyr::mutate(is_test_data=FALSE)
      df2 <- df2 %>% dplyr::mutate(is_test_data=TRUE)
      res <- bind_rows_safe(df1, df2)
      res[grouping_cols]<-NULL # A dirty hack to avoid column name conflict at unnest. TODO: Maybe group column should not be there inside do_on_each_group in the first place.
      res
    })) %>%
    dplyr::select(-source.data.training, -source.data.test) %>%
    dplyr::ungroup()

  augmented <- augmented %>%
    dplyr::select(-model) %>%
    unnest_with_drop(source.data)

  # Since is_test_data can go to strange position if there are columns that exists only in certain groups,
  # move it to the last explicitly.
  ret <- augmented %>% select(-is_test_data, everything(), is_test_data)

  # add confidence interval if conf_int is not null and there are .fitted and .se.fit
  if (!is.null(conf_int) & ".se.fit" %in% colnames(ret) & ".fitted" %in% colnames(ret)) {
    if (conf_int < 0 | conf_int > 1){
      stop("conf_int must be between 0 and 1")
    }
    conf_low_colname <- avoid_conflict(colnames(ret), "conf_low")
    conf_high_colname <- avoid_conflict(colnames(ret), "conf_high")
    # Two-sided interval: e.g. conf_int = 0.95 gives 0.025 and 0.975 quantiles.
    lower <- (1-conf_int)/2
    higher <- 1-lower
    ret[[conf_low_colname]] <- get_confint(ret[[".fitted"]], ret[[".se.fit"]], conf_int = lower)
    ret[[conf_high_colname]] <- get_confint(ret[[".fitted"]], ret[[".se.fit"]], conf_int = higher)

    # move confidence interval columns next to standard error
    ret <- move_col(ret, '.se.fit', which(colnames(ret) == ".fitted") + 1)
    ret <- move_col(ret, conf_low_colname, which(colnames(ret) == ".se.fit") + 1)
    ret <- move_col(ret, conf_high_colname, which(colnames(ret) == conf_low_colname) + 1)
  }

  # Rename broom's dot-prefixed columns to our conventional names.
  colnames(ret)[colnames(ret) == ".fitted"] <- avoid_conflict(colnames(ret), "predicted_value")
  colnames(ret)[colnames(ret) == ".se.fit"] <- avoid_conflict(colnames(ret), "standard_error")
  colnames(ret)[colnames(ret) == ".resid"] <- avoid_conflict(colnames(ret), "residuals")
  colnames(ret)[colnames(ret) == ".hat"] <- avoid_conflict(colnames(ret), "hat")
  colnames(ret)[colnames(ret) == ".sigma"] <- avoid_conflict(colnames(ret), "residual_standard_deviation")
  colnames(ret)[colnames(ret) == ".cooksd"] <- avoid_conflict(colnames(ret), "cooks_distance")
  colnames(ret)[colnames(ret) == ".std.resid"] <- avoid_conflict(colnames(ret), "standardised_residuals")

  if (pretty.name) {
    colnames(ret)[colnames(ret) == "is_test_data"] <- avoid_conflict(colnames(ret), "Test Data")
  }

  # Restore the grouping that the input model data frame implied.
  if (length(grouping_cols) > 0) {
    ret <- dplyr::group_by(ret, !!!rlang::syms(grouping_cols))
  }

  # Pass down survival time used for prediction. This is for the post-processing for time-dependent ROC.
  if (!is.null(pred_survival_time)) {
    attr(ret, "pred_survival_time") <- pred_survival_time
  }
  ret
}

add_prediction2 <- function(df, model_df, conf_int = 0.95, ...){
  # Augment df with predictions from the first model in model_df.
  # Only data frames with single model (no group_by) are supported for now.
  broom::augment(model_df$model[[1]], newdata = df, ...)
}

# Simplified, model agnostic version of rf_evaluation_training_and_test.
# Currently used only by cox regression and survival forest, but planning to migrate others to this one.
evaluation <- function(df, ...){
  validate_empty_data(df)

  df_cnames <- colnames(df)

  # columns other than "model" should be regarded as grouping columns
  # this should be kept after running the evaluation
  grouping_cols <- df_cnames[!df_cnames %in% c("model")]

  # parsing arguments of this call and getting optional argument for glance in ...
  cll <- match.call()
  glance_args <- expand_args(cll, exclude = c("df"))

  # Glance both training part and test part of data with metrics embedded in the model.
  # This is for Analytics View on Test Mode.

  # Use formula (text to be eval/parsed) to support expanded glance_args
  # because ... can't be passed to a function inside mutate directly.
  # The test variant passes data_type = 'test' to get metrics on the test portion.
  aug_fml_training <- if(glance_args == ""){
    "broom::glance(m)"
  } else {
    paste0("broom::glance(m, ", glance_args, ")")
  }
  aug_fml_test <- if(glance_args == ""){
    "broom::glance(m, data_type = 'test')"
  } else {
    paste0("broom::glance(m, data_type = 'test', ", glance_args, ")")
  }
  df <- df %>%
    dplyr::ungroup() %>%
    dplyr::filter(purrr::flatten_lgl(purrr::map(model, function(x){"error" %nin% class(x)}))) %>%  # Since this errors out under rowwise, should be done after ungroup().
    # evaluate the formula of glance and "data" column will have it
    dplyr::mutate(source.data.training = purrr::map(model, function(m){eval(parse(text=aug_fml_training))}),
                  source.data.test = purrr::map(model, function(m){eval(parse(text=aug_fml_test))}))
  # Stack training and test metrics per model row, tagging rows with is_test_data.
  # When there is no test data (0 test rows), keep the training metrics only.
  df <- df %>%
    dplyr::ungroup() %>% # ungroup is necessary here to get expected df1, df2 value in the next line.
    dplyr::mutate(source.data = purrr::map2(source.data.training, source.data.test, function(df1, df2){
      if (nrow(df2) > 0) {
        df1 <- df1 %>% dplyr::mutate(is_test_data=FALSE)
        df2 <- df2 %>% dplyr::mutate(is_test_data=TRUE)
        bind_rows_safe(df1, df2)
      }
      else {
        df1
      }
    })) %>%
    dplyr::select(-source.data.training, -source.data.test) %>%
    dplyr::ungroup()

  df <- df %>%
    dplyr::select(-model) %>%
    unnest_with_drop(source.data)

  if (length(grouping_cols) > 0) {
    df <- dplyr::group_by(df, !!!rlang::syms(grouping_cols))
  }

  # Prettify is_test_data column. Do this after the above arrange calls, since it looks at is_test_data column.
  if (!is.null(df$is_test_data)) {
    df <- df %>% dplyr::select(is_test_data, everything()) %>%
      dplyr::mutate(is_test_data = dplyr::if_else(is_test_data, "Test", "Training")) %>%
      dplyr::rename(`Data Type` = is_test_data)
  }
  df
}

#' prediction wrapper for both training and test data
#' There are not much reason to call this function any more, since you can call prediction(data='training_and_test')
#' directly. Only remaining case we need to call this is for confusion matrix.
#' @param df Data frame to predict. This should have model column.
#' @export
prediction_training_and_test <- function(df, prediction_type="default", threshold = 0.5, ...) {
  # ungroup() is to avoid error from filter that happens under rowwise.
  # Keep only rows with a valid model (not NULL and not an error object).
  # map_lgl replaces the deprecated flatten_lgl(map(...)) combination.
  filtered <- df %>% ungroup() %>% dplyr::filter(purrr::map_lgl(model, function(x){!is.null(x) && "error" %nin% class(x)}))
  if (nrow(filtered) == 0) { # No valid models were returned.
    return(data.frame())
  }
  model <- filtered$model[[1]]

  # Columns other than the reserved model-df columns are group columns.
  grouped_cols <- colnames(df)[!colnames(df) %in% c("model", ".test_index", "source.data", ".model_metadata")]

  # Note that for ranger/rpart, even for binary prediction case, we are using "default" prediction(),
  # and predicted_label column is set by augment.ranger.classification, which is called internally.
  ret <- switch(prediction_type,
                    default = prediction(df, data='training_and_test', ...),
                    binary = prediction_binary(df,  data='training_and_test', threshold = threshold, ...),
                    conf_mat = prediction_binary(df, data='training_and_test', threshold = threshold, ...), # Same as 'binary'. Aggregation for
                                                                                                            # confusion matrix is done later in this function.
                    coxph = prediction_coxph(df, data='training_and_test', ...))


  if (prediction_type == "conf_mat") {
    target_col <- all.vars(model$formula)[[1]]
    # Get original target column name. Use the model filtered above so that it is always a valid model rather than an error object.
    target_col_orig <- model$terms_mapping[[target_col]]

    # Per-group worker: build a confusion matrix from actual vs predicted labels.
    each_mat_func <- function(df) {
      actual_val <- df[[target_col_orig]]
      predicted <- df$predicted_label

      df <- calc_conf_mat(actual_val, predicted)
      df
    }

    # .add = TRUE keeps existing groups. (The `add` argument was deprecated in dplyr 1.0.)
    target_df <- ret %>% group_by(is_test_data, .add = TRUE)
    do_on_each_group(target_df, each_mat_func, with_unnest = TRUE)
  } else {
    # Show test data rows first within each group.
    ret %>% dplyr::arrange(desc(is_test_data), .by_group = TRUE)
  }
}

#' Wrapper around prediction() to set predicted_probability and predicted_label with optimized threshold.
#' Currently, in terms of Analytics View, this is really for logistic regression and GLM, since for ranger and rpart, prediction() already returns predicted_probability and predicted_label.
#' For Model Steps, even for ranger, this function is used.
#' @param df Data frame to predict. This should have model column.
#' @param threshold Threshold value for predicted probability or what to optimize. It can be "f_score", "accuracy", "precision", "sensitivity" or "specificity" to optimize.
#' @export
prediction_binary <- function(df, threshold = 0.5, pretty.name = FALSE, ...){
  # ungroup() is necessary to avoid error under rowwise(). Putting rowwise at the end to put it back to rowwise again. TODO: Is it possible that the input is not under rowwise and our adding rowwise affect processing that follows?
  # map_lgl replaces the deprecated flatten_lgl(map(...)) combination; && is the
  # correct scalar operator since each x is a single model object.
  df <- df %>% ungroup() %>% dplyr::filter(purrr::map_lgl(model, function(x){!is.null(x) && "error" %nin% class(x)})) %>% dplyr::rowwise()

  if (nrow(df) == 0) { # No valid models were returned.
    return(data.frame())
  }
  first_model <- df$model[[1]]

  ret <- prediction(df, ...)

  # converting conf_low and conf_high from regression values
  # to probability values
  if (inherits(first_model, "glm")) {
    if (!is.null(first_model$family)) {
      # linkinv is a function to convert regression values
      # to response values (inverse of logit for logistic regression)
      if (!is.null(first_model$family$linkinv)) {
        if ("conf_low" %in% colnames(ret)) {
          conf_low_vec <- first_model$family$linkinv(ret[["conf_low"]])
          ret[["conf_low"]] <- NULL # Remove column once to move it to the last.
          ret[["conf_low"]] <- conf_low_vec
        }
        if ("conf_high" %in% colnames(ret)) {
          conf_high_vec <- first_model$family$linkinv(ret[["conf_high"]])
          ret[["conf_high"]] <- NULL # Remove column once to move it to the last.
          ret[["conf_high"]] <- conf_high_vec
        }
      }
    }
  }

  # get actual value column name from model formula
  actual_col <- if(!is.null(first_model$formula)) {
    all.vars(first_model$formula)[[1]]
  } else if (!is.null(first_model$terms)) {
    all.vars(first_model$terms)[[1]]
  } else {
    # this is for xgboost model
    if ("xgb.Booster" %in% class(first_model)) {
      all.vars(first_model$fml)[[1]]
    } else {
      stop(paste0(class(first_model)[[1]], " is not supported by prediction_binary"))
    }
  }

  # if there is terms_mapping for randomForest, ranger, or glm_exploratory, use the original column name
  if (!is.null(first_model$terms_mapping) && !is.null(first_model$terms_mapping[[actual_col]])) {
    actual_col <- first_model$terms_mapping[[actual_col]]
  }

  actual_val <- ret[[actual_col]]
  actual_logical <- if ((is.character(actual_val) || is.factor(actual_val)) && "ranger" %in% class(first_model)) {
    # For binary prediction of ranger/rpart used from Analytics View, we use prediction() rather than this prediction_binary().
    # Only case where it comes here is via Analytics Step. Maybe even for that, we should start calling prediction() to
    # use common code as much as possible, but for now Analytics Step of ranger keeps using prediction_binary() for the
    # ability to overwrite classification threshold, and optimized classification threshold. TODO: Clean up this situation.
    # And, when it is ranger, (there is no rpart Analytics Step.)
    # we consider the first level to be "TRUE". This is different from logistic regression.
    actual_val == levels(first_model$y)[[1]]
  }
  else {
    as.logical(as.numeric(actual_val)) # From the code before adding this if clause, but can't be sure if this covers rest of the cases well. TODO: look into it.
  }

  # The probability column name differs by model type; pick whichever exists.
  prob_col_name <- if ("predicted_response" %in% colnames(ret)) {
    "predicted_response"
  } else if ("predicted_probability" %in% colnames(ret)){
    "predicted_probability"
  } else {
    "predicted_value"
  }

  # When threshold is a metric name (e.g. "f_score"), optimize the threshold for it.
  thres <- if (!is.numeric(threshold)) {
    # need actual column to optimize threshold,
    # so the column name should be validated
    if(!actual_col %in% colnames(ret)) {
      stop("There is no actual result in data and can't optimize threshold.")
    }
    opt <- get_optimized_score(actual_logical, ret[[prob_col_name]], threshold)
    opt[["threshold"]]
  } else {
    threshold
  }

  predicted <- ret[[prob_col_name]] >= thres

  # Here, we are trying to cast the predicted to the same data type as the actual value column of test data (or new data if it for some reason has the target column).
  label <- if (is.null(actual_val)) {
    # newdata case, which is usually without actual value column.
    if(!is.null(first_model) && !is.null(first_model$y_levels)){
      # this is new data prediction for xgboost_binary
      # to check levels of response column because
      # it might be factor with two levels, 2 numbers or logical
      # so the predicted result must be the same type too
      first_model$y_levels[as.numeric(predicted) + 1]
    } else {
      predicted # Just return the logical as is for now.
    }
  } else if (is.logical(actual_val)) {
    predicted
  } else if (is.numeric(actual_val)) {
    as.numeric(predicted)
  } else if (is.factor(actual_val)){
    if ("ranger" %in% class(first_model) || "rpart" %in% class(first_model)) {
      # For binary prediction of ranger/rpart used from Analytics View, we use prediction() rather than this prediction_binary().
      # Only case where it comes here is via Analytics Step. Maybe even for that, we should start calling prediction() to
      # use common code as much as possible, but for now Analytics Step of ranger keeps using prediction_binary() for the
      # ability to overwrite classification threshold, and optimized classification threshold. TODO: Clean up this situation.
      # And, when it is ranger, (there is no rpart Analytics Step, but added if statement just for completeness.)
      # we consider the first level to be "TRUE". This is different from logistic regression.
      factor(levels(actual_val)[2 - as.numeric(predicted)], levels(actual_val))
    }
    else {
      # create a factor vector with the same levels as actual_val
      # predicted is logical, so should +1 to make it index
      factor(levels(actual_val)[as.numeric(predicted) + 1], levels(actual_val))
    }
  } else if (is.character(actual_val)) {
    if ("ranger" %in% class(first_model)) {
      # In case of ranger, TRUE corresponds to 1st factor level.
      lev <- levels(first_model$y)
      # TRUE turns into 1 by as.numeric, which makes the index of lev 1. (2-1). FALSE makes the index 2.
      factor(lev[2 - as.numeric(predicted)], lev)
    }
    else if(!is.null(first_model$model) && !is.null(first_model$model[[actual_col]])){
      # modify actual_val to factor with levels used in training data
      lev <- first_model$model[[actual_col]] %>% levels()
      factor(lev[as.numeric(predicted) + 1], lev)
    } else {
      NULL
    }
  } else {
    predicted # There should not be any case that reaches here, but in such case, just return the logical as is for now.
  }

  ret[["predicted_label"]] <- label

  colnames(ret)[colnames(ret) == prob_col_name] <- "predicted_probability"

  if (!is.null(ret$predicted_value)) {
    # Bring those columns as the first of the prediction result related additional columns.
    ret <- ret %>% dplyr::relocate(any_of(c("predicted_label", "predicted_probability", "conf_low", "conf_high")), .before=predicted_value)
  }

  # Move is_test_data to the last again, since new columns were added to the last in this function.
  if (!is.null(ret$is_test_data)) {
    ret <- ret %>% dplyr::select(-is_test_data, everything(), is_test_data)
  }

  if (pretty.name) {
    if (!is.null(ret$predicted_probability) && !is.null(ret$predicted_value)) {
      # If there is predicted_probability, predicted_value there most likely is linear predictor from glm.
      colnames(ret)[colnames(ret) == "predicted_value"] <- avoid_conflict(colnames(ret), "Linear Predictor")
    }
    else { # Not too sure if there is actual case for this case.
      colnames(ret)[colnames(ret) == "predicted_value"] <- avoid_conflict(colnames(ret), "Predicted Value")
    }
    colnames(ret)[colnames(ret) == "predicted_label"] <- avoid_conflict(colnames(ret), "Predicted Label")
    colnames(ret)[colnames(ret) == "predicted_response"] <- avoid_conflict(colnames(ret), "Predicted Response")
    colnames(ret)[colnames(ret) == "predicted_probability"] <- avoid_conflict(colnames(ret), "Predicted Probability")
    colnames(ret)[colnames(ret) == "standard_error"] <- avoid_conflict(colnames(ret), "Standard Error")
    colnames(ret)[colnames(ret) == "residuals"] <- avoid_conflict(colnames(ret), "Residuals")
    colnames(ret)[colnames(ret) == "hat"] <- avoid_conflict(colnames(ret), "Hat")
    colnames(ret)[colnames(ret) == "residual_standard_deviation"] <- avoid_conflict(colnames(ret), "Estimated Residual SD")
    colnames(ret)[colnames(ret) == "cooks_distance"] <- avoid_conflict(colnames(ret), "Cook's Distance")
    colnames(ret)[colnames(ret) == "standardised_residuals"] <- avoid_conflict(colnames(ret), "Standardised Residuals")
    colnames(ret)[colnames(ret) == "conf_low"] <- avoid_conflict(colnames(ret), "Conf Low")
    colnames(ret)[colnames(ret) == "conf_high"] <- avoid_conflict(colnames(ret), "Conf High")
    colnames(ret)[colnames(ret) == "is_test_data"] <- avoid_conflict(colnames(ret), "Test Data")
    # Show Residuals, Standardized Residuals, Estimated Residual SD next to each other.
    if (all(c("Residuals", "Estimated Residual SD") %in% colnames(ret))) {
      ret <- ret %>% dplyr::relocate(`Estimated Residual SD`, .after=`Residuals`)
    }
    if (all(c("Residuals", "Standardised Residuals") %in% colnames(ret))) {
      ret <- ret %>% dplyr::relocate(`Standardised Residuals`, .after=`Residuals`)
    }
  }
  ret
}

#' prediction wrapper to add predicted_probability (probability of "death"), predicted_status,
#' and actual_status, when time is specified.
#' @param df Data frame to predict. This should have model column.
#' @param time The point of time at which we predict survival of subjects.
#' @param threshold The threshold probability to determine predicted_status.
#' @export
prediction_coxph <- function(df, time = NULL, threshold = 0.5, ...){
  validate_empty_data(df)

  # TODO: need to make sure prediction.type is set to "lp"
  # when time is specified.
  ret <- prediction(df, ...)

  # if time is not specified return prediction() result as is.
  if (is.null(time)) {
    return(ret)
  }

  # extract variables for time and status from model formula
  # (LHS of a coxph formula is Surv(time, status)).
  surv_vars <- all.vars(lazyeval::f_lhs(df$model[[1]]$formula))
  time_colname <- surv_vars[[1]]
  status_colname <- surv_vars[[2]]

  # get group columns.
  # we assume that columns of model df other than the ones with reserved name (added by build_model() in build_model.R) are all group columns.
  # TODO: centralize the list of those column names.
  model_df_colnames = colnames(df)
  group_by_names <- model_df_colnames[!model_df_colnames %in% c("source.data", ".test_index", "model", ".model_metadata")]

  # when pre-grouped, prediction() result is grouped.
  # but when not pre-grouped, it is without grouping.
  # in that case, we need to group it by something to work with following operations with nest/mutate/map.
  if (length(group_by_names) == 0) {
    ret <- ret %>% dplyr::mutate(dummy_group_col = 1) %>% dplyr::group_by(dummy_group_col)
  }

  # push ret in df so that we can work on ret and source.data at the same time in following mutate() of df.
  nested_ret_df <- ret %>% tidyr::nest()
  df[["ret"]] <- nested_ret_df$data

  # group df. rowwise nature of df is stripped here.
  if (length(group_by_names) == 0) {
    # need to group by something to work with following operations with mutate/map.
    df <- df %>% dplyr::mutate(dummy_group_col = 1) %>% dplyr::group_by(dummy_group_col)
  }
  else {
    df <- df %>% dplyr::group_by(!!!rlang::syms(group_by_names))
  }

  # add predicted_probability, actual_status, and predicted_status
  ret <- df %>%
    dplyr::mutate(ret = purrr::map2(ret, model, function(ret, model) {
      # basehaz returns base cumulative hazard.
      bh <- survival::basehaz(model)
      # create a function to interpolate function that returns cumulative hazard.
      bh_fun <- approxfun(bh$time, bh$hazard)
      cumhaz_base = bh_fun(time)
      # transform linear predictor (predicted_value) into predicted_probability.
      # predicted_probability is 1 - (y of survival curve).
      ret <- ret %>% dplyr::mutate(predicted_probability = 1 - exp(-cumhaz_base * exp(predicted_value)))
      ret
    }))
  ret <- ret %>% dplyr::select(!!!c("ret", group_by_names))
  ret <- ret %>% unnest_with_drop(ret)

  # set it back to non-group-by state that is same as predict() output.
  if (length(group_by_names) == 0) {
    ret <- ret %>% dplyr::ungroup() %>% dplyr::select(-dummy_group_col)
  }

  # Determine which value of the status column means the event (death):
  # logical TRUE, or numeric 1/2 coding (2 means event when 1/2 coding is used).
  true_value = TRUE
  if (is.numeric(ret[[status_colname]])) {
    if (any(ret[[status_colname]] == 2)) {
      true_value = 2
    }
    else {
      true_value = 1
    }
  }

  ret <- ret %>% dplyr::mutate(predicted_status = predicted_probability > threshold)
  if (length(group_by_names) > 0) {
    ret <- ret %>% ungroup() # next line does not work under group_by state, probably because of using dot. so ungroup it once.
  }
  # actual_status: TRUE if the event happened by `time`, FALSE if known to survive past `time`,
  # NA if censored before `time` (outcome at `time` unknown).
  ret <- ret %>% dplyr::mutate(actual_status = if_else((.[[time_colname]] <= time & .[[status_colname]] == true_value), TRUE, if_else(.[[time_colname]] >= time, FALSE, NA)))
  if (length(group_by_names) > 0) {
    ret <- ret %>% dplyr::group_by(!!!rlang::syms(group_by_names)) # group it back again.
  }

  # Cast statuses back to the numeric coding of the input status column, if numeric.
  if (is.numeric(true_value)) {
    ret$predicted_status <- as.numeric(ret$predicted_status)
    ret$actual_status <- as.numeric(ret$actual_status)
    if (true_value == 2) {
      ret$predicted_status <- ret$predicted_status + 1
      ret$actual_status <- ret$actual_status + 1
    }
  }
  ret
}

#' tidy wrapper for lm and glm
#' @export
model_coef <- function(df, pretty.name = FALSE, conf_int = NULL, ...){
  validate_empty_data(df)

  dots <- list(...)

  ret <- if (!is.null(conf_int)) {
    if (conf_int == "default") {

      level <- dots$conf.level

      if (is.null(level)) {
        # default confidence level
        level <- 0.95
      }

      df %>%
        dplyr::ungroup() %>%
        dplyr::mutate(model = purrr::map(model, function(model){
          # use confint.default for performance
          tidy_ret <- broom::tidy(model, ...)

          # calculate confidence interval by estimate and std.error
          # (&& for the scalar if condition; & would build an unnecessary vector)
          if ("estimate" %in% colnames(tidy_ret) && "std.error" %in% colnames(tidy_ret)) {
            level_low <- (1-level)/2
            level_high <- 1-level_low

            tidy_ret[["conf.low"]] <- get_confint(tidy_ret[["estimate"]], tidy_ret[["std.error"]], level_low)
            tidy_ret[["conf.high"]] <- get_confint(tidy_ret[["estimate"]], tidy_ret[["std.error"]], level_high)
          }
          tidy_ret

        })) %>%
        unnest_with_drop(model)
    } else {
      # broom::tidy uses confint.lm and it uses profile, so "profile" is used in conf_int to switch how to get confint
      profile <- conf_int == "profile"
      tidy_rowwise(df, model, conf.int = profile, pretty.name = pretty.name, ...)
    }
  } else {
    tidy_rowwise(df, model, pretty.name = pretty.name, ...)
  }

  # Add odds ratio for binomial glm (logistic regression).
  if (inherits(df$model[[1]], "glm")) {
    if (!is.null(df$model[[1]]$family)) {
      if (df$model[[1]]$family$family == "binomial"){
        ret <- ret %>% dplyr::mutate(odds_ratio = exp(estimate))
      }
    }
  }
  # Add hazard ratio for Cox regression.
  if (inherits(df$model[[1]], "coxph")) {
    ret <- ret %>% dplyr::mutate(
      hazard_ratio = exp(estimate)
    )
  }
  if (inherits(df$model[[1]], "multinom")) {
    # estimate should be odds_ratio and logarithm of estimate should be new estimate
    # conf_low and conf_high should be also logarithm of themselves
    odds_ratio <- ret[["estimate"]]
    ret[["estimate"]] <- log(ret[["estimate"]])
    ret[["odds_ratio"]] <- odds_ratio
    if("conf.low" %in% colnames(ret)){
      ret[["conf.low"]] <- log(ret[["conf.low"]])
    }
    if("conf.high" %in% colnames(ret)){
      ret[["conf.high"]] <- log(ret[["conf.high"]])
    }
  }

  # Rename broom's column names to pretty or snake_case names.
  if (pretty.name){
    colnames(ret)[colnames(ret) == "term"] <- "Term"
    colnames(ret)[colnames(ret) == "statistic"] <- "t Value"
    colnames(ret)[colnames(ret) == "p.value"] <- "P Value"
    colnames(ret)[colnames(ret) == "std.error"] <- "Std Error"
    colnames(ret)[colnames(ret) == "estimate"] <- "Coefficient"
    colnames(ret)[colnames(ret) == "conf.low"] <- "Conf Low"
    colnames(ret)[colnames(ret) == "conf.high"] <- "Conf High"
    colnames(ret)[colnames(ret) == "hazard_ratio"] <- "Hazard Ratio"
    colnames(ret)[colnames(ret) == "odds_ratio"] <- "Odds Ratio"
    colnames(ret)[colnames(ret) == "y.level"] <- "Predicted Label"
  } else {
    colnames(ret)[colnames(ret) == "statistic"] <- "t_ratio"
    colnames(ret)[colnames(ret) == "p.value"] <- "p_value"
    colnames(ret)[colnames(ret) == "std.error"] <- "std_error"
    colnames(ret)[colnames(ret) == "conf.low"] <- "conf_low"
    colnames(ret)[colnames(ret) == "conf.high"] <- "conf_high"
    colnames(ret)[colnames(ret) == "y.level"] <- "predicted_label"
  }
  # tidy() on coxph keeps .test_index and source.data that we added. Let's drop it.
  if(".test_index" %in% colnames(ret)){
    ret <- ret %>% dplyr::select(-.test_index)
  }
  if("source.data" %in% colnames(ret)){
    ret <- ret %>% dplyr::select(-source.data)
  }
  ret
}

#' glance wrapper
#'
#' Computes per-model summary statistics by running glance_rowwise() (the
#' broom::glance() wrapper defined earlier in this file) on each model stored
#' in the `model` column, then normalizes column names and appends extra info
#' columns (base levels of factor predictors, theta for negative binomial).
#'
#' @param df Model data frame. Expected to carry `model`, `source.data`,
#'   `.test_index` and `.model_metadata` columns; any other column is treated
#'   as a group column.
#' @param pretty.name If TRUE, rename output columns to human-readable names
#'   (e.g. "R Squared"); otherwise use snake_case names (e.g. r_squared).
#' @param ... Forwarded to glance_rowwise(), and from there to broom::glance().
#' @return Data frame with one row of summary statistics per model (per group).
#' @export
model_stats <- function(df, pretty.name = FALSE, ...){
  validate_empty_data(df)

  ret <- glance_rowwise(df, model, pretty.name = pretty.name, ...)

  # Variable names that appear in the model formula; used below to find factor
  # predictors whose base (reference) level should be reported.
  # NOTE(review): only the first model's class is inspected — assumes every row
  # holds a model of the same type. Confirm that is guaranteed by the callers.
  formula_vars <- if (any(c("multinom", "lm", "glm") %in% class(df$model[[1]]))) {
    # lm, glm (including logistic), multinom has a formula class attribute $terms.
    # dot (.) is expanded into actual names there, which is convenient for our purpose of finding factor variables.
    all.vars(df$model[[1]]$terms)
  } else if("coxph" %in% class(df$model[[1]])) {
    # coxph has $formula, but it can have dot (.) in it, which is not good for extracting variable names.
    # use $assign instead.
    names(df$model[[1]]$assign)
  } else {
    NULL
  }

  # get group columns.
  # we assume that columns of model df other than the ones with reserved name are all group columns.
  model_df_colnames = colnames(df)
  group_by_names <- model_df_colnames[!model_df_colnames %in% c("source.data", ".test_index", "model", ".model_metadata")]

  # when pre-grouped, each row of glance result is actually a group.
  # but when not pre-grouped, the only-one row is not a group.
  # in that case, we need to group it by something to work with following operations with nest/mutate/map.
  if (length(group_by_names) == 0) {
    ret <- ret %>% dplyr::mutate(dummy_group_col = 1) %>% dplyr::group_by(dummy_group_col)
  }

  # push ret in df so that we can work on ret and source.data at the same time in following mutate() of df.
  nested_ret_df <- ret %>% tidyr::nest()
  df[["ret"]] <- nested_ret_df$data

  # group df. rowwise nature of df is stripped here.
  if (length(group_by_names) == 0) {
    # need to group by something to work with following operations with mutate/map.
    df <- df %>% dplyr::mutate(dummy_group_col = 1) %>% dplyr::group_by(dummy_group_col)
  }
  else {
    df <- df %>% dplyr::group_by(!!!rlang::syms(group_by_names))
  }

  ret <- df %>%
    dplyr::mutate(ret = purrr::map2(ret, source.data, function(ret, source_data) {
      # for each factor variable in the formula, add base level info column to ret.
      if(!is.null(formula_vars)){
        for(var in formula_vars) {
          if(is.factor(source_data[[var]])) {
            # levels()[[1]] is the base (reference) level of the factor.
            if(pretty.name) {
              ret[paste0("Base Level of ", var)] <- levels(source_data[[var]])[[1]]
            }
            else {
              ret[paste0(var, "_base")] <- levels(source_data[[var]])[[1]]
            }
          }
        }
      }
      ret
    })) %>%
    dplyr::select(!!!c("ret", group_by_names)) %>%
    unnest_with_drop(ret)

  # set it back to non-group-by state that is same as glance() output.
  if (length(group_by_names) == 0) {
    ret <- ret %>% dplyr::ungroup() %>% dplyr::select(-dummy_group_col)
  }

  # "negbin" models (presumably from MASS::glm.nb — TODO confirm) carry theta
  # info that glance output does not include; surface it as extra columns.
  if ("negbin" %in% class(df$model[[1]])) {
    x <- df$model[[1]]
    if(pretty.name) {
      ret <- ret %>% dplyr::mutate(`Theta`=x$theta, `SE Theta`=x$SE.theta)
    }
    else {
      ret <- ret %>% dplyr::mutate(theta=x$theta, SE.theta=x$SE.theta)
    }
  }

  # broom::glance.coxph output has both n and nobs which seems to have the same info.
  # Remove nobs in such cases to avoid duplicate column names as the result of the column name normalization below.
  if ("n" %in% colnames(ret) && "nobs" %in% colnames(ret)) {
    ret <- ret %>% dplyr::select(-nobs)
  }

  # adjust column name style
  if(pretty.name){
    colnames(ret)[colnames(ret) == "r.squared"] <- "R Squared"
    colnames(ret)[colnames(ret) == "adj.r.squared"] <- "Adj R Squared"
    colnames(ret)[colnames(ret) == "sigma"] <- "RMSE"
    colnames(ret)[colnames(ret) == "statistic"] <- "F Value"
    colnames(ret)[colnames(ret) == "p.value"] <- "P Value"
    colnames(ret)[colnames(ret) == "df"] <- "DF"
    colnames(ret)[colnames(ret) == "logLik"] <- "Log Likelihood"
    colnames(ret)[colnames(ret) == "deviance"] <- "Residual Deviance"
    colnames(ret)[colnames(ret) == "df.residual"] <- "Residual DF"
    # for glm
    colnames(ret)[colnames(ret) == "null.deviance"] <- "Null Deviance"
    colnames(ret)[colnames(ret) == "df.null"] <- "Null Model DF"
    colnames(ret)[colnames(ret) == "nobs"] <- "Rows"
    # for multinom
    colnames(ret)[colnames(ret) == "edf"] <- "Effective Number of DF"
    # for coxph
    colnames(ret)[colnames(ret) == "n"] <- "Rows"
    colnames(ret)[colnames(ret) == "nevent"] <- "Rows (TRUE)"
    colnames(ret)[colnames(ret) == "statistic.log"] <- "Likelihood Ratio Test"
    colnames(ret)[colnames(ret) == "p.value.log"] <- "Likelihood Ratio Test P Value"
    colnames(ret)[colnames(ret) == "statistic.sc"] <- "Score Test"
    colnames(ret)[colnames(ret) == "p.value.sc"] <- "Score Test P Value"
    colnames(ret)[colnames(ret) == "statistic.wald"] <- "Wald Test"
    colnames(ret)[colnames(ret) == "p.value.wald"] <- "Wald Test P Value"
    colnames(ret)[colnames(ret) == "r.squared.max"] <- "Max R Squared"
    colnames(ret)[colnames(ret) == "concordance"] <- "Concordance"
    colnames(ret)[colnames(ret) == "std.error.concordance"] <- "Std Error Concordance"

    # for other model types that report evaluation metrics with these names
    colnames(ret)[colnames(ret) =="number_of_iteration"] <- "Number of Iteration"
    colnames(ret)[colnames(ret) =="root_mean_square_error"] <- "RMSE"
    colnames(ret)[colnames(ret) =="mean_absolute_error"] <- "Mean Absolute Error"
    colnames(ret)[colnames(ret) =="negative_log_likelihood"] <- "Negative Log Likelihood"
    colnames(ret)[colnames(ret) =="binary_misclassification_rate"] <- "Binary Misclass. Rate"
    colnames(ret)[colnames(ret) =="misclassification_rate"] <- "Misclass. Rate"
    colnames(ret)[colnames(ret) =="multiclass_logloss"] <- "Multiclass Logloss"
    colnames(ret)[colnames(ret) =="auc"] <- "AUC"
    colnames(ret)[colnames(ret) =="normalized_discounted_cumulative_gain"] <- "Normalized Discounted Cumulative Gain"
    colnames(ret)[colnames(ret) =="mean_average_precision"] <- "Mean Average Precision"
  }else{
    colnames(ret)[colnames(ret) == "r.squared"] <- "r_squared"
    colnames(ret)[colnames(ret) == "adj.r.squared"] <- "adj_r_squared"
    colnames(ret)[colnames(ret) == "sigma"] <- "root_mean_square_error"
    colnames(ret)[colnames(ret) == "statistic"] <- "f_ratio"
    colnames(ret)[colnames(ret) == "p.value"] <- "p_value"
    colnames(ret)[colnames(ret) == "logLik"] <- "log_likelihood"
    colnames(ret)[colnames(ret) == "df.residual"] <- "residual_df"
    # for glm
    colnames(ret)[colnames(ret) == "null.deviance"] <- "null_deviance"
    colnames(ret)[colnames(ret) == "df.null"] <- "df_for_null_model"
    colnames(ret)[colnames(ret) == "nobs"] <- "n" # Making it consistent with coxph
    # for multinom
    colnames(ret)[colnames(ret) == "edf"] <- "effective_number_of_df"
    # for coxph
    colnames(ret)[colnames(ret) == "nevent"] <- "n_event"
    colnames(ret)[colnames(ret) == "statistic.log"] <- "likelihood_ratio_test"
    colnames(ret)[colnames(ret) == "p.value.log"] <- "likelihood_ratio_test_p_value"
    colnames(ret)[colnames(ret) == "statistic.sc"] <- "score_test"
    colnames(ret)[colnames(ret) == "p.value.sc"] <- "score_test_p_value"
    colnames(ret)[colnames(ret) == "statistic.wald"] <- "wald_test"
    colnames(ret)[colnames(ret) == "p.value.wald"] <- "wald_test_p_value"
    colnames(ret)[colnames(ret) == "r.squared.max"] <- "r_squared_max"
    colnames(ret)[colnames(ret) == "std.error.concordance"] <- "std_error_concordance"
    colnames(ret)[colnames(ret) == "AIC"] <- "aic"
    colnames(ret)[colnames(ret) == "BIC"] <- "bic"
  }

  ret
}

#' tidy after converting model to anova
#'
#' Runs anova() on each model in the `model` column, tidies the result with
#' broom::tidy(), and normalizes the output column names.
#'
#' @param df Model data frame with `model`, `source.data`, `.test_index` and
#'   `.model_metadata` columns.
#' @param pretty.name If TRUE, use human-readable column names; otherwise
#'   snake_case names.
#' @export
model_anova <- function(df, pretty.name = FALSE){
  validate_empty_data(df)

  # Tidy a single model's anova table.
  tidy_anova_of <- function(m) {
    am <- anova(m)
    # Work around an error from tidy.anova with broom 1.0.0 that happens when
    # the heading attribute of the anova model is there.
    attr(am, 'heading') <- NULL
    broom::tidy(am)
  }

  ret <- suppressWarnings({
    # this causes warning for Deviance, Resid..Df, Resid..Dev in glm model. TODO: Is this still true?
    df %>%
      dplyr::ungroup() %>%
      dplyr::mutate(output = purrr::map(model, tidy_anova_of)) %>%
      dplyr::select(-source.data, -.test_index, -model, -.model_metadata) %>%
      tidyr::unnest(output)
  })

  # Old name -> new name mapping, applied in order. Includes names coming from
  # lm, glm (Deviance, Resid..Df / df.residual, Resid..Dev / residual.deviance,
  # which vary by package version) and coxph (logLik, Pr...Chi.. which is the
  # P value of the chi-square test) anova outputs.
  rename_map <- if (pretty.name) {
    c(
      "term" = "Term",
      "sumsq" = "Sum of Squares",
      "meansq" = "Mean Square",
      "statistic" = "F Value",
      "p.value" = "P Value",
      "df" = "DF",
      "Resid..Df" = "Residual DF",
      "Resid..Dev" = "Residual Deviance",
      "logLik" = "Log Likelihood",
      "Pr...Chi.." = "P Value"
    )
  } else {
    c(
      "sumsq" = "sum_of_squares",
      "meansq" = "mean_square",
      "statistic" = "f_ratio",
      "p.value" = "p_value",
      "Deviance" = "deviance",
      "Resid..Df" = "residual_df",
      "df.residual" = "residual_df",
      "Resid..Dev" = "residual_deviance",
      "residual.deviance" = "residual_deviance",
      "logLik" = "log_likelihood",
      "Pr...Chi.." = "p_value"
    )
  }
  for (old_name in names(rename_map)) {
    colnames(ret)[colnames(ret) == old_name] <- rename_map[[old_name]]
  }

  ret
}

# function to use in prediction_survfit to remove single value columns.
# alternative to select_if which gives error when column name has '-' in it.
#
# Keeps a column when it has more than one unique value, or when it is
# logical (logical columns are always kept, regardless of cardinality).
# Returns the kept column names, or NULL when no column qualifies (NULL, not
# character(0), to stay backward compatible with the original implementation
# that grew the result from c()).
non_single_value_colnames <- function(data) {
  cols <- colnames(data)
  # Vectorized keep/drop decision instead of growing a vector in a loop.
  keep <- vapply(
    cols,
    function(colname) {
      length(unique(data[[colname]])) > 1 || is.logical(data[[colname]])
    },
    logical(1)
  )
  kept <- cols[keep]
  if (length(kept) > 0) kept else NULL
}

#' tidy after converting model to survfit
#'
#' Runs survival::survfit() on each model in the `model` column (forwarding
#' extra arguments such as newdata), tidies the result, and — when newdata has
#' multiple rows — reshapes the per-cohort wide output into tidy long format.
#'
#' @param newdata Data frame with rows that represent cohorts to simulate
#' @param ... Forwarded to survival::survfit() via the composed call text.
#' @export
prediction_survfit <- function(df, newdata = NULL, ...){
  validate_empty_data(df)

  caller <- match.call()
  # this expands dots arguments to character
  # NOTE(review): the survfit call is built as text and run with
  # eval(parse(...)) so the caller's literal arguments can be forwarded;
  # consider rlang-based call construction instead of parse/eval.
  arg_char <- expand_args(caller, exclude = c("df"))
  if (arg_char != "") {
    fml <- paste0("broom::tidy(survival::survfit(m, ", arg_char, "))")
  } else {
    fml <- paste0("broom::tidy(survival::survfit(m))")
  }
  ret <- df %>% 
      dplyr::ungroup() %>%
      dplyr::mutate(output=purrr::map(model,function(m){eval(parse(text=fml))})) %>%
      dplyr::select(-source.data, -.test_index, -model, -.model_metadata) %>%
      tidyr::unnest(output)

  # if newdata exists and has more than one row, make output data frame tidy.
  # original output is in wide-format with columns like estimate.1, estimate.2, ...
  # NOTE(review): unite_/gather_/separate_ are long-deprecated tidyr verbs;
  # migrate to unite()/pivot_longer()/separate() before a tidyr upgrade
  # removes them.
  if (!is.null(newdata) && nrow(newdata) > 1) {
    # first, unite columns for a cohort into one column, so that gather at the next step works.
    united_colnames = c()
    for (i in 1:nrow(newdata)){
      united_colname = paste0("est", i)
      ret <- ret %>% tidyr::unite_(united_colname, c(paste0("estimate.",i), paste0("std.error.",i), paste0("conf.high.",i),paste0("conf.low.",i)), sep="_", remove=TRUE)
      united_colnames = c(united_colnames, united_colname)
    }
    # gather the united values into key column (.cohort.temp) and value column (.val.temp)
    gathered <- ret %>% tidyr::gather_(".cohort.temp", ".val.temp", united_colnames)
    # separate the value column to reverse the effect of unite() we did before.
    ret <- gathered %>% tidyr::separate_(".val.temp",c("estimate", "std_error", "conf_high", "conf_low"),sep="_")
    # convert characterized data back to numeric.
    ret <- ret %>% dplyr::mutate(estimate = as.numeric(estimate), std_error = as.numeric(std_error), conf_high = as.numeric(conf_high), conf_low = as.numeric(conf_low))
    # replace the cohort name with a string that is a concatenation of values that represents the cohort.
    # but, omit newdata column that has only 1 unique value from the cohort names we create here.
    selected_newdata_colnames <- non_single_value_colnames(newdata)
    # drop = FALSE keeps the output a data.frame as opposed to a vector even when only one column is selected.
    selected_newdata <- newdata[, selected_newdata_colnames, drop = FALSE]
    cohorts_labels <- selected_newdata %>% tidyr::unite(label, everything())
    for (i in 1:nrow(selected_newdata)){
      ret <- ret %>% dplyr::mutate(.cohort.temp = if_else(paste0("est", i) == .cohort.temp, cohorts_labels$label[[i]], .cohort.temp))
    }
    # replace the .cohort.temp column name with name like "age_sex".
    colnames(ret)[colnames(ret) == ".cohort.temp"] <- paste0(colnames(selected_newdata), collapse = "_")
  }

  # normalize broom's dotted column names to snake_case.
  colnames(ret)[colnames(ret) == "n.risk"] <- "n_risk"
  colnames(ret)[colnames(ret) == "n.event"] <- "n_event"
  colnames(ret)[colnames(ret) == "n.censor"] <- "n_censor"
  colnames(ret)[colnames(ret) == "std.error"] <- "std_error"
  colnames(ret)[colnames(ret) == "conf.low"] <- "conf_low"
  colnames(ret)[colnames(ret) == "conf.high"] <- "conf_high"
  ret
}

#' tidy after generating survfit
#'
#' Builds a survival::survfit model from the data and returns the tidied
#' survival curve. Survival time is either taken from the `time` column, or
#' derived from `start_time`/`end_time` date columns in the given time unit.
#'
#' @param df Data frame (possibly grouped) with the survival data.
#' @param time NSE column with the survival time. When not supplied,
#'   start_time/end_time are used instead.
#' @param status NSE column with the event status.
#' @param start_time NSE date column used as the start of the observation
#'   when `time` is not supplied.
#' @param end_time NSE date column used as the end of the observation; NA
#'   values (still-alive cases) are imputed with today's date. When omitted,
#'   today's date is used for all rows.
#' @param time_unit Unit for the derived survival time: "day", "week",
#'   "month", "quarter", or "year" (anything else falls back to days).
#' @export
do_survfit <- function(df, time, status, start_time = NULL, end_time = NULL, time_unit = "day", ...){
  validate_empty_data(df)

  grouped_col <- grouped_by(df)

  # substitute is needed because time can be
  # NSE column name and it throws an evaluation error
  # without it
  if (is.null(substitute(time))) {
    # no explicit time column: derive survival time from start/end dates.
    start_time_col <- col_name(substitute(start_time))
    df[[start_time_col]] <- as.Date(df[[start_time_col]]) # convert to Date in case it is POSIXct.
    if (!is.null(substitute(end_time))) { # if end_time exists, fill NA with today()
      end_time_col <- col_name(substitute(end_time))
      df[[end_time_col]] <- as.Date(df[[end_time_col]]) # convert to Date in case it is POSIXct.
      df[[end_time_col]] <- impute_na(df[[end_time_col]] ,type = "value", val=lubridate::today())
    }
    else { # if end_time does not exist, create .end_time column with value of today()
      end_time_col <- avoid_conflict(colnames(df), ".end_time")
      df[[end_time_col]] <- lubridate::today()
    }

    # as.numeric() does not support all units.
    # also support of units are different between Date and POSIXct.
    # let's do it ourselves.
    # The strings are divisors (in days) pasted into the formula text below.
    time_unit_days_str = switch(time_unit,
                                day = "1",
                                week = "7",
                                month = "(365.25/12)",
                                quarter = "(365.25/4)",
                                year = "365.25",
                                "1")
    # we are ceiling survival time to make it integer in the specified time unit.
    # this is to make resulting survival curve to have integer data point in the specified time unit.
    fml <- as.formula(paste0("survival::Surv(ceiling(as.numeric(`", end_time_col, "`-`", start_time_col, "`, units = \"days\")/", time_unit_days_str, "), `", substitute(status), "`) ~ 1"))
  }
  else {
    # need to compose formula with non-standard evaluation.
    # simply using time and status in formula here results in a formula that literally looks at
    # "time" column and "status" column, which is not what we want.
    fml <- as.formula(paste0("survival::Surv(`", substitute(time), "`,`", substitute(status), "`) ~ 1"))
  }

  ret <- df %>% build_model(model_func = survival::survfit, formula = fml, ...) %>% tidy_rowwise(model)

  # for better viz, add time=0 row for each group when it is not already there.
  add_time_zero_row_each <- function(df) {
    if(!is.null(grouped_col)){
      # drop grouping columns
      df <- df[, !colnames(df) %in% grouped_col]
    }
    if (nrow(df[df$time==0,]) == 0) { # do this only when time=0 row is not already there.
      # at time 0 nobody has had an event yet: survival estimate is 1 and
      # n.risk is taken from the first observed row.
      df <- rbind(data.frame(time=0, n.risk=df$n.risk[1], n.event=0, n.censor=0, estimate=1, std.error=0, conf.high=1, conf.low=1), df)
    }
    df
  }

  # NOTE(review): dplyr::do_() is deprecated; consider group_modify() or
  # nest()/map() when touching this next.
  tmp_col <- avoid_conflict(colnames(ret), "tmp_col")
  ret <- ret %>%
    dplyr::do_(.dots=setNames(list(~add_time_zero_row_each(.)), tmp_col)) %>%
    dplyr::ungroup()
  ret <- ret %>%  unnest_with_drop(!!rlang::sym(tmp_col))

  # normalize broom's dotted column names to snake_case.
  colnames(ret)[colnames(ret) == "n.risk"] <- "n_risk"
  colnames(ret)[colnames(ret) == "n.event"] <- "n_event"
  colnames(ret)[colnames(ret) == "n.censor"] <- "n_censor"
  colnames(ret)[colnames(ret) == "std.error"] <- "std_error"
  colnames(ret)[colnames(ret) == "conf.low"] <- "conf_low"
  colnames(ret)[colnames(ret) == "conf.high"] <- "conf_high"
  ret
}

#' tidy after converting model to confint
#'
#' Runs stats::confint() on each model in the `model` column (forwarding the
#' caller's extra arguments), tidies the result, and renames the output
#' columns to "Term" / "Prob <level>".
#' @export
model_confint <- function(df, ...){ # TODO: Not used from anywhere, and the output does not look clean since broom 0.7.0. Delete at some point.
  validate_empty_data(df)

  call_info <- match.call()
  # Expand the dots arguments to a character string so they can be spliced
  # into the confint() call text below.
  extra_args <- expand_args(call_info, exclude = c("df"))
  tidy_call_text <- if (extra_args != "") {
    sprintf("broom::tidy(stats::confint(m, %s))", extra_args)
  } else {
    "broom::tidy(stats::confint(m))"
  }

  ret <- df %>%
    dplyr::ungroup() %>%
    dplyr::mutate(output = purrr::map(model, function(m) {
      # m is referenced by the parsed call text.
      eval(parse(text = tidy_call_text))
    })) %>%
    dplyr::select(-source.data, -.test_index, -model, -.model_metadata) %>%
    tidyr::unnest(output)

  colnames(ret)[colnames(ret) == ".rownames"] <- "Term"
  # original columns are like X0.5..   X99.5.., so replace X to Prob and remove trailing dots
  prob_col_idx <- (ncol(ret) - 1):ncol(ret)
  colnames(ret)[prob_col_idx] <- sub(
    "\\.+$", "",
    sub("X", "Prob ", colnames(ret)[prob_col_idx])
  )
  ret
}
# exploratory-io/exploratory_func documentation built on April 23, 2024, 9:15 p.m.