R/read.dendrometer.R
In dendRoAnalyst: A Tool for Processing and Analyzing Dendrometer Data

Documented in read.dendrometer

#' @title Reading dendrometer data
#' @description
#' Reads dendrometer data from \code{.csv}, \code{.txt}, \code{.tsv}, or \code{.xlsx} files,
#' automatically parsing the datetime information and converting it to
#' \code{\%Y-\%m-\%d \%H:\%M:\%S} in the requested timezone.
#'
#' Supports:
#' \itemize{
#'   \item automatic delimiter detection for text files,
#'   \item real decimal-mark auto-detection for text files,
#'   \item Excel serial date support (\code{1900} / \code{1904} systems),
#'   \item separate \code{date_col} + \code{time_col},
#'   \item optional import report output,
#'   \item optional time-resolution diagnostics.
#' }
#'
#' @param file Path to file (\code{.csv}, \code{.txt}, \code{.tsv}, \code{.xlsx}).
#' @param sep Optional delimiter for text files. If \code{NULL}, auto-detect
#'   among comma, semicolon, tab, and pipe.
#' @param dec Optional decimal mark for text files. If \code{NULL}, auto-detect
#'   between \code{"."} and \code{","}.
#' @param datetime_col Integer or name of the datetime column (default \code{1}).
#'   Ignored if \code{date_col} is provided.
#' @param date_col Optional integer or name of a date column.
#' @param time_col Optional integer or name of a time column. Used together with
#'   \code{date_col}. If \code{NULL}, only the date column is parsed and
#'   \code{"00:00:00"} can be appended if \code{assume_midnight = TRUE}.
#' @param tz Time zone for parsed datetimes (default \code{"UTC"}).
#' @param sheet Excel sheet name or index (for \code{.xlsx}; default \code{NULL} = first sheet).
#' @param range Excel cell range (optional).
#' @param na Character vector of strings to treat as NA.
#' @param assume_midnight Logical; if \code{TRUE}, rows with only a date get \code{"00:00:00"}.
#' @param orders Optional vector of \pkg{lubridate} orders to try. If \code{NULL},
#'   a comprehensive default set is used.
#' @param excel_dates Character. One of \code{"auto"}, \code{"none"}, \code{"1900"},
#'   or \code{"1904"}. Controls handling of numeric Excel serial dates.
#' @param drop_dup_times Logical; if \code{TRUE}, drop duplicated timestamps
#'   (keep first) with a warning/message.
#' @param detect_resolution Logical; if \code{TRUE}, compute basic time-resolution diagnostics.
#' @param return_report Logical; if \code{TRUE}, return a list with
#'   \code{$data} and \code{$report}. If \code{FALSE}, return the tibble only.
#'   In both cases, the report is attached as \code{attr(x, "import_report")}.
#' @param quiet Logical; if \code{TRUE}, suppress informational messages.
#'
#' @return
#' If \code{return_report = FALSE}, a tibble with a POSIXct first column and the
#' remaining data columns unchanged.
#'
#' If \code{return_report = TRUE}, a list with:
#' \describe{
#'   \item{\code{$data}}{The imported tibble.}
#'   \item{\code{$report}}{A structured import report.}
#' }
#'
#' @importFrom lubridate parse_date_time ymd_hms
#' @importFrom readxl read_excel
#' @importFrom tibble as_tibble tibble
#' @importFrom utils read.csv read.table head
#' @importFrom tools file_ext
#' @export
read.dendrometer <- function(
    file,
    sep = NULL,
    dec = NULL,
    datetime_col = 1,
    date_col = NULL,
    time_col = NULL,
    tz = "UTC",
    sheet = NULL,
    range = NULL,
    na = c("", "NA", "NaN", "nan", "null", "NULL", "-9999"),
    assume_midnight = TRUE,
    orders = NULL,
    excel_dates = c("auto", "none", "1900", "1904"),
    drop_dup_times = TRUE,
    detect_resolution = FALSE,
    return_report = FALSE,
    quiet = TRUE
) {

  excel_dates <- match.arg(excel_dates)

  # ---------------------------------------------------------------------------
  # helpers
  # ---------------------------------------------------------------------------

  default_orders <- function() {
    c(
      "Y-m-d H:M:S", "Y-m-d H:M", "Y-m-d",
      "Y/m/d H:M:S", "Y/m/d H:M", "Y/m/d",
      "Y.m.d H:M:S", "Y.m.d H:M", "Y.m.d",
      "d.m.Y H:M:S", "d.m.Y H:M", "d.m.Y",
      "d-m-Y H:M:S", "d-m-Y H:M", "d-m-Y",
      "d/m/Y H:M:S", "d/m/Y H:M", "d/m/Y",
      "m/d/Y H:M:S", "m/d/Y H:M", "m/d/Y",
      "Y-m-d I:M p", "Y-m-d I:M:S p",
      "m/d/Y I:M p", "m/d/Y I:M:S p",
      "d/m/Y I:M p", "d/m/Y I:M:S p",
      "Ymd HMS", "Ymd HM", "Ymd",
      "dmY HMS", "dmY HM", "dmY",
      "mdY HMS", "mdY HM", "mdY"
    )
  }

  get_col_index <- function(data, col, arg_name) {
    if (is.null(col)) return(NULL)

    if (is.character(col)) {
      idx <- match(col, names(data))
      if (is.na(idx)) stop(arg_name, " '", col, "' not found in columns.")
      return(idx)
    }

    idx <- as.integer(col)
    if (!is.finite(idx) || idx < 1 || idx > ncol(data)) {
      stop(arg_name, " index out of bounds.")
    }
    idx
  }

  sniff_sep <- function(line) {
    cands <- c("," = ",", ";" = ";", "\t" = "\t", "|" = "|")
    counts <- vapply(
      cands,
      function(s) length(strsplit(line, s, fixed = TRUE)[[1]]) - 1L,
      integer(1)
    )
    if (max(counts) == 0) "," else names(which.max(counts))
  }

  sniff_dec <- function(lines, sep) {
    if (length(lines) == 0) return(".")

    # If comma is delimiter, decimal comma is highly unlikely in plain delimited files.
    if (identical(sep, ",")) return(".")

    split_lines <- strsplit(lines, sep, fixed = TRUE)
    tokens <- unlist(split_lines, use.names = FALSE)
    tokens <- trimws(tokens)
    tokens <- tokens[nzchar(tokens)]

    # Simple numeric token patterns
    comma_hits <- sum(grepl("^-?[0-9]+,[0-9]+$", tokens))
    dot_hits   <- sum(grepl("^-?[0-9]+\\.[0-9]+$", tokens))

    if (comma_hits > dot_hits) "," else "."
  }

  detect_format_string <- function(x_chr) {
    x_chr <- trimws(as.character(x_chr))
    samp <- x_chr[!is.na(x_chr) & nzchar(x_chr)]
    samp <- utils::head(samp, 50)
    if (!length(samp)) return(NA_character_)

    pats <- list(
      "^\\d{1,2}/\\d{1,2}/\\d{4} \\d{1,2}:\\d{1,2}:\\d{1,2}$" = "%m/%d/%Y %H:%M:%S",
      "^\\d{1,2}/\\d{1,2}/\\d{4} \\d{1,2}:\\d{1,2}$" = "%m/%d/%Y %H:%M",
      "^\\d{4}-\\d{1,2}-\\d{1,2} \\d{1,2}:\\d{1,2}:\\d{1,2}$" = "%Y-%m-%d %H:%M:%S",
      "^\\d{4}-\\d{1,2}-\\d{1,2} \\d{1,2}:\\d{1,2}$" = "%Y-%m-%d %H:%M",
      "^\\d{4}-\\d{1,2}-\\d{1,2}$" = "%Y-%m-%d",
      "^\\d{4}/\\d{1,2}/\\d{1,2} \\d{1,2}:\\d{1,2}:\\d{1,2}$" = "%Y/%m/%d %H:%M:%S",
      "^\\d{4}/\\d{1,2}/\\d{1,2} \\d{1,2}:\\d{1,2}$" = "%Y/%m/%d %H:%M",
      "^\\d{4}/\\d{1,2}/\\d{1,2}$" = "%Y/%m/%d",
      "^\\d{4}\\.\\d{1,2}\\.\\d{1,2} \\d{1,2}:\\d{1,2}:\\d{1,2}$" = "%Y.%m.%d %H:%M:%S",
      "^\\d{4}\\.\\d{1,2}\\.\\d{1,2} \\d{1,2}:\\d{1,2}$" = "%Y.%m.%d %H:%M",
      "^\\d{4}\\.\\d{1,2}\\.\\d{1,2}$" = "%Y.%m.%d",
      "^\\d{1,2}\\.\\d{1,2}\\.\\d{4} \\d{1,2}:\\d{1,2}:\\d{1,2}$" = "%d.%m.%Y %H:%M:%S",
      "^\\d{1,2}\\.\\d{1,2}\\.\\d{4} \\d{1,2}:\\d{1,2}$" = "%d.%m.%Y %H:%M",
      "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}$" = "%d.%m.%Y",
      "^\\d{1,2}-\\d{1,2}-\\d{4} \\d{1,2}:\\d{1,2}:\\d{1,2}$" = "%d-%m-%Y %H:%M:%S",
      "^\\d{1,2}-\\d{1,2}-\\d{4} \\d{1,2}:\\d{1,2}$" = "%d-%m-%Y %H:%M",
      "^\\d{1,2}-\\d{1,2}-\\d{4}$" = "%d-%m-%Y",
      "^\\d{1,2}/\\d{1,2}/\\d{4} \\d{1,2}:\\d{1,2} [APap][Mm]$" = "%m/%d/%Y %I:%M %p",
      "^\\d{4}-\\d{1,2}-\\d{1,2} \\d{1,2}:\\d{1,2} [APap][Mm]$" = "%Y-%m-%d %I:%M %p"
    )

    hits <- vapply(names(pats), function(rx) mean(grepl(rx, samp)), numeric(1))
    if (max(hits) > 0.9) pats[[which.max(hits)]] else NA_character_
  }

  append_midnight_if_needed <- function(x_chr, assume_midnight) {
    if (!assume_midnight) return(x_chr)

    x_chr <- trimws(x_chr)
    only_date <- grepl("^\\d{4}[-/.]\\d{1,2}[-/.]\\d{1,2}$", x_chr) |
      grepl("^\\d{1,2}[-/.]\\d{1,2}[-/.]\\d{4}$", x_chr) |
      grepl("^\\d{8}$", x_chr)

    x_chr[only_date] <- paste0(x_chr[only_date], " 00:00:00")
    x_chr
  }

  excel_serial_to_posix <- function(x_num, tz, system = c("1900", "1904")) {
    system <- match.arg(system)
    origin <- if (system == "1900") "1899-12-30" else "1904-01-01"
    out <- as.POSIXct(x_num * 86400, origin = origin, tz = tz)
    out
  }

  choose_excel_system <- function(x_num, tz, excel_dates) {
    systems <- if (excel_dates == "auto") c("1900", "1904") else excel_dates

    best <- list(
      system = NA_character_,
      dt = rep(as.POSIXct(NA, tz = tz), length(x_num)),
      score = -Inf
    )

    for (sys in systems) {
      dt <- suppressWarnings(excel_serial_to_posix(x_num, tz = tz, system = sys))
      yrs <- suppressWarnings(as.integer(format(dt, "%Y")))
      score <- sum(!is.na(dt) & yrs >= 1980 & yrs <= 2100)

      if (score > best$score) {
        best$system <- sys
        best$dt <- dt
        best$score <- score
      }
    }

    best
  }

  parse_datetime_vector <- function(x, tz, assume_midnight, orders, excel_dates) {
    if (is.null(orders)) orders <- default_orders()

    # Already parsed classes
    if (inherits(x, "POSIXct")) {
      return(list(
        dt = as.POSIXct(x, tz = tz),
        parse_method = "POSIXct",
        detected_format = "%Y-%m-%d %H:%M:%S",
        excel_system = NA_character_
      ))
    }

    if (inherits(x, "Date")) {
      dt <- as.POSIXct(x, tz = tz)
      return(list(
        dt = dt,
        parse_method = "Date",
        detected_format = "%Y-%m-%d",
        excel_system = NA_character_
      ))
    }

    detected_format <- NA_character_
    excel_system <- NA_character_

    # Text parse attempt
    x_chr <- trimws(as.character(x))
    x_chr <- append_midnight_if_needed(x_chr, assume_midnight = assume_midnight)
    detected_format <- detect_format_string(x_chr)

    dt_text <- suppressWarnings(
      lubridate::parse_date_time(
        x_chr,
        orders = orders,
        tz = tz,
        quiet = TRUE
      )
    )
    score_text <- sum(!is.na(dt_text))

    # Excel serial attempt
    dt_excel <- rep(as.POSIXct(NA, tz = tz), length(x_chr))
    score_excel <- -Inf

    if (excel_dates != "none") {
      numeric_like <- is.numeric(x) || all(
        is.na(x_chr) | !nzchar(x_chr) | grepl("^-?[0-9]+([.][0-9]+)?$", x_chr)
      )

      if (numeric_like) {
        x_num <- suppressWarnings(as.numeric(x_chr))
        chosen <- choose_excel_system(x_num, tz = tz, excel_dates = excel_dates)
        dt_excel <- chosen$dt
        excel_system <- chosen$system
        score_excel <- chosen$score
      }
    }

    # Decide
    use_excel <- FALSE
    if (score_excel > score_text && score_excel > 0) {
      use_excel <- TRUE
    } else if (score_text == 0 && score_excel > 0) {
      use_excel <- TRUE
    }

    if (use_excel) {
      list(
        dt = dt_excel,
        parse_method = "excel_serial",
        detected_format = paste0("Excel serial date (", excel_system, " system)"),
        excel_system = excel_system
      )
    } else {
      list(
        dt = dt_text,
        parse_method = "text",
        detected_format = detected_format,
        excel_system = NA_character_
      )
    }
  }

  compute_resolution_diag <- function(dt) {
    dt_ok <- dt[!is.na(dt)]
    dt_ok <- sort(unique(dt_ok))

    if (length(dt_ok) < 2) {
      return(list(
        n_timestamps = length(dt_ok),
        start_time = if (length(dt_ok)) min(dt_ok) else as.POSIXct(NA),
        end_time = if (length(dt_ok)) max(dt_ok) else as.POSIXct(NA),
        median_step_seconds = NA_real_,
        median_step_minutes = NA_real_,
        dominant_step_seconds = NA_real_,
        dominant_step_minutes = NA_real_,
        irregular_intervals = NA_integer_,
        estimated_missing_steps = NA_integer_,
        step_table = tibble::tibble(
          step_seconds = numeric(0),
          n = integer(0),
          proportion = numeric(0)
        )
      ))
    }

    diffs_sec <- as.numeric(diff(dt_ok), units = "secs")
    diffs_sec <- round(diffs_sec)

    step_tab <- as.data.frame(table(diffs_sec), stringsAsFactors = FALSE)
    names(step_tab) <- c("step_seconds", "n")
    step_tab$step_seconds <- as.numeric(as.character(step_tab$step_seconds))
    step_tab$n <- as.integer(step_tab$n)
    step_tab <- step_tab[order(step_tab$n, decreasing = TRUE), , drop = FALSE]
    step_tab$proportion <- step_tab$n / sum(step_tab$n)
    step_tab <- tibble::as_tibble(step_tab)

    dominant_step <- if (nrow(step_tab)) step_tab$step_seconds[1] else NA_real_
    median_step <- stats::median(diffs_sec, na.rm = TRUE)

    irregular_intervals <- if (is.finite(dominant_step)) {
      sum(diffs_sec != dominant_step, na.rm = TRUE)
    } else {
      NA_integer_
    }

    estimated_missing_steps <- if (is.finite(dominant_step) && dominant_step > 0) {
      sum(pmax(round(diffs_sec / dominant_step) - 1L, 0L), na.rm = TRUE)
    } else {
      NA_integer_
    }

    list(
      n_timestamps = length(dt_ok),
      start_time = min(dt_ok),
      end_time = max(dt_ok),
      median_step_seconds = median_step,
      median_step_minutes = median_step / 60,
      dominant_step_seconds = dominant_step,
      dominant_step_minutes = dominant_step / 60,
      irregular_intervals = irregular_intervals,
      estimated_missing_steps = estimated_missing_steps,
      step_table = step_tab
    )
  }

  make_report <- function(
    file, ext, sep_used, dec_used, datetime_mode, datetime_name,
    parse_method, detected_format, excel_system, tz,
    n_rows_input, n_rows_output, n_cols_output,
    n_bad_datetime, bad_rows, n_dup, dup_dropped,
    resolution_diag
  ) {
    list(
      file = normalizePath(file, winslash = "/", mustWork = FALSE),
      file_type = ext,
      separator = sep_used,
      decimal_mark = dec_used,
      datetime_source = datetime_mode,
      datetime_column_name = datetime_name,
      datetime_parse_method = parse_method,
      datetime_format_detected = detected_format,
      excel_date_system = excel_system,
      target_format = "%Y-%m-%d %H:%M:%S",
      tz = tz,
      n_rows_input = n_rows_input,
      n_rows_output = n_rows_output,
      n_cols_output = n_cols_output,
      n_bad_datetime = n_bad_datetime,
      bad_datetime_rows = bad_rows,
      n_duplicated_timestamps = n_dup,
      duplicated_timestamps_dropped = dup_dropped,
      resolution_diagnostics = resolution_diag
    )
  }

  # ---------------------------------------------------------------------------
  # read file
  # ---------------------------------------------------------------------------

  ext <- tolower(tools::file_ext(file))
  if (!ext %in% c("csv", "txt", "tsv", "xlsx")) {
    stop("Unsupported file extension: ", ext, " (use csv/txt/tsv/xlsx)")
  }

  sep_used <- NA_character_
  dec_used <- NA_character_

  if (ext == "xlsx") {
    df <- if (is.null(sheet) && is.null(range)) {
      readxl::read_excel(file, na = na)
    } else if (is.null(range)) {
      readxl::read_excel(file, sheet = sheet, na = na)
    } else {
      readxl::read_excel(file, sheet = sheet, range = range, na = na)
    }
    df <- tibble::as_tibble(df)
  } else {
    if (ext == "tsv" && is.null(sep)) sep <- "\t"

    lines <- readLines(file, n = 100L, warn = FALSE)
    lines <- lines[nzchar(trimws(lines))]

    if (!length(lines)) stop("File appears to be empty.")

    first_line <- lines[1]
    data_lines <- if (length(lines) > 1) lines[-1] else character(0)

    if (is.null(sep)) sep <- sniff_sep(first_line)
    if (is.null(dec)) dec <- sniff_dec(data_lines, sep = sep)

    sep_used <- sep
    dec_used <- dec

    if (requireNamespace("readr", quietly = TRUE)) {
      df <- readr::read_delim(
        file = file,
        delim = sep,
        na = na,
        locale = readr::locale(decimal_mark = dec),
        show_col_types = FALSE,
        progress = FALSE
      )
      df <- tibble::as_tibble(df)
    } else {
      if (sep %in% c(",", ";", "|")) {
        df <- utils::read.csv(
          file,
          header = TRUE,
          sep = sep,
          dec = dec,
          na.strings = na,
          check.names = FALSE,
          stringsAsFactors = FALSE
        )
      } else {
        df <- utils::read.table(
          file,
          header = TRUE,
          sep = sep,
          dec = dec,
          na.strings = na,
          check.names = FALSE,
          stringsAsFactors = FALSE
        )
      }
      df <- tibble::as_tibble(df)
    }
  }

  if (ncol(df) < 1) stop("No columns found in file.")

  n_rows_input <- nrow(df)

  # ---------------------------------------------------------------------------
  # build datetime
  # ---------------------------------------------------------------------------

  use_separate_cols <- !is.null(date_col) || !is.null(time_col)

  if (use_separate_cols) {
    if (is.null(date_col)) {
      stop("If 'time_col' is supplied, 'date_col' must also be supplied.")
    }

    date_idx <- get_col_index(df, date_col, "date_col")
    time_idx <- get_col_index(df, time_col, "time_col")

    date_name <- names(df)[date_idx]
    out_name <- if (is.null(time_idx)) date_name else "DateTime"

    raw_date <- trimws(as.character(df[[date_idx]]))
    raw_datetime <- if (is.null(time_idx)) {
      raw_date
    } else {
      raw_time <- trimws(as.character(df[[time_idx]]))
      paste(raw_date, raw_time)
    }

    parsed <- parse_datetime_vector(
      raw_datetime,
      tz = tz,
      assume_midnight = assume_midnight,
      orders = orders,
      excel_dates = excel_dates
    )

    dt <- parsed$dt

    drop_idx <- sort(unique(c(date_idx, time_idx)))
    other_df <- df[, setdiff(seq_len(ncol(df)), drop_idx), drop = FALSE]

    time_df <- tibble::tibble(..dt.. = dt)
    names(time_df)[1] <- out_name
    df <- tibble::as_tibble(cbind(time_df, other_df))

    datetime_mode <- if (is.null(time_idx)) "date_col" else "date_col + time_col"
    datetime_name <- out_name

  } else {
    dt_idx <- get_col_index(df, datetime_col, "datetime_col")
    old_name <- names(df)[dt_idx]

    parsed <- parse_datetime_vector(
      df[[dt_idx]],
      tz = tz,
      assume_midnight = assume_midnight,
      orders = orders,
      excel_dates = excel_dates
    )

    dt <- parsed$dt
    df[[dt_idx]] <- dt

    if (dt_idx != 1L) {
      df <- df[, c(dt_idx, setdiff(seq_len(ncol(df)), dt_idx)), drop = FALSE]
    }

    datetime_mode <- "datetime_col"
    datetime_name <- old_name
  }

  # ---------------------------------------------------------------------------
  # reporting messages
  # ---------------------------------------------------------------------------

  n_bad_datetime <- sum(is.na(df[[1]]))
  bad_rows <- which(is.na(df[[1]]))
  bad_rows <- utils::head(bad_rows, 20L)

  if (!quiet) {
    if (!is.na(parsed$detected_format)) {
      message(
        "Detected datetime format: '", parsed$detected_format,
        "'. Converted to '%Y-%m-%d %H:%M:%S'."
      )
    } else {
      message("Datetime parsed and converted to '%Y-%m-%d %H:%M:%S'.")
    }

    if (!is.na(sep_used)) {
      message("Detected separator: '", sep_used, "'.")
    }
    if (!is.na(dec_used)) {
      message("Detected decimal mark: '", dec_used, "'.")
    }

    if (parsed$parse_method == "excel_serial") {
      message("Datetime interpreted as Excel serial date (", parsed$excel_system, " system).")
    }

    if (n_bad_datetime > 0) {
      message(n_bad_datetime, " rows could not be parsed as datetime and remain NA.")
    }
  }

  # ---------------------------------------------------------------------------
  # sort + duplicates
  # ---------------------------------------------------------------------------

  ord <- order(df[[1]], na.last = TRUE)
  df <- df[ord, , drop = FALSE]

  dup <- duplicated(df[[1]]) & !is.na(df[[1]])
  n_dup <- sum(dup)

  if (n_dup > 0) {
    if (drop_dup_times) {
      if (!quiet) {
        message(
          "Found ", n_dup,
          " duplicated timestamps; keeping first occurrence and dropping the rest."
        )
      }
      df <- df[!dup, , drop = FALSE]
    } else {
      if (!quiet) {
        message(
          "Found ", n_dup,
          " duplicated timestamps; leaving as-is (set drop_dup_times = TRUE to drop)."
        )
      }
    }
  }

  # ---------------------------------------------------------------------------
  # resolution diagnostics
  # ---------------------------------------------------------------------------

  resolution_diag <- NULL
  if (detect_resolution) {
    resolution_diag <- compute_resolution_diag(df[[1]])

    if (!quiet && !is.null(resolution_diag)) {
      if (is.finite(resolution_diag$dominant_step_minutes)) {
        message(
          "Detected dominant time step: ",
          format(round(resolution_diag$dominant_step_minutes, 4), trim = TRUE),
          " minutes."
        )
      }
    }
  }

  # ---------------------------------------------------------------------------
  # import report
  # ---------------------------------------------------------------------------

  report <- make_report(
    file = file,
    ext = ext,
    sep_used = sep_used,
    dec_used = dec_used,
    datetime_mode = datetime_mode,
    datetime_name = names(df)[1],
    parse_method = parsed$parse_method,
    detected_format = parsed$detected_format,
    excel_system = parsed$excel_system,
    tz = tz,
    n_rows_input = n_rows_input,
    n_rows_output = nrow(df),
    n_cols_output = ncol(df),
    n_bad_datetime = n_bad_datetime,
    bad_rows = bad_rows,
    n_dup = n_dup,
    dup_dropped = drop_dup_times,
    resolution_diag = resolution_diag
  )

  attr(df, "import_report") <- report

  if (return_report) {
    return(list(
      data = df,
      report = report
    ))
  }

  df
}