R/climate_functions.R

Defines functions dm_subdaily_clim dm_daily_clim read.climate .dm_prepare_climate .dm_lag_vec .dm_roll_right .dm_safe_agg .dm_is_irregular .dm_resolution_hours .dm_maybe_numeric .dm_auto_find_time_col .dm_parse_time_candidate

Documented in dm_daily_clim dm_subdaily_clim read.climate

# =========================================================
# Internal helpers
# =========================================================

.dm_parse_time_candidate <- function(x, tz = "UTC") {
  if (inherits(x, "POSIXct")) return(as.POSIXct(x, tz = tz))
  if (inherits(x, "Date")) return(as.POSIXct(x, tz = tz))

  if (is.factor(x)) x <- as.character(x)

  # numeric: try Excel serial first, then Unix seconds, then Unix milliseconds
  if (is.numeric(x)) {
    out <- rep(as.POSIXct(NA, origin = "1970-01-01", tz = tz), length(x))
    ok <- is.finite(x)

    if (any(ok)) {
      xr <- range(x[ok], na.rm = TRUE)

      # Excel serial dates (roughly 1954-2149)
      if (xr[1] > 20000 && xr[2] < 100000) {
        out[ok] <- as.POSIXct(x[ok] * 86400, origin = "1899-12-30", tz = tz)
        return(out)
      }

      # Unix milliseconds
      if (xr[1] > 1e12 && xr[2] < 5e13) {
        out[ok] <- as.POSIXct(x[ok] / 1000, origin = "1970-01-01", tz = tz)
        return(out)
      }

      # Unix seconds
      if (xr[1] > 1e8 && xr[2] < 5e9) {
        out[ok] <- as.POSIXct(x[ok], origin = "1970-01-01", tz = tz)
        return(out)
      }
    }

    return(out)
  }

  x <- trimws(as.character(x))
  x[x == ""] <- NA_character_

  # try multiple common date-time/date formats
  parsed <- suppressWarnings(
    lubridate::parse_date_time(
      x,
      orders = c(
        "ymd HMS", "ymd HM", "ymd",
        "Ymd HMS", "Ymd HM", "Ymd",
        "dmy HMS", "dmy HM", "dmy",
        "mdy HMS", "mdy HM", "mdy"
      ),
      tz = tz,
      quiet = TRUE
    )
  )

  as.POSIXct(parsed, tz = tz)
}

.dm_auto_find_time_col <- function(dat, tz = "UTC", min_success = 0.6) {
  nms <- names(dat)
  scores <- rep(NA_real_, length(nms))
  parsed_list <- vector("list", length(nms))

  for (i in seq_along(nms)) {
    v <- dat[[i]]
    parsed <- .dm_parse_time_candidate(v, tz = tz)
    parsed_list[[i]] <- parsed

    valid_input <- if (is.character(v) || is.factor(v)) {
      !is.na(v) & trimws(as.character(v)) != ""
    } else {
      !is.na(v)
    }

    denom <- sum(valid_input)
    success <- if (denom == 0) 0 else sum(!is.na(parsed) & valid_input) / denom

    # slight preference to names that look like time/date columns
    name_bonus <- if (grepl("time|date|datetime|timestamp", nms[i], ignore.case = TRUE)) 0.05 else 0
    scores[i] <- success + name_bonus
  }

  best <- which.max(scores)
  best_success <- if (length(best) == 0) 0 else scores[best]

  if (length(best) == 0 || best_success < min_success) {
    stop(
      "Could not detect a valid climate time column automatically. ",
      "Please provide 'time_col' explicitly. Accepted formats are POSIXct, Date, ",
      "or character timestamps such as 'yyyy-mm-dd HH:MM:SS'."
    )
  }

  list(
    time_col = nms[best],
    parsed_time = parsed_list[[best]]
  )
}

.dm_maybe_numeric <- function(x, dec = ".") {
  if (is.numeric(x)) return(x)
  if (inherits(x, "POSIXct") || inherits(x, "Date")) return(x)

  z <- trimws(as.character(x))
  z[z == ""] <- NA_character_

  if (!identical(dec, ".")) {
    z <- gsub(dec, ".", z, fixed = TRUE)
  }

  num <- suppressWarnings(as.numeric(z))

  ok_in <- sum(!is.na(z))
  ok_out <- sum(!is.na(num))

  # only coerce if most non-missing values can be converted
  if (ok_in > 0 && ok_out / ok_in >= 0.8) {
    return(num)
  }

  x
}

.dm_resolution_hours <- function(time) {
  d <- diff(as.numeric(time)) / 3600
  d <- d[is.finite(d) & d > 0]
  if (length(d) == 0) return(NA_real_)
  stats::median(d)
}

.dm_is_irregular <- function(time) {
  d <- diff(as.numeric(time)) / 60
  d <- d[is.finite(d) & d > 0]
  if (length(d) <= 1) return(FALSE)
  length(unique(round(d, 6))) > 1
}

.dm_safe_agg <- function(x, FUN) {
  if (length(x) == 0 || all(is.na(x))) return(NA_real_)
  FUN(x, na.rm = TRUE)
}

.dm_roll_right <- function(x, n, FUN) {
  n <- as.integer(n)
  out <- rep(NA_real_, length(x))

  if (length(x) == 0 || n <= 0) return(out)
  if (n > length(x)) return(out)

  for (i in seq_along(x)) {
    if (i >= n) {
      out[i] <- .dm_safe_agg(x[(i - n + 1):i], FUN)
    }
  }
  out
}

.dm_lag_vec <- function(x, n) {
  n <- as.integer(n)
  if (n <= 0) return(x)
  if (n >= length(x)) return(rep(NA, length(x)))
  c(rep(NA, n), x[seq_len(length(x) - n)])
}

.dm_prepare_climate <- function(clim_input, verbose = FALSE) {
  if (inherits(clim_input, "dm_clim")) {
    out <- tibble::as_tibble(clim_input)
    out$TIME <- as.POSIXct(out$TIME)
    out <- out[order(out$TIME), , drop = FALSE]
    return(out)
  }
  read.climate(clim_input, verbose = verbose)
}



#' @title Read and standardize climate data for dendrometer analyses
#'
#' @description
#' A robust climate-data reader designed to be as flexible as read.dendrometer().
#' It accepts data frames and common file formats, auto-detects separators and
#' decimal marks, parses many datetime formats, supports Excel serial dates,
#' supports separate date + time columns, detects the time column automatically,
#' sorts timestamps, removes duplicates, converts numeric-like climate variables,
#' and returns a standardized tibble with a POSIXct TIME column.
#'
#' @param x A data frame or path to a file. Supported file extensions are
#'   csv, txt, tsv, tab, dat, xls, xlsx, rds, rda, and RData.
#' @param time_col Backward-compatible explicit time/datetime column name or index.
#'   If date_col is also supplied, this is treated as the time-of-day column.
#' @param vars Optional character vector of climate variables to keep.
#' @param sep Optional field separator for text files. If NULL, it is auto-detected.
#' @param dec Optional decimal mark for text files. If NULL, it is auto-detected.
#' @param header Logical; passed to text-file readers.
#' @param sheet Sheet name or index for Excel files.
#' @param tz Time zone for parsed timestamps.
#' @param drop_duplicate_time Logical; if TRUE, duplicated timestamps are removed.
#' @param min_time_success Minimum parsing success proportion for automatic time detection.
#' @param verbose Logical; print an import summary.
#' @param datetime_col Optional explicit datetime column name or index. Prefer this
#'   when the file has one combined timestamp column.
#' @param date_col Optional explicit date column name or index. Can be combined with
#'   time_col, or parsed alone with midnight appended when assume_midnight = TRUE.
#' @param range Optional Excel cell range.
#' @param na Strings to treat as missing values.
#' @param assume_midnight Logical; if TRUE, date-only values are assigned 00:00:00.
#' @param orders Optional lubridate parse_date_time() orders.
#' @param excel_dates One of auto, none, 1900, or 1904.
#' @param drop_empty_cols Logical; if TRUE, columns that are completely empty are removed.
#' @param trim_names Logical; if TRUE, trim whitespace from column names.
#' @param detect_resolution Logical; if TRUE, attach simple time-resolution diagnostics.
#' @param return_report Logical; if TRUE, return list(data = ..., report = ...).
#' @param quiet Logical; suppress messages. By default this is the inverse of verbose.
#'
#' @return A tibble of class dm_clim with TIME in the first column. An import
#'   report is attached as attr(x, "import_report").
#'
#' @export
read.climate <- function(
    x,
    time_col = NULL,
    vars = NULL,
    sep = NULL,
    dec = NULL,
    header = TRUE,
    sheet = 1,
    tz = "UTC",
    drop_duplicate_time = TRUE,
    min_time_success = 0.6,
    verbose = TRUE,
    datetime_col = NULL,
    date_col = NULL,
    range = NULL,
    na = c("", "NA", "NaN", "nan", "null", "NULL", "-9999", "-999", "N/A"),
    assume_midnight = TRUE,
    orders = NULL,
    excel_dates = c("auto", "none", "1900", "1904"),
    drop_empty_cols = TRUE,
    trim_names = TRUE,
    detect_resolution = FALSE,
    return_report = FALSE,
    quiet = !verbose
) {
  excel_dates <- match.arg(excel_dates)

  # -------------------------------------------------------------------------
  # Helpers
  # -------------------------------------------------------------------------
  `%||%` <- function(a, b) if (!is.null(a)) a else b

  default_orders <- function() {
    c(
      "Y-m-d H:M:S", "Y-m-d H:M", "Y-m-d",
      "Y/m/d H:M:S", "Y/m/d H:M", "Y/m/d",
      "Y.m.d H:M:S", "Y.m.d H:M", "Y.m.d",
      "d.m.Y H:M:S", "d.m.Y H:M", "d.m.Y",
      "d-m-Y H:M:S", "d-m-Y H:M", "d-m-Y",
      "d/m/Y H:M:S", "d/m/Y H:M", "d/m/Y",
      "m/d/Y H:M:S", "m/d/Y H:M", "m/d/Y",
      "Y-m-d I:M p", "Y-m-d I:M:S p",
      "m/d/Y I:M p", "m/d/Y I:M:S p",
      "d/m/Y I:M p", "d/m/Y I:M:S p",
      "Ymd HMS", "Ymd HM", "Ymd",
      "dmY HMS", "dmY HM", "dmY",
      "mdY HMS", "mdY HM", "mdY"
    )
  }

  get_col_index <- function(data, col, arg_name) {
    if (is.null(col)) return(NULL)

    if (is.character(col)) {
      idx <- match(col, names(data))
      if (is.na(idx)) {
        idx <- match(tolower(col), tolower(names(data)))
      }
      if (is.na(idx)) stop(arg_name, " '", col, "' not found in columns.", call. = FALSE)
      return(idx)
    }

    idx <- suppressWarnings(as.integer(col))
    if (!is.finite(idx) || idx < 1 || idx > ncol(data)) {
      stop(arg_name, " index out of bounds.", call. = FALSE)
    }
    idx
  }

  nonempty <- function(z) {
    if (is.factor(z)) z <- as.character(z)
    if (is.character(z)) return(!is.na(z) & trimws(z) != "")
    !is.na(z)
  }

  sniff_sep <- function(lines) {
    lines <- lines[nzchar(trimws(lines))]
    if (!length(lines)) return(",")

    candidates <- c("," = ",", ";" = ";", "\\t" = "\t", "|" = "|")

    scores <- vapply(candidates, function(s) {
      counts <- vapply(
        lines,
        function(line) length(strsplit(line, s, fixed = TRUE)[[1]]) - 1L,
        integer(1)
      )
      if (max(counts, na.rm = TRUE) == 0) return(0)
      median(counts, na.rm = TRUE) + stats::sd(counts, na.rm = TRUE) * -0.1
    }, numeric(1))

    if (max(scores, na.rm = TRUE) <= 0) "" else candidates[[which.max(scores)]]
  }

  sniff_dec <- function(lines, sep) {
    if (!length(lines)) return(".")
    if (identical(sep, ",")) return(".")

    if (identical(sep, "")) {
      tokens <- unlist(strsplit(lines, "[[:space:]]+"), use.names = FALSE)
    } else {
      tokens <- unlist(strsplit(lines, sep, fixed = TRUE), use.names = FALSE)
    }
    tokens <- trimws(tokens)
    tokens <- tokens[nzchar(tokens)]

    comma_hits <- sum(grepl("^-?[0-9]+,[0-9]+$", tokens))
    dot_hits <- sum(grepl("^-?[0-9]+\\.[0-9]+$", tokens))

    if (comma_hits > dot_hits) "," else "."
  }

  append_midnight_if_needed <- function(x_chr) {
    if (!assume_midnight) return(x_chr)

    x_chr <- trimws(x_chr)
    only_date <- grepl("^\\d{4}[-/.]\\d{1,2}[-/.]\\d{1,2}$", x_chr) |
      grepl("^\\d{1,2}[-/.]\\d{1,2}[-/.]\\d{4}$", x_chr) |
      grepl("^\\d{8}$", x_chr)

    x_chr[only_date] <- paste0(x_chr[only_date], " 00:00:00")
    x_chr
  }

  excel_serial_to_posix <- function(x_num, tz, system = c("1900", "1904")) {
    system <- match.arg(system)
    origin <- if (system == "1900") "1899-12-30" else "1904-01-01"
    as.POSIXct(x_num * 86400, origin = origin, tz = tz)
  }

  choose_excel_system <- function(x_num, tz, excel_dates) {
    systems <- if (excel_dates == "auto") c("1900", "1904") else excel_dates

    best <- list(
      system = NA_character_,
      dt = rep(as.POSIXct(NA, tz = tz), length(x_num)),
      score = -Inf
    )

    for (sys in systems) {
      dt <- suppressWarnings(excel_serial_to_posix(x_num, tz = tz, system = sys))
      yrs <- suppressWarnings(as.integer(format(dt, "%Y")))
      score <- sum(!is.na(dt) & yrs >= 1980 & yrs <= 2100)
      if (score > best$score) {
        best$system <- sys
        best$dt <- dt
        best$score <- score
      }
    }

    best
  }

  detect_format_string <- function(x_chr) {
    x_chr <- trimws(as.character(x_chr))
    samp <- x_chr[!is.na(x_chr) & nzchar(x_chr)]
    samp <- utils::head(samp, 50)
    if (!length(samp)) return(NA_character_)

    pats <- list(
      "^\\d{4}-\\d{1,2}-\\d{1,2} \\d{1,2}:\\d{1,2}:\\d{1,2}$" = "%Y-%m-%d %H:%M:%S",
      "^\\d{4}-\\d{1,2}-\\d{1,2} \\d{1,2}:\\d{1,2}$" = "%Y-%m-%d %H:%M",
      "^\\d{4}-\\d{1,2}-\\d{1,2}$" = "%Y-%m-%d",
      "^\\d{4}/\\d{1,2}/\\d{1,2} \\d{1,2}:\\d{1,2}:\\d{1,2}$" = "%Y/%m/%d %H:%M:%S",
      "^\\d{4}/\\d{1,2}/\\d{1,2} \\d{1,2}:\\d{1,2}$" = "%Y/%m/%d %H:%M",
      "^\\d{4}/\\d{1,2}/\\d{1,2}$" = "%Y/%m/%d",
      "^\\d{1,2}\\.\\d{1,2}\\.\\d{4} \\d{1,2}:\\d{1,2}:\\d{1,2}$" = "%d.%m.%Y %H:%M:%S",
      "^\\d{1,2}\\.\\d{1,2}\\.\\d{4} \\d{1,2}:\\d{1,2}$" = "%d.%m.%Y %H:%M",
      "^\\d{1,2}\\.\\d{1,2}\\.\\d{4}$" = "%d.%m.%Y",
      "^\\d{1,2}/\\d{1,2}/\\d{4} \\d{1,2}:\\d{1,2}:\\d{1,2}$" = "%m/%d/%Y %H:%M:%S or %d/%m/%Y %H:%M:%S",
      "^\\d{1,2}/\\d{1,2}/\\d{4} \\d{1,2}:\\d{1,2}$" = "%m/%d/%Y %H:%M or %d/%m/%Y %H:%M",
      "^\\d{1,2}/\\d{1,2}/\\d{4}$" = "%m/%d/%Y or %d/%m/%Y",
      "^\\d{4}-\\d{1,2}-\\d{1,2} \\d{1,2}:\\d{1,2} [APap][Mm]$" = "%Y-%m-%d %I:%M %p",
      "^\\d{1,2}/\\d{1,2}/\\d{4} \\d{1,2}:\\d{1,2} [APap][Mm]$" = "%m/%d/%Y %I:%M %p or %d/%m/%Y %I:%M %p"
    )

    hits <- vapply(names(pats), function(rx) mean(grepl(rx, samp)), numeric(1))
    if (max(hits) > 0.9) pats[[which.max(hits)]] else NA_character_
  }

  parse_datetime_vector <- function(z) {
    if (is.null(orders)) orders_local <- default_orders() else orders_local <- orders

    if (inherits(z, "POSIXct")) {
      return(list(
        dt = as.POSIXct(z, tz = tz),
        method = "POSIXct",
        detected_format = "%Y-%m-%d %H:%M:%S",
        excel_system = NA_character_
      ))
    }

    if (inherits(z, "POSIXlt")) {
      return(list(
        dt = as.POSIXct(z, tz = tz),
        method = "POSIXlt",
        detected_format = "%Y-%m-%d %H:%M:%S",
        excel_system = NA_character_
      ))
    }

    if (inherits(z, "Date")) {
      return(list(
        dt = as.POSIXct(z, tz = tz),
        method = "Date",
        detected_format = "%Y-%m-%d",
        excel_system = NA_character_
      ))
    }

    z_chr <- trimws(as.character(z))
    z_chr[z_chr %in% na] <- NA_character_
    z_chr <- append_midnight_if_needed(z_chr)

    detected_format <- detect_format_string(z_chr)

    dt_text <- suppressWarnings(lubridate::parse_date_time(
      z_chr,
      orders = orders_local,
      tz = tz,
      quiet = TRUE
    ))
    dt_text <- as.POSIXct(dt_text, tz = tz)
    score_text <- sum(!is.na(dt_text))

    dt_unix <- rep(as.POSIXct(NA, tz = tz), length(z_chr))
    score_unix <- -Inf
    unix_method <- NA_character_

    z_num <- suppressWarnings(as.numeric(z_chr))
    numeric_like <- is.numeric(z) || all(is.na(z_chr) | !nzchar(z_chr) | grepl("^-?[0-9]+([.]?[0-9]+)?$", z_chr))

    if (numeric_like && any(is.finite(z_num), na.rm = TRUE)) {
      rng <- range(z_num[is.finite(z_num)], na.rm = TRUE)

      if (rng[1] > 1e12 && rng[2] < 5e13) {
        dt_unix <- as.POSIXct(z_num / 1000, origin = "1970-01-01", tz = tz)
        score_unix <- sum(!is.na(dt_unix))
        unix_method <- "unix_milliseconds"
      } else if (rng[1] > 1e8 && rng[2] < 5e9) {
        dt_unix <- as.POSIXct(z_num, origin = "1970-01-01", tz = tz)
        score_unix <- sum(!is.na(dt_unix))
        unix_method <- "unix_seconds"
      }
    }

    dt_excel <- rep(as.POSIXct(NA, tz = tz), length(z_chr))
    score_excel <- -Inf
    excel_system <- NA_character_

    if (excel_dates != "none" && numeric_like && any(is.finite(z_num), na.rm = TRUE)) {
      chosen <- choose_excel_system(z_num, tz = tz, excel_dates = excel_dates)
      dt_excel <- chosen$dt
      excel_system <- chosen$system
      score_excel <- chosen$score
    }

    scores <- c(text = score_text, unix = score_unix, excel = score_excel)
    best <- names(which.max(scores))

    if (best == "excel" && score_excel > 0) {
      return(list(
        dt = dt_excel,
        method = "excel_serial",
        detected_format = paste0("Excel serial date (", excel_system, " system)"),
        excel_system = excel_system
      ))
    }

    if (best == "unix" && score_unix > 0) {
      return(list(
        dt = dt_unix,
        method = unix_method,
        detected_format = unix_method,
        excel_system = NA_character_
      ))
    }

    list(
      dt = dt_text,
      method = "text",
      detected_format = detected_format,
      excel_system = NA_character_
    )
  }

  format_time_of_day <- function(t) {
    if (inherits(t, "POSIXct") || inherits(t, "POSIXlt")) return(format(t, "%H:%M:%S"))
    if (inherits(t, "hms")) return(as.character(t))

    if (is.numeric(t)) {
      out <- rep(NA_character_, length(t))
      ok <- is.finite(t)

      # Excel fractional day, e.g. 0.5 = 12:00:00
      frac <- ok & t >= 0 & t < 1
      secs <- round(t[frac] * 86400)
      out[frac] <- sprintf("%02d:%02d:%02d", secs %/% 3600, (secs %% 3600) %/% 60, secs %% 60)

      # Decimal hours, e.g. 13.5 = 13:30:00
      hrs <- ok & t >= 1 & t < 24
      secs2 <- round(t[hrs] * 3600)
      out[hrs] <- sprintf("%02d:%02d:%02d", secs2 %/% 3600, (secs2 %% 3600) %/% 60, secs2 %% 60)
      return(out)
    }

    z <- trimws(as.character(t))
    z[z %in% na] <- NA_character_
    z
  }

  combine_date_time <- function(date_vec, time_vec = NULL) {
    if (is.null(time_vec)) return(date_vec)

    # Numeric Excel date + numeric fractional day/time.
    if (is.numeric(date_vec) && is.numeric(time_vec)) {
      ok_time <- is.finite(time_vec) & time_vec >= 0 & time_vec < 1
      out <- date_vec
      out[ok_time] <- date_vec[ok_time] + time_vec[ok_time]
      return(out)
    }

    date_chr <- trimws(as.character(date_vec))
    time_chr <- format_time_of_day(time_vec)
    paste(date_chr, time_chr)
  }

  success_rate <- function(parsed, original) {
    valid_input <- nonempty(original)
    denom <- sum(valid_input)
    if (denom == 0) return(0)
    sum(!is.na(parsed) & valid_input) / denom
  }

  has_subdaily_variation <- function(dt) {
    ok <- !is.na(dt)
    if (sum(ok) == 0) return(FALSE)
    any(format(dt[ok], "%H:%M:%S") != "00:00:00")
  }

  looks_like_date_name <- function(nm) grepl("date|day|datum|fecha", nm, ignore.case = TRUE)
  looks_like_time_name <- function(nm) grepl("time|hour|heure|zeit|timestamp|datetime", nm, ignore.case = TRUE)

  auto_find_time <- function(dat) {
    nms <- names(dat)
    best <- list(
      mode = NA_character_,
      datetime_col = NA_integer_,
      date_col = NA_integer_,
      time_col = NA_integer_,
      parsed = rep(as.POSIXct(NA, tz = tz), nrow(dat)),
      score = -Inf,
      success = 0,
      method = NA_character_,
      detected_format = NA_character_,
      excel_system = NA_character_
    )

    # 1) Single-column datetime candidates.
    for (i in seq_along(nms)) {
      parsed <- parse_datetime_vector(dat[[i]])
      success <- success_rate(parsed$dt, dat[[i]])
      bonus <- 0
      if (looks_like_time_name(nms[i]) || looks_like_date_name(nms[i])) bonus <- bonus + 0.08
      if (has_subdaily_variation(parsed$dt)) bonus <- bonus + 0.03
      score <- success + bonus

      if (score > best$score) {
        best <- list(
          mode = "datetime_col",
          datetime_col = i,
          date_col = NA_integer_,
          time_col = NA_integer_,
          parsed = parsed$dt,
          score = score,
          success = success,
          method = parsed$method,
          detected_format = parsed$detected_format,
          excel_system = parsed$excel_system
        )
      }
    }

    # 2) Separate date + time candidates.
    # Prefer columns with date/time-like names, but also check all columns if needed.
    date_candidates <- which(vapply(nms, looks_like_date_name, logical(1)))
    time_candidates <- which(vapply(nms, looks_like_time_name, logical(1)))

    if (!length(date_candidates)) date_candidates <- seq_along(nms)
    if (!length(time_candidates)) time_candidates <- seq_along(nms)

    for (di in date_candidates) {
      for (ti in setdiff(time_candidates, di)) {
        combined <- combine_date_time(dat[[di]], dat[[ti]])
        parsed <- parse_datetime_vector(combined)
        success <- success_rate(parsed$dt, combined)
        bonus <- 0.04
        if (looks_like_date_name(nms[di])) bonus <- bonus + 0.06
        if (looks_like_time_name(nms[ti])) bonus <- bonus + 0.06
        if (has_subdaily_variation(parsed$dt)) bonus <- bonus + 0.05
        score <- success + bonus

        if (score > best$score) {
          best <- list(
            mode = "date_col + time_col",
            datetime_col = NA_integer_,
            date_col = di,
            time_col = ti,
            parsed = parsed$dt,
            score = score,
            success = success,
            method = parsed$method,
            detected_format = parsed$detected_format,
            excel_system = parsed$excel_system
          )
        }
      }
    }

    if (!is.finite(best$score) || best$success < min_time_success) {
      stop(
        "Could not detect a valid climate time column automatically. ",
        "Use datetime_col = 'your_datetime_column' or date_col = 'date' and time_col = 'time'.\n",
        "Detected columns were: ", paste(nms, collapse = ", "),
        call. = FALSE
      )
    }

    best
  }

  maybe_numeric <- function(v) {
    if (is.numeric(v)) return(v)
    if (inherits(v, "POSIXct") || inherits(v, "Date")) return(v)

    z <- trimws(as.character(v))
    z[z %in% na] <- NA_character_
    if (!is.null(dec) && !identical(dec, ".")) z <- gsub(dec, ".", z, fixed = TRUE)
    num <- suppressWarnings(as.numeric(z))

    ok_in <- sum(!is.na(z))
    ok_out <- sum(!is.na(num))

    if (ok_in > 0 && ok_out / ok_in >= 0.8) num else v
  }

  compute_resolution_diag <- function(dt) {
    dt_ok <- dt[!is.na(dt)]
    dt_ok <- sort(unique(dt_ok))

    if (length(dt_ok) < 2) {
      return(list(
        n_timestamps = length(dt_ok),
        start_time = if (length(dt_ok)) min(dt_ok) else as.POSIXct(NA, tz = tz),
        end_time = if (length(dt_ok)) max(dt_ok) else as.POSIXct(NA, tz = tz),
        dominant_step_seconds = NA_real_,
        dominant_step_minutes = NA_real_,
        irregular_intervals = NA_integer_,
        estimated_missing_steps = NA_integer_
      ))
    }

    diffs_sec <- round(as.numeric(diff(dt_ok), units = "secs"))
    diffs_sec <- diffs_sec[is.finite(diffs_sec) & diffs_sec > 0]
    tab <- sort(table(diffs_sec), decreasing = TRUE)
    dominant <- as.numeric(names(tab)[1])

    list(
      n_timestamps = length(dt_ok),
      start_time = min(dt_ok),
      end_time = max(dt_ok),
      dominant_step_seconds = dominant,
      dominant_step_minutes = dominant / 60,
      irregular_intervals = sum(diffs_sec != dominant),
      estimated_missing_steps = sum(pmax(round(diffs_sec / dominant) - 1L, 0L), na.rm = TRUE)
    )
  }

  make_report <- function() {
    list(
      source = input_source,
      file = if (is.character(x) && length(x) == 1) normalizePath(x, winslash = "/", mustWork = FALSE) else NA_character_,
      file_type = file_ext,
      separator = sep_used,
      decimal_mark = dec_used,
      datetime_source = datetime_source,
      datetime_column_name = datetime_name,
      date_column_name = date_name,
      time_column_name = time_name,
      datetime_parse_method = parse_method,
      datetime_format_detected = detected_format,
      excel_date_system = excel_system_used,
      tz = tz,
      n_rows_input = n_rows_input,
      n_rows_output = nrow(out),
      n_cols_output = ncol(out),
      climate_variables = setdiff(names(out), "TIME"),
      n_bad_datetime = n_bad_datetime,
      bad_datetime_rows = utils::head(which(is.na(out$TIME)), 20L),
      n_duplicated_timestamps = n_dup,
      duplicated_timestamps_dropped = drop_duplicate_time,
      resolution_diagnostics = resolution_diag
    )
  }

  # -------------------------------------------------------------------------
  # Read input
  # -------------------------------------------------------------------------
  input_source <- if (is.data.frame(x)) "data_frame" else "file"
  file_ext <- NA_character_
  sep_used <- sep %||% NA_character_
  dec_used <- dec %||% NA_character_

  if (is.data.frame(x)) {
    dat <- tibble::as_tibble(x)
  } else if (is.character(x) && length(x) == 1 && file.exists(x)) {
    file_ext <- tolower(tools::file_ext(x))

    if (file_ext %in% c("xls", "xlsx")) {
      if (!requireNamespace("readxl", quietly = TRUE)) {
        stop("Package 'readxl' is required to read Excel climate files.", call. = FALSE)
      }
      if (is.null(range)) {
        dat <- readxl::read_excel(path = x, sheet = sheet, na = na)
      } else {
        dat <- readxl::read_excel(path = x, sheet = sheet, range = range, na = na)
      }
      dat <- tibble::as_tibble(dat)
    } else if (file_ext %in% c("rds")) {
      dat <- readRDS(x)
      if (!is.data.frame(dat)) stop("RDS file must contain a data frame.", call. = FALSE)
      dat <- tibble::as_tibble(dat)
    } else if (file_ext %in% c("rda", "rdata")) {
      env <- new.env(parent = emptyenv())
      objs <- load(x, envir = env)
      if (length(objs) != 1) stop("RData/RDA file must contain exactly one object.", call. = FALSE)
      dat <- env[[objs]]
      if (!is.data.frame(dat)) stop("RData/RDA object must be a data frame.", call. = FALSE)
      dat <- tibble::as_tibble(dat)
    } else if (file_ext %in% c("csv", "txt", "tsv", "tab", "dat")) {
      lines <- readLines(x, n = 100L, warn = FALSE)
      lines <- lines[nzchar(trimws(lines))]
      if (!length(lines)) stop("Climate file appears to be empty.", call. = FALSE)

      if (is.null(sep)) {
        sep <- if (file_ext %in% c("tsv", "tab")) "\t" else sniff_sep(lines)
      }
      if (is.null(dec)) {
        data_lines <- if (length(lines) > 1) lines[-1] else lines
        dec <- sniff_dec(data_lines, sep = sep)
      }
      sep_used <- sep
      dec_used <- dec

      if (requireNamespace("readr", quietly = TRUE) && !identical(sep, "")) {
        dat <- readr::read_delim(
          file = x,
          delim = sep,
          na = na,
          locale = readr::locale(decimal_mark = dec),
          col_types = readr::cols(.default = readr::col_guess()),
          show_col_types = FALSE,
          progress = FALSE,
          trim_ws = TRUE
        )
        dat <- tibble::as_tibble(dat)
      } else {
        dat <- utils::read.table(
          file = x,
          sep = sep,
          dec = dec,
          header = header,
          na.strings = na,
          stringsAsFactors = FALSE,
          check.names = FALSE,
          fill = TRUE,
          comment.char = "",
          quote = "\"'"
        )
        dat <- tibble::as_tibble(dat)
      }
    } else {
      stop(
        "Unsupported climate file format: ", file_ext,
        ". Use csv, txt, tsv, tab, dat, xls, xlsx, rds, rda, or RData.",
        call. = FALSE
      )
    }
  } else {
    stop("'x' must be a data frame or a valid file path.", call. = FALSE)
  }

  if (trim_names) names(dat) <- trimws(names(dat))
  if (drop_empty_cols && ncol(dat) > 0) {
    keep <- vapply(dat, function(col) any(nonempty(col)), logical(1))
    dat <- dat[, keep, drop = FALSE]
  }

  n_rows_input <- nrow(dat)

  if (ncol(dat) < 2) {
    stop(
      "Climate input must contain at least one time column and one climate variable.\n",
      "Only ", ncol(dat), " column(s) were read. This usually means the delimiter was wrong.\n",
      "Try sep = ';', sep = ',', sep = '\\t', or check whether the file has a header.",
      call. = FALSE
    )
  }

  # -------------------------------------------------------------------------
  # Build TIME
  # -------------------------------------------------------------------------
  datetime_source <- NA_character_
  datetime_name <- NA_character_
  date_name <- NA_character_
  time_name <- NA_character_
  parse_method <- NA_character_
  detected_format <- NA_character_
  excel_system_used <- NA_character_
  drop_idx <- integer(0)

  if (!is.null(datetime_col) && !is.null(date_col)) {
    stop("Use either datetime_col or date_col/time_col, not both.", call. = FALSE)
  }

  if (!is.null(date_col)) {
    date_idx <- get_col_index(dat, date_col, "date_col")
    time_idx <- get_col_index(dat, time_col, "time_col")

    combined <- if (is.null(time_idx)) dat[[date_idx]] else combine_date_time(dat[[date_idx]], dat[[time_idx]])
    parsed <- parse_datetime_vector(combined)
    success <- success_rate(parsed$dt, combined)
    if (success < min_time_success) {
      stop("The supplied date_col/time_col could not be parsed reliably.", call. = FALSE)
    }

    TIME <- parsed$dt
    datetime_source <- if (is.null(time_idx)) "date_col" else "date_col + time_col"
    date_name <- names(dat)[date_idx]
    time_name <- if (is.null(time_idx)) NA_character_ else names(dat)[time_idx]
    datetime_name <- "TIME"
    parse_method <- parsed$method
    detected_format <- parsed$detected_format
    excel_system_used <- parsed$excel_system
    drop_idx <- sort(unique(c(date_idx, time_idx)))
  } else if (!is.null(datetime_col) || !is.null(time_col)) {
    # Backward-compatible: time_col alone means combined datetime column.
    dt_col <- datetime_col %||% time_col
    dt_idx <- get_col_index(dat, dt_col, if (!is.null(datetime_col)) "datetime_col" else "time_col")

    parsed <- parse_datetime_vector(dat[[dt_idx]])
    success <- success_rate(parsed$dt, dat[[dt_idx]])
    if (success < min_time_success) {
      stop("The supplied time/datetime column could not be parsed reliably.", call. = FALSE)
    }

    TIME <- parsed$dt
    datetime_source <- "datetime_col"
    datetime_name <- names(dat)[dt_idx]
    parse_method <- parsed$method
    detected_format <- parsed$detected_format
    excel_system_used <- parsed$excel_system
    drop_idx <- dt_idx
  } else {
    det <- auto_find_time(dat)
    TIME <- det$parsed
    datetime_source <- det$mode
    datetime_name <- if (!is.na(det$datetime_col)) names(dat)[det$datetime_col] else "TIME"
    date_name <- if (!is.na(det$date_col)) names(dat)[det$date_col] else NA_character_
    time_name <- if (!is.na(det$time_col)) names(dat)[det$time_col] else NA_character_
    parse_method <- det$method
    detected_format <- det$detected_format
    excel_system_used <- det$excel_system
    drop_idx <- if (det$mode == "date_col + time_col") {
      sort(unique(c(det$date_col, det$time_col)))
    } else {
      det$datetime_col
    }
  }

  if (all(is.na(TIME))) {
    stop(
      "Could not parse the climate time information. Use datetime_col = '...' ",
      "or date_col = '...' and time_col = '...'.",
      call. = FALSE
    )
  }

  other <- dat[, setdiff(seq_len(ncol(dat)), drop_idx), drop = FALSE]
  out <- tibble::as_tibble(cbind(tibble::tibble(TIME = TIME), other))

  # Keep selected variables.
  if (!is.null(vars)) {
    miss <- setdiff(vars, names(out))
    if (length(miss) > 0) {
      stop("Requested climate variables not found: ", paste(miss, collapse = ", "), call. = FALSE)
    }
    out <- out[, c("TIME", vars), drop = FALSE]
  }

  # Convert climate variables to numeric where sensible.
  for (nm in setdiff(names(out), "TIME")) {
    out[[nm]] <- maybe_numeric(out[[nm]])
  }

  # Sort and remove duplicate timestamps.
  out <- out[order(out$TIME, na.last = TRUE), , drop = FALSE]
  n_bad_datetime <- sum(is.na(out$TIME))

  dup <- duplicated(out$TIME) & !is.na(out$TIME)
  n_dup <- sum(dup)
  if (n_dup > 0 && drop_duplicate_time) {
    out <- out[!dup, , drop = FALSE]
    if (!quiet) warning("Duplicated climate timestamps were removed; first occurrence kept.", call. = FALSE)
  }

  if (ncol(out) < 2) {
    stop("After removing the time column, no climate variables remain.", call. = FALSE)
  }

  numeric_vars <- names(out)[vapply(out, is.numeric, logical(1))]
  numeric_vars <- setdiff(numeric_vars, "TIME")

  if (!length(numeric_vars) && !quiet) {
    warning(
      "No climate variables appear numeric after reading. Check sep, dec, and column formatting.",
      call. = FALSE
    )
  }

  resolution_diag <- if (detect_resolution) compute_resolution_diag(out$TIME) else NULL

  attr(out, "timezone") <- tz
  attr(out, "time_col_original") <- datetime_name
  attr(out, "date_col_original") <- date_name
  attr(out, "clock_time_col_original") <- time_name

  class(out) <- unique(c("dm_clim", class(out)))

  report <- make_report()
  attr(out, "import_report") <- report

  if (!quiet) {
    msg <- paste0(
      "Climate data standardized.\n",
      "Rows: ", nrow(out), "\n",
      "Time source: ", datetime_source,
      if (!is.na(datetime_name)) paste0(" [", datetime_name, "]") else "",
      if (!is.na(date_name)) paste0(" date=[", date_name, "]") else "",
      if (!is.na(time_name)) paste0(" time=[", time_name, "]") else "",
      "\nVariables: ", paste(setdiff(names(out), "TIME"), collapse = ", "), "\n",
      "Numeric variables: ", if (length(numeric_vars)) paste(numeric_vars, collapse = ", ") else "none", "\n",
      "Time range: ", format(min(out$TIME, na.rm = TRUE)), " to ", format(max(out$TIME, na.rm = TRUE))
    )
    message(msg)

    if (!is.na(sep_used)) message("Detected/used separator: '", sep_used, "'.")
    if (!is.na(dec_used)) message("Detected/used decimal mark: '", dec_used, "'.")
    if (!is.na(detected_format)) message("Detected datetime format: ", detected_format)
  }

  if (return_report) {
    return(list(data = out, report = report))
  }

  out
}



#' @title Daily climate summaries for dendrometer analyses
#'
#' @description
#' Computes daily climate summaries from climate time series so they can be
#' related to daily dendrometer summaries from \code{daily.data()}.
#'
#' The input can be a standardized climate object returned by
#' \code{read.climate()}, a raw data frame, or a valid file path accepted by
#' \code{read.climate()}.
#'
#' In addition to same-day climate summaries, the function can also compute
#' lagged and antecedent daily climate features from the summarized daily series:
#' \itemize{
#'   \item lagged values (e.g. previous 1 or 3 days)
#'   \item antecedent means over previous \code{n} days
#'   \item antecedent sums over previous \code{n} days
#' }
#'
#' @details
#' Lagged and antecedent features are calculated from the already summarized
#' daily climate columns. For example, if \code{VPD} is included in
#' \code{max_vars}, the daily summary column will be \code{VPD_max}. If this
#' column is listed in \code{lag_vars} and \code{lag_days = 1}, then the
#' additional column \code{VPD_max_lag_1d} is created.
#'
#' Antecedent means and sums exclude the current day. For example:
#' \deqn{x\_lagmean\_3d(t) = mean(x_{t-3}, x_{t-2}, x_{t-1})}
#' \deqn{x\_lagsum\_7d(t) = sum(x_{t-7}, \ldots, x_{t-1})}
#'
#' @param clim_df Climate input. This can be:
#'   \itemize{
#'     \item a standardized object returned by \code{read.climate()}
#'     \item a raw data frame with a time column in the first column or in a
#'           column named \code{TIME}
#'     \item a valid file path readable by \code{read.climate()}
#'   }
#' @param mean_vars Character vector of variables to summarize by mean.
#' @param min_vars Character vector of variables to summarize by minimum.
#' @param max_vars Character vector of variables to summarize by maximum.
#' @param sum_vars Character vector of variables to summarize by sum.
#' @param median_vars Character vector of variables to summarize by median.
#' @param lag_vars Character vector of summarized daily climate columns for
#'   which simple lagged values should be computed, e.g.
#'   \code{c("VPD_max", "SWC_mean")}.
#' @param lagmean_vars Character vector of summarized daily climate columns for
#'   which antecedent means should be computed, e.g.
#'   \code{c("Tair_mean", "VPD_mean")}.
#' @param lagsum_vars Character vector of summarized daily climate columns for
#'   which antecedent sums should be computed, e.g.
#'   \code{c("P_sum", "Rad_sum")}.
#' @param lag_days Integer vector giving lag/antecedent window sizes in days,
#'   e.g. \code{c(1, 3, 7)}.
#'
#' @return A tibble of class \code{"daily_clim"} with one row per day.
#'
#' @examples
#' \donttest{
#' data(ktm_clim_hourly)
#' clim_day <- dm_daily_clim(
#'   ktm_clim_hourly,
#'   mean_vars = c("temp", "VPD", "RH"),
#'   max_vars  = c("VPD"),
#'   sum_vars  = c("prec"),
#'   lag_vars = c("VPD_max", "temp_mean"),
#'   lagmean_vars = c("temp_mean", "VPD_mean", "RH_mean"),
#'   lagsum_vars = c("prec_sum"),
#'   lag_days = c(1, 3, 7)
#' )
#' head(clim_day, 5)
#' }
#'
#' @export
dm_daily_clim <- function(clim_df,
                          mean_vars = NULL,
                          min_vars = NULL,
                          max_vars = NULL,
                          sum_vars = NULL,
                          median_vars = NULL,
                          lag_vars = NULL,
                          lagmean_vars = NULL,
                          lagsum_vars = NULL,
                          lag_days = c(1, 3, 7)) {
  TIME <- DATE <- NULL

  dat <- .dm_prepare_climate(clim_df, verbose = FALSE)
  numeric_vars <- setdiff(names(dat)[vapply(dat, is.numeric, logical(1))], "TIME")

  if (all(c(
    is.null(mean_vars), is.null(min_vars), is.null(max_vars),
    is.null(sum_vars), is.null(median_vars)
  ))) {
    mean_vars <- numeric_vars
  }

  check_vars <- function(vars, allowed, arg_name) {
    if (is.null(vars)) return(character(0))
    miss <- setdiff(vars, allowed)
    if (length(miss) > 0) {
      stop(sprintf(
        "Unknown or invalid variable(s) in %s: %s",
        arg_name, paste(miss, collapse = ", ")
      ))
    }
    vars
  }

  # raw climate vars to summarize
  mean_vars   <- check_vars(mean_vars, numeric_vars, "mean_vars")
  min_vars    <- check_vars(min_vars, numeric_vars, "min_vars")
  max_vars    <- check_vars(max_vars, numeric_vars, "max_vars")
  sum_vars    <- check_vars(sum_vars, numeric_vars, "sum_vars")
  median_vars <- check_vars(median_vars, numeric_vars, "median_vars")

  # helper for antecedent windows excluding current day
  antecedent_stat <- function(x, n, FUN) {
    out <- rep(NA_real_, length(x))
    n <- as.integer(n)

    if (n <= 0) return(out)
    if (length(x) <= n) return(out)

    for (i in seq_along(x)) {
      if (i > n) {
        out[i] <- .dm_safe_agg(x[(i - n):(i - 1)], FUN)
      }
    }
    out
  }

  # build daily summary first
  out <- dat %>%
    dplyr::mutate(DATE = as.Date(TIME)) %>%
    dplyr::group_by(DATE) %>%
    dplyr::summarise(
      dplyr::across(
        dplyr::all_of(mean_vars),
        ~ .dm_safe_agg(.x, mean),
        .names = "{.col}_mean"
      ),
      dplyr::across(
        dplyr::all_of(min_vars),
        ~ .dm_safe_agg(.x, min),
        .names = "{.col}_min"
      ),
      dplyr::across(
        dplyr::all_of(max_vars),
        ~ .dm_safe_agg(.x, max),
        .names = "{.col}_max"
      ),
      dplyr::across(
        dplyr::all_of(sum_vars),
        ~ .dm_safe_agg(.x, sum),
        .names = "{.col}_sum"
      ),
      dplyr::across(
        dplyr::all_of(median_vars),
        ~ .dm_safe_agg(.x, stats::median),
        .names = "{.col}_median"
      ),
      .groups = "drop"
    )

  # validate requested lag-based daily summary columns
  summary_cols <- setdiff(names(out), "DATE")
  lag_vars     <- check_vars(lag_vars, summary_cols, "lag_vars")
  lagmean_vars <- check_vars(lagmean_vars, summary_cols, "lagmean_vars")
  lagsum_vars  <- check_vars(lagsum_vars, summary_cols, "lagsum_vars")

  if (length(lag_days) > 0) {
    if (any(!is.finite(lag_days)) || any(lag_days <= 0)) {
      stop("'lag_days' must contain positive finite integers.")
    }
    lag_days <- sort(unique(as.integer(lag_days)))
  }

  # simple lags
  if (length(lag_vars) > 0 && length(lag_days) > 0) {
    for (v in lag_vars) {
      for (d in lag_days) {
        out[[paste0(v, "_lag_", d, "d")]] <- dplyr::lag(out[[v]], n = d)
      }
    }
  }

  # antecedent means
  if (length(lagmean_vars) > 0 && length(lag_days) > 0) {
    for (v in lagmean_vars) {
      for (d in lag_days) {
        out[[paste0(v, "_lagmean_", d, "d")]] <- antecedent_stat(out[[v]], d, mean)
      }
    }
  }

  # antecedent sums
  if (length(lagsum_vars) > 0 && length(lag_days) > 0) {
    for (v in lagsum_vars) {
      for (d in lag_days) {
        out[[paste0(v, "_lagsum_", d, "d")]] <- antecedent_stat(out[[v]], d, sum)
      }
    }
  }

  class(out) <- c("daily_clim", class(out))
  out
}


#' @title Subdaily climate features for dendrometer analyses
#'
#' @description
#' Computes rolling-window and lagged climate features at subdaily resolution
#' for direct linkage with point-level dendrometer outputs such as
#' \code{ZG_phase} and \code{SC_phase}.
#'
#' The input can be a standardized climate object returned by
#' \code{read.climate()}, a raw data frame, or a valid file path accepted by
#' \code{read.climate()}.
#'
#' @details
#' The function learns the temporal resolution automatically from the median
#' time step in the \code{TIME} column. It works with hourly as well as
#' minute-resolution data (for example 60-, 30-, 15-, 10-, or 5-minute data).
#'
#' Rolling windows and lags are provided in hours and may be fractional:
#' \itemize{
#'   \item \code{0.25} = 15 minutes
#'   \item \code{0.5} = 30 minutes
#'   \item \code{1} = 1 hour
#'   \item \code{3} = 3 hours
#' }
#'
#' If the user requests a rolling window or lag that is smaller than the
#' inferred climate resolution, the function stops with an error.
#'
#' If a requested window is not an exact multiple of the inferred resolution,
#' it is rounded to the nearest number of time steps and a warning is issued.
#'
#' @param clim_df Climate input. This can be:
#'   \itemize{
#'     \item a standardized object returned by \code{read.climate()}
#'     \item a raw data frame with a time column in the first column or in a
#'           column named \code{TIME}
#'     \item a valid file path readable by \code{read.climate()}
#'   }
#' @param mean_vars Variables for rolling means.
#' @param sum_vars Variables for rolling sums.
#' @param lag_vars Variables for lagged features.
#' @param roll_hours Numeric vector of rolling-window sizes in hours.
#'   Fractional values are allowed, e.g. \code{0.5} for 30 minutes.
#' @param lag_hours Numeric vector of lag sizes in hours.
#'   Fractional values are allowed, e.g. \code{0.25} for 15 minutes.
#' @examples
#' \donttest{
#' data(ktm_clim_hourly)
#'
#' clim_sub <- dm_subdaily_clim(
#'   ktm_clim_hourly,
#'   mean_vars = c("temp", "VPD", "RH"),
#'   sum_vars  = c("prec"),
#'   lag_vars  = c("temp", "VPD", "RH"),
#'   roll_hours = c(1, 3, 6, 24),
#'   lag_hours  = c(1, 3, 6, 24)
#' )
#'
#' head(clim_sub)
#' attr(clim_sub, "resolution_hours")
#' }
#' @return
#' A tibble of class \code{"subdaily_clim"} with timestamp-level climate
#' features added. The inferred temporal resolution in hours is stored in
#' \code{attr(x, "resolution_hours")}.
#'
#' @export
dm_subdaily_clim <- function(clim_df,
                             mean_vars = NULL,
                             sum_vars = NULL,
                             lag_vars = NULL,
                             roll_hours = c(3, 6, 24),
                             lag_hours = c(1, 3, 6, 24)) {

  dat <- .dm_prepare_climate(clim_df, verbose = FALSE)
  numeric_vars <- setdiff(names(dat)[vapply(dat, is.numeric, logical(1))], "TIME")

  if (all(c(is.null(mean_vars), is.null(sum_vars), is.null(lag_vars)))) {
    mean_vars <- numeric_vars
    lag_vars <- numeric_vars
  }

  check_vars <- function(vars, arg_name) {
    if (is.null(vars)) return(character(0))
    miss <- setdiff(vars, numeric_vars)
    if (length(miss) > 0) {
      stop(sprintf("Unknown or non-numeric variable(s) in %s: %s", arg_name, paste(miss, collapse = ", ")))
    }
    vars
  }

  mean_vars <- check_vars(mean_vars, "mean_vars")
  sum_vars  <- check_vars(sum_vars, "sum_vars")
  lag_vars  <- check_vars(lag_vars, "lag_vars")

  res_h <- .dm_resolution_hours(dat$TIME)
  if (is.na(res_h)) {
    stop("Could not infer temporal resolution from the climate time column.")
  }

  if (.dm_is_irregular(dat$TIME)) {
    warning("Climate data appear to have irregular temporal resolution. Rolling/lagged features are based on the median resolution.")
  }

  if (length(roll_hours) > 0 && any(roll_hours < res_h)) {
    stop(
      sprintf(
        "Requested rolling window(s) below the inferred climate resolution (%.6f h): %s",
        res_h,
        paste(roll_hours[roll_hours < res_h], collapse = ", ")
      )
    )
  }

  if (length(lag_hours) > 0 && any(lag_hours < res_h)) {
    stop(
      sprintf(
        "Requested lag(s) below the inferred climate resolution (%.6f h): %s",
        res_h,
        paste(lag_hours[lag_hours < res_h], collapse = ", ")
      )
    )
  }

  out <- dat

  # rolling means
  if (length(mean_vars) > 0 && length(roll_hours) > 0) {
    for (v in mean_vars) {
      for (h in roll_hours) {
        n_steps <- max(1L, as.integer(round(h / res_h)))
        if (abs((h / res_h) - n_steps) > 0.05) {
          warning(sprintf(
            "Rolling window %sh for '%s' is not an exact multiple of the inferred resolution (%.6f h); rounded to %s steps.",
            h, v, res_h, n_steps
          ))
        }
        out[[paste0(v, "_rollmean_", h, "h")]] <- .dm_roll_right(out[[v]], n_steps, mean)
      }
    }
  }

  # rolling sums
  if (length(sum_vars) > 0 && length(roll_hours) > 0) {
    for (v in sum_vars) {
      for (h in roll_hours) {
        n_steps <- max(1L, as.integer(round(h / res_h)))
        if (abs((h / res_h) - n_steps) > 0.05) {
          warning(sprintf(
            "Rolling window %sh for '%s' is not an exact multiple of the inferred resolution (%.6f h); rounded to %s steps.",
            h, v, res_h, n_steps
          ))
        }
        out[[paste0(v, "_rollsum_", h, "h")]] <- .dm_roll_right(out[[v]], n_steps, sum)
      }
    }
  }

  # lags
  if (length(lag_vars) > 0 && length(lag_hours) > 0) {
    for (v in lag_vars) {
      for (h in lag_hours) {
        n_steps <- max(1L, as.integer(round(h / res_h)))
        if (abs((h / res_h) - n_steps) > 0.05) {
          warning(sprintf(
            "Lag window %sh for '%s' is not an exact multiple of the inferred resolution (%.6f h); rounded to %s steps.",
            h, v, res_h, n_steps
          ))
        }
        out[[paste0(v, "_lag_", h, "h")]] <- .dm_lag_vec(out[[v]], n_steps)
      }
    }
  }

  attr(out, "resolution_hours") <- res_h
  class(out) <- c("subdaily_clim", class(out))
  out
}

Try the dendRoAnalyst package in your browser

Any scripts or data that you put into this service are public.

dendRoAnalyst documentation built on May 20, 2026, 5:07 p.m.