Nothing
#' @title Reading dendrometer data
#' @description
#' Reads dendrometer data from \code{.csv}, \code{.txt}, \code{.tsv}, or \code{.xlsx} files,
#' automatically parsing the datetime information and converting it to
#' \code{\%Y-\%m-\%d \%H:\%M:\%S} in the requested timezone.
#'
#' Supports:
#' \itemize{
#' \item automatic delimiter detection for text files,
#' \item real decimal-mark auto-detection for text files,
#' \item Excel serial date support (\code{1900} / \code{1904} systems),
#' \item separate \code{date_col} + \code{time_col},
#' \item optional import report output,
#' \item optional time-resolution diagnostics.
#' }
#'
#' @param file Path to file (\code{.csv}, \code{.txt}, \code{.tsv}, \code{.xlsx}).
#' @param sep Optional delimiter for text files. If \code{NULL}, auto-detect
#' among comma, semicolon, tab, and pipe.
#' @param dec Optional decimal mark for text files. If \code{NULL}, auto-detect
#' between \code{"."} and \code{","}.
#' @param datetime_col Integer or name of the datetime column (default \code{1}).
#' Ignored if \code{date_col} is provided.
#' @param date_col Optional integer or name of a date column.
#' @param time_col Optional integer or name of a time column. Used together with
#' \code{date_col}. If \code{NULL}, only the date column is parsed and
#' \code{"00:00:00"} can be appended if \code{assume_midnight = TRUE}.
#' @param tz Time zone for parsed datetimes (default \code{"UTC"}).
#' @param sheet Excel sheet name or index (for \code{.xlsx}; default \code{NULL} = first sheet).
#' @param range Excel cell range (optional).
#' @param na Character vector of strings to treat as NA.
#' @param assume_midnight Logical; if \code{TRUE}, rows with only a date get \code{"00:00:00"}.
#' @param orders Optional vector of \pkg{lubridate} orders to try. If \code{NULL},
#' a comprehensive default set is used.
#' @param excel_dates Character. One of \code{"auto"}, \code{"none"}, \code{"1900"},
#' or \code{"1904"}. Controls handling of numeric Excel serial dates.
#' @param drop_dup_times Logical; if \code{TRUE}, drop duplicated timestamps
#' (keep first) with a warning/message.
#' @param detect_resolution Logical; if \code{TRUE}, compute basic time-resolution diagnostics.
#' @param return_report Logical; if \code{TRUE}, return a list with
#' \code{$data} and \code{$report}. If \code{FALSE}, return the tibble only.
#' In both cases, the report is attached as \code{attr(x, "import_report")}.
#' @param quiet Logical; if \code{TRUE}, suppress informational messages.
#'
#' @return
#' If \code{return_report = FALSE}, a tibble with a POSIXct first column and the
#' remaining data columns unchanged.
#'
#' If \code{return_report = TRUE}, a list with:
#' \describe{
#' \item{\code{$data}}{The imported tibble.}
#' \item{\code{$report}}{A structured import report.}
#' }
#'
#' @importFrom lubridate parse_date_time ymd_hms
#' @importFrom readxl read_excel
#' @importFrom tibble as_tibble tibble
#' @importFrom utils read.csv read.table head
#' @importFrom tools file_ext
#' @export
read.dendrometer <- function(
file,
sep = NULL,
dec = NULL,
datetime_col = 1,
date_col = NULL,
time_col = NULL,
tz = "UTC",
sheet = NULL,
range = NULL,
na = c("", "NA", "NaN", "nan", "null", "NULL", "-9999"),
assume_midnight = TRUE,
orders = NULL,
excel_dates = c("auto", "none", "1900", "1904"),
drop_dup_times = TRUE,
detect_resolution = FALSE,
return_report = FALSE,
quiet = TRUE
) {
excel_dates <- match.arg(excel_dates)
# ---------------------------------------------------------------------------
# helpers
# ---------------------------------------------------------------------------
default_orders <- function() {
c(
"Y-m-d H:M:S", "Y-m-d H:M", "Y-m-d",
"Y/m/d H:M:S", "Y/m/d H:M", "Y/m/d",
"Y.m.d H:M:S", "Y.m.d H:M", "Y.m.d",
"d.m.Y H:M:S", "d.m.Y H:M", "d.m.Y",
"d-m-Y H:M:S", "d-m-Y H:M", "d-m-Y",
"d/m/Y H:M:S", "d/m/Y H:M", "d/m/Y",
"m/d/Y H:M:S", "m/d/Y H:M", "m/d/Y",
"Y-m-d I:M p", "Y-m-d I:M:S p",
"m/d/Y I:M p", "m/d/Y I:M:S p",
"d/m/Y I:M p", "d/m/Y I:M:S p",
"Ymd HMS", "Ymd HM", "Ymd",
"dmY HMS", "dmY HM", "dmY",
"mdY HMS", "mdY HM", "mdY"
)
}
get_col_index <- function(data, col, arg_name) {
if (is.null(col)) return(NULL)
if (is.character(col)) {
idx <- match(col, names(data))
if (is.na(idx)) stop(arg_name, " '", col, "' not found in columns.")
return(idx)
}
idx <- as.integer(col)
if (!is.finite(idx) || idx < 1 || idx > ncol(data)) {
stop(arg_name, " index out of bounds.")
}
idx
}
sniff_sep <- function(line) {
cands <- c("," = ",", ";" = ";", "\t" = "\t", "|" = "|")
counts <- vapply(
cands,
function(s) length(strsplit(line, s, fixed = TRUE)[[1]]) - 1L,
integer(1)
)
if (max(counts) == 0) "," else names(which.max(counts))
}
sniff_dec <- function(lines, sep) {
if (length(lines) == 0) return(".")
# If comma is delimiter, decimal comma is highly unlikely in plain delimited files.
if (identical(sep, ",")) return(".")
split_lines <- strsplit(lines, sep, fixed = TRUE)
tokens <- unlist(split_lines, use.names = FALSE)
tokens <- trimws(tokens)
tokens <- tokens[nzchar(tokens)]
# Simple numeric token patterns
comma_hits <- sum(grepl("^-?[0-9]+,[0-9]+$", tokens))
dot_hits <- sum(grepl("^-?[0-9]+\\.[0-9]+$", tokens))
if (comma_hits > dot_hits) "," else "."
}
detect_format_string <- function(x_chr) {
x_chr <- trimws(as.character(x_chr))
samp <- x_chr[!is.na(x_chr) & nzchar(x_chr)]
samp <- utils::head(samp, 50)
if (!length(samp)) return(NA_character_)
pats <- list(
"^\\d{1,2}/\\d{1,2}/\\d{4} \\d{1,2}:\\d{1,2}:\\d{1,2}$" = "%m/%d/%Y %H:%M:%S",
"^\\d{1,2}/\\d{1,2}/\\d{4} \\d{1,2}:\\d{1,2}$" = "%m/%d/%Y %H:%M",
"^\\d{4}-\\d{1,2}-\\d{1,2} \\d{1,2}:\\d{1,2}:\\d{1,2}$" = "%Y-%m-%d %H:%M:%S",
"^\\d{4}-\\d{1,2}-\\d{1,2} \\d{1,2}:\\d{1,2}$" = "%Y-%m-%d %H:%M",
"^\\d{4}-\\d{1,2}-\\d{1,2}$" = "%Y-%m-%d",
"^\\d{4}/\\d{1,2}/\\d{1,2} \\d{1,2}:\\d{1,2}:\\d{1,2}$" = "%Y/%m/%d %H:%M:%S",
"^\\d{4}/\\d{1,2}/\\d{1,2} \\d{1,2}:\\d{1,2}$" = "%Y/%m/%d %H:%M",
"^\\d{4}/\\d{1,2}/\\d{1,2}$" = "%Y/%m/%d",
"^\\d{4}\\.\\d{1,2}\\.\\d{1,2} \\d{1,2}:\\d{1,2}:\\d{1,2}$" = "%Y.%m.%d %H:%M:%S",
"^\\d{4}\\.\\d{1,2}\\.\\d{1,2} \\d{1,2}:\\d{1,2}$" = "%Y.%m.%d %H:%M",
"^\\d{4}\\.\\d{1,2}\\.\\d{1,2}$" = "%Y.%m.%d",
"^\\d{1,2}\\.\\d{1,2}\\.\\d{4} \\d{1,2}:\\d{1,2}:\\d{1,2}$" = "%d.%m.%Y %H:%M:%S",
"^\\d{1,2}\\.\\d{1,2}\\.\\d{4} \\d{1,2}:\\d{1,2}$" = "%d.%m.%Y %H:%M",
"^\\d{1,2}\\.\\d{1,2}\\.\\d{4}$" = "%d.%m.%Y",
"^\\d{1,2}-\\d{1,2}-\\d{4} \\d{1,2}:\\d{1,2}:\\d{1,2}$" = "%d-%m-%Y %H:%M:%S",
"^\\d{1,2}-\\d{1,2}-\\d{4} \\d{1,2}:\\d{1,2}$" = "%d-%m-%Y %H:%M",
"^\\d{1,2}-\\d{1,2}-\\d{4}$" = "%d-%m-%Y",
"^\\d{1,2}/\\d{1,2}/\\d{4} \\d{1,2}:\\d{1,2} [APap][Mm]$" = "%m/%d/%Y %I:%M %p",
"^\\d{4}-\\d{1,2}-\\d{1,2} \\d{1,2}:\\d{1,2} [APap][Mm]$" = "%Y-%m-%d %I:%M %p"
)
hits <- vapply(names(pats), function(rx) mean(grepl(rx, samp)), numeric(1))
if (max(hits) > 0.9) pats[[which.max(hits)]] else NA_character_
}
append_midnight_if_needed <- function(x_chr, assume_midnight) {
if (!assume_midnight) return(x_chr)
x_chr <- trimws(x_chr)
only_date <- grepl("^\\d{4}[-/.]\\d{1,2}[-/.]\\d{1,2}$", x_chr) |
grepl("^\\d{1,2}[-/.]\\d{1,2}[-/.]\\d{4}$", x_chr) |
grepl("^\\d{8}$", x_chr)
x_chr[only_date] <- paste0(x_chr[only_date], " 00:00:00")
x_chr
}
excel_serial_to_posix <- function(x_num, tz, system = c("1900", "1904")) {
system <- match.arg(system)
origin <- if (system == "1900") "1899-12-30" else "1904-01-01"
out <- as.POSIXct(x_num * 86400, origin = origin, tz = tz)
out
}
choose_excel_system <- function(x_num, tz, excel_dates) {
systems <- if (excel_dates == "auto") c("1900", "1904") else excel_dates
best <- list(
system = NA_character_,
dt = rep(as.POSIXct(NA, tz = tz), length(x_num)),
score = -Inf
)
for (sys in systems) {
dt <- suppressWarnings(excel_serial_to_posix(x_num, tz = tz, system = sys))
yrs <- suppressWarnings(as.integer(format(dt, "%Y")))
score <- sum(!is.na(dt) & yrs >= 1980 & yrs <= 2100)
if (score > best$score) {
best$system <- sys
best$dt <- dt
best$score <- score
}
}
best
}
parse_datetime_vector <- function(x, tz, assume_midnight, orders, excel_dates) {
if (is.null(orders)) orders <- default_orders()
# Already parsed classes
if (inherits(x, "POSIXct")) {
return(list(
dt = as.POSIXct(x, tz = tz),
parse_method = "POSIXct",
detected_format = "%Y-%m-%d %H:%M:%S",
excel_system = NA_character_
))
}
if (inherits(x, "Date")) {
dt <- as.POSIXct(x, tz = tz)
return(list(
dt = dt,
parse_method = "Date",
detected_format = "%Y-%m-%d",
excel_system = NA_character_
))
}
detected_format <- NA_character_
excel_system <- NA_character_
# Text parse attempt
x_chr <- trimws(as.character(x))
x_chr <- append_midnight_if_needed(x_chr, assume_midnight = assume_midnight)
detected_format <- detect_format_string(x_chr)
dt_text <- suppressWarnings(
lubridate::parse_date_time(
x_chr,
orders = orders,
tz = tz,
quiet = TRUE
)
)
score_text <- sum(!is.na(dt_text))
# Excel serial attempt
dt_excel <- rep(as.POSIXct(NA, tz = tz), length(x_chr))
score_excel <- -Inf
if (excel_dates != "none") {
numeric_like <- is.numeric(x) || all(
is.na(x_chr) | !nzchar(x_chr) | grepl("^-?[0-9]+([.][0-9]+)?$", x_chr)
)
if (numeric_like) {
x_num <- suppressWarnings(as.numeric(x_chr))
chosen <- choose_excel_system(x_num, tz = tz, excel_dates = excel_dates)
dt_excel <- chosen$dt
excel_system <- chosen$system
score_excel <- chosen$score
}
}
# Decide
use_excel <- FALSE
if (score_excel > score_text && score_excel > 0) {
use_excel <- TRUE
} else if (score_text == 0 && score_excel > 0) {
use_excel <- TRUE
}
if (use_excel) {
list(
dt = dt_excel,
parse_method = "excel_serial",
detected_format = paste0("Excel serial date (", excel_system, " system)"),
excel_system = excel_system
)
} else {
list(
dt = dt_text,
parse_method = "text",
detected_format = detected_format,
excel_system = NA_character_
)
}
}
compute_resolution_diag <- function(dt) {
dt_ok <- dt[!is.na(dt)]
dt_ok <- sort(unique(dt_ok))
if (length(dt_ok) < 2) {
return(list(
n_timestamps = length(dt_ok),
start_time = if (length(dt_ok)) min(dt_ok) else as.POSIXct(NA),
end_time = if (length(dt_ok)) max(dt_ok) else as.POSIXct(NA),
median_step_seconds = NA_real_,
median_step_minutes = NA_real_,
dominant_step_seconds = NA_real_,
dominant_step_minutes = NA_real_,
irregular_intervals = NA_integer_,
estimated_missing_steps = NA_integer_,
step_table = tibble::tibble(
step_seconds = numeric(0),
n = integer(0),
proportion = numeric(0)
)
))
}
diffs_sec <- as.numeric(diff(dt_ok), units = "secs")
diffs_sec <- round(diffs_sec)
step_tab <- as.data.frame(table(diffs_sec), stringsAsFactors = FALSE)
names(step_tab) <- c("step_seconds", "n")
step_tab$step_seconds <- as.numeric(as.character(step_tab$step_seconds))
step_tab$n <- as.integer(step_tab$n)
step_tab <- step_tab[order(step_tab$n, decreasing = TRUE), , drop = FALSE]
step_tab$proportion <- step_tab$n / sum(step_tab$n)
step_tab <- tibble::as_tibble(step_tab)
dominant_step <- if (nrow(step_tab)) step_tab$step_seconds[1] else NA_real_
median_step <- stats::median(diffs_sec, na.rm = TRUE)
irregular_intervals <- if (is.finite(dominant_step)) {
sum(diffs_sec != dominant_step, na.rm = TRUE)
} else {
NA_integer_
}
estimated_missing_steps <- if (is.finite(dominant_step) && dominant_step > 0) {
sum(pmax(round(diffs_sec / dominant_step) - 1L, 0L), na.rm = TRUE)
} else {
NA_integer_
}
list(
n_timestamps = length(dt_ok),
start_time = min(dt_ok),
end_time = max(dt_ok),
median_step_seconds = median_step,
median_step_minutes = median_step / 60,
dominant_step_seconds = dominant_step,
dominant_step_minutes = dominant_step / 60,
irregular_intervals = irregular_intervals,
estimated_missing_steps = estimated_missing_steps,
step_table = step_tab
)
}
make_report <- function(
file, ext, sep_used, dec_used, datetime_mode, datetime_name,
parse_method, detected_format, excel_system, tz,
n_rows_input, n_rows_output, n_cols_output,
n_bad_datetime, bad_rows, n_dup, dup_dropped,
resolution_diag
) {
list(
file = normalizePath(file, winslash = "/", mustWork = FALSE),
file_type = ext,
separator = sep_used,
decimal_mark = dec_used,
datetime_source = datetime_mode,
datetime_column_name = datetime_name,
datetime_parse_method = parse_method,
datetime_format_detected = detected_format,
excel_date_system = excel_system,
target_format = "%Y-%m-%d %H:%M:%S",
tz = tz,
n_rows_input = n_rows_input,
n_rows_output = n_rows_output,
n_cols_output = n_cols_output,
n_bad_datetime = n_bad_datetime,
bad_datetime_rows = bad_rows,
n_duplicated_timestamps = n_dup,
duplicated_timestamps_dropped = dup_dropped,
resolution_diagnostics = resolution_diag
)
}
# ---------------------------------------------------------------------------
# read file
# ---------------------------------------------------------------------------
ext <- tolower(tools::file_ext(file))
if (!ext %in% c("csv", "txt", "tsv", "xlsx")) {
stop("Unsupported file extension: ", ext, " (use csv/txt/tsv/xlsx)")
}
sep_used <- NA_character_
dec_used <- NA_character_
if (ext == "xlsx") {
df <- if (is.null(sheet) && is.null(range)) {
readxl::read_excel(file, na = na)
} else if (is.null(range)) {
readxl::read_excel(file, sheet = sheet, na = na)
} else {
readxl::read_excel(file, sheet = sheet, range = range, na = na)
}
df <- tibble::as_tibble(df)
} else {
if (ext == "tsv" && is.null(sep)) sep <- "\t"
lines <- readLines(file, n = 100L, warn = FALSE)
lines <- lines[nzchar(trimws(lines))]
if (!length(lines)) stop("File appears to be empty.")
first_line <- lines[1]
data_lines <- if (length(lines) > 1) lines[-1] else character(0)
if (is.null(sep)) sep <- sniff_sep(first_line)
if (is.null(dec)) dec <- sniff_dec(data_lines, sep = sep)
sep_used <- sep
dec_used <- dec
if (requireNamespace("readr", quietly = TRUE)) {
df <- readr::read_delim(
file = file,
delim = sep,
na = na,
locale = readr::locale(decimal_mark = dec),
show_col_types = FALSE,
progress = FALSE
)
df <- tibble::as_tibble(df)
} else {
if (sep %in% c(",", ";", "|")) {
df <- utils::read.csv(
file,
header = TRUE,
sep = sep,
dec = dec,
na.strings = na,
check.names = FALSE,
stringsAsFactors = FALSE
)
} else {
df <- utils::read.table(
file,
header = TRUE,
sep = sep,
dec = dec,
na.strings = na,
check.names = FALSE,
stringsAsFactors = FALSE
)
}
df <- tibble::as_tibble(df)
}
}
if (ncol(df) < 1) stop("No columns found in file.")
n_rows_input <- nrow(df)
# ---------------------------------------------------------------------------
# build datetime
# ---------------------------------------------------------------------------
use_separate_cols <- !is.null(date_col) || !is.null(time_col)
if (use_separate_cols) {
if (is.null(date_col)) {
stop("If 'time_col' is supplied, 'date_col' must also be supplied.")
}
date_idx <- get_col_index(df, date_col, "date_col")
time_idx <- get_col_index(df, time_col, "time_col")
date_name <- names(df)[date_idx]
out_name <- if (is.null(time_idx)) date_name else "DateTime"
raw_date <- trimws(as.character(df[[date_idx]]))
raw_datetime <- if (is.null(time_idx)) {
raw_date
} else {
raw_time <- trimws(as.character(df[[time_idx]]))
paste(raw_date, raw_time)
}
parsed <- parse_datetime_vector(
raw_datetime,
tz = tz,
assume_midnight = assume_midnight,
orders = orders,
excel_dates = excel_dates
)
dt <- parsed$dt
drop_idx <- sort(unique(c(date_idx, time_idx)))
other_df <- df[, setdiff(seq_len(ncol(df)), drop_idx), drop = FALSE]
time_df <- tibble::tibble(..dt.. = dt)
names(time_df)[1] <- out_name
df <- tibble::as_tibble(cbind(time_df, other_df))
datetime_mode <- if (is.null(time_idx)) "date_col" else "date_col + time_col"
datetime_name <- out_name
} else {
dt_idx <- get_col_index(df, datetime_col, "datetime_col")
old_name <- names(df)[dt_idx]
parsed <- parse_datetime_vector(
df[[dt_idx]],
tz = tz,
assume_midnight = assume_midnight,
orders = orders,
excel_dates = excel_dates
)
dt <- parsed$dt
df[[dt_idx]] <- dt
if (dt_idx != 1L) {
df <- df[, c(dt_idx, setdiff(seq_len(ncol(df)), dt_idx)), drop = FALSE]
}
datetime_mode <- "datetime_col"
datetime_name <- old_name
}
# ---------------------------------------------------------------------------
# reporting messages
# ---------------------------------------------------------------------------
n_bad_datetime <- sum(is.na(df[[1]]))
bad_rows <- which(is.na(df[[1]]))
bad_rows <- utils::head(bad_rows, 20L)
if (!quiet) {
if (!is.na(parsed$detected_format)) {
message(
"Detected datetime format: '", parsed$detected_format,
"'. Converted to '%Y-%m-%d %H:%M:%S'."
)
} else {
message("Datetime parsed and converted to '%Y-%m-%d %H:%M:%S'.")
}
if (!is.na(sep_used)) {
message("Detected separator: '", sep_used, "'.")
}
if (!is.na(dec_used)) {
message("Detected decimal mark: '", dec_used, "'.")
}
if (parsed$parse_method == "excel_serial") {
message("Datetime interpreted as Excel serial date (", parsed$excel_system, " system).")
}
if (n_bad_datetime > 0) {
message(n_bad_datetime, " rows could not be parsed as datetime and remain NA.")
}
}
# ---------------------------------------------------------------------------
# sort + duplicates
# ---------------------------------------------------------------------------
ord <- order(df[[1]], na.last = TRUE)
df <- df[ord, , drop = FALSE]
dup <- duplicated(df[[1]]) & !is.na(df[[1]])
n_dup <- sum(dup)
if (n_dup > 0) {
if (drop_dup_times) {
if (!quiet) {
message(
"Found ", n_dup,
" duplicated timestamps; keeping first occurrence and dropping the rest."
)
}
df <- df[!dup, , drop = FALSE]
} else {
if (!quiet) {
message(
"Found ", n_dup,
" duplicated timestamps; leaving as-is (set drop_dup_times = TRUE to drop)."
)
}
}
}
# ---------------------------------------------------------------------------
# resolution diagnostics
# ---------------------------------------------------------------------------
resolution_diag <- NULL
if (detect_resolution) {
resolution_diag <- compute_resolution_diag(df[[1]])
if (!quiet && !is.null(resolution_diag)) {
if (is.finite(resolution_diag$dominant_step_minutes)) {
message(
"Detected dominant time step: ",
format(round(resolution_diag$dominant_step_minutes, 4), trim = TRUE),
" minutes."
)
}
}
}
# ---------------------------------------------------------------------------
# import report
# ---------------------------------------------------------------------------
report <- make_report(
file = file,
ext = ext,
sep_used = sep_used,
dec_used = dec_used,
datetime_mode = datetime_mode,
datetime_name = names(df)[1],
parse_method = parsed$parse_method,
detected_format = parsed$detected_format,
excel_system = parsed$excel_system,
tz = tz,
n_rows_input = n_rows_input,
n_rows_output = nrow(df),
n_cols_output = ncol(df),
n_bad_datetime = n_bad_datetime,
bad_rows = bad_rows,
n_dup = n_dup,
dup_dropped = drop_dup_times,
resolution_diag = resolution_diag
)
attr(df, "import_report") <- report
if (return_report) {
return(list(
data = df,
report = report
))
}
df
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.