Nothing
#!/usr/bin/env Rscript
# Read an NHIS-style initiation.csv and report how read.csv() treats cohort columns.
#
# - Default read.csv(..., header = TRUE) uses check.names = TRUE, so bare year
# headers like 1950 become X1950 (syntactic names).
# - With check.names = FALSE, names stay as in the file (e.g. "1950").
#
# Usage:
# Rscript tools/check-initiation-csv-read.R path/to/initiation.csv
#
# With no arguments, uses bundled test CSV if your working directory is the
# shg-r package root (also works when `source()`-ing this file from that root).
#
# Example (from shg-r root):
# Rscript tools/check-initiation-csv-read.R tests/testdata/NHIS-1965-2018/csv-complete/initiation.csv
# Resolve `path`, the CSV to inspect.
# The first trailing command-line argument is used when present; otherwise the
# bundled test CSV is looked up relative to the current working directory.
trailing <- commandArgs(trailingOnly = TRUE)
if (length(trailing) == 0L) {
  rel <- "tests/testdata/NHIS-1965-2018/csv-complete/initiation.csv"
  path <- file.path(getwd(), rel)
  if (!file.exists(path)) {
    # No argument and no bundled file: explain what was tried and how to call.
    not_found_msg <- paste0(
      "No path argument and default not found.\n",
      " Default tried: ", path, "\n",
      " Usage: Rscript tools/check-initiation-csv-read.R path/to/initiation.csv\n",
      " Or setwd() to the shg-r package root, then source() or re-run without args."
    )
    stop(not_found_msg, call. = FALSE)
  }
  message(paste0("No path supplied; using default: ", path))
} else {
  path <- trailing[[1L]]
}

# Final guard: applies to a user-supplied path as well as the default.
if (!file.exists(path)) {
  stop(paste0("File not found: ", path), call. = FALSE)
}
# Inspect the first physical line of the file before read.csv() touches it.
#
# readLines() returns character(0) for a zero-byte file, and `if` errors on a
# zero-length condition, so the length is tested before nzchar() to make sure
# the intended "Empty file" message is what the user sees.
raw_lines <- readLines(path, n = 1L, warn = FALSE)
if (length(raw_lines) == 0L || !nzchar(raw_lines[[1L]])) {
  stop("Empty file: ", path, call. = FALSE)
}

# First row = header; split like a naive CSV (no embedded commas in field names
# for SHG tables), then strip leading/trailing whitespace from every field.
raw_fields <- strsplit(raw_lines[[1L]], ",", fixed = TRUE)[[1L]]
raw_fields <- gsub("^\\s+|\\s+$", "", raw_fields)

# Report whether any header field is wrapped in double quotes in the raw text.
quoted_name <- grepl("^\".*\"$", raw_fields)
if (any(quoted_name)) {
  message("Some header fields appear quoted in the first line (count): ", sum(quoted_name))
} else {
  message("First-line header fields: no double-quote wrappers detected (typical SHG CSV).")
}
# Pass 1: read.csv() with check.names = TRUE (its default), so column names
# are made syntactic.
d_default <- read.csv(
  file = path,
  header = TRUE,
  check.names = TRUE,
  stringsAsFactors = FALSE
)
n_default <- colnames(d_default)

# Pass 2: same file with check.names = FALSE, so column names are left
# exactly as written in the header row.
d_rawnames <- read.csv(
  file = path,
  header = TRUE,
  check.names = FALSE,
  stringsAsFactors = FALSE
)
n_raw <- colnames(d_rawnames)
# Cohort columns: year-like tokens (4 digits, not RACE/SEX/AGE)
# TRUE for each element of `nm` that consists of exactly four ASCII digits
# (e.g. "1950"); FALSE otherwise, including for NA.
is_year_token <- function(nm) {
  four_digits <- "^[0-9]{4}$"
  grepl(pattern = four_digits, x = nm)
}
# TRUE for each element of `nm` that is a capital "X" followed by exactly four
# ASCII digits (e.g. "X1950"); FALSE otherwise, including for NA.
is_x_year <- function(nm) {
  x_then_four_digits <- "^X[0-9]{4}$"
  grepl(pattern = x_then_four_digits, x = nm)
}
# Cohort-year names as seen in each of the three views of the header:
# the raw first line, read.csv(check.names = TRUE), read.csv(check.names = FALSE).
raw_cohort <- raw_fields[is_year_token(raw_fields)]
default_cohort <- n_default[is_x_year(n_default)]
rawnames_cohort <- n_raw[is_year_token(n_raw)]

# Build the " (e.g. a, b, c)" suffix for a summary message.
# Returns "" when `x` is empty, so no dangling "(e.g. , ...)" is printed, and
# appends ", ..." only when more than `limit` names exist.
format_examples <- function(x, limit = 5L) {
  if (length(x) == 0L) {
    return("")
  }
  shown <- paste(head(x, limit), collapse = ", ")
  if (length(x) > limit) {
    shown <- paste0(shown, ", ...")
  }
  paste0(" (e.g. ", shown, ")")
}

message("\n=== Summary ===")
message("Rows read: ", nrow(d_default), " Columns: ", ncol(d_default))
message(
  "Cohort-year columns in raw header line (bare digits): ",
  length(raw_cohort), format_examples(raw_cohort)
)
message(
  "After read.csv(check.names = TRUE): ",
  length(default_cohort),
  " names like XYYYY (R adds the X prefix for syntactic names)."
)
message(
  "After read.csv(check.names = FALSE): ",
  length(rawnames_cohort), " names still bare digits",
  format_examples(rawnames_cohort), "."
)

# Show one concrete before/after pair, but only when the expected X-prefixed
# name is actually present in the default read.
if (length(raw_cohort) > 0L && length(default_cohort) > 0L) {
  y0 <- raw_cohort[[1L]]
  x0 <- paste0("X", y0)
  if (x0 %in% n_default) {
    message("\nExample: header '", y0, "' -> column name '", x0, "' with default read.csv().")
  }
}

# Return TRUE invisibly so source()-ing the script prints nothing extra.
invisible(TRUE)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.