#' Read data from subdirectories into a structured list
#'
#' Recurse through subdirectories returning either filenames or tibbles (data.frames) by reading csv, xls, or xlsx with \code{e_read_data_files()}.
#'
#' @param fn_path Starting directory path
#' @param fn_detect NULL for all. File specification, used by \code{stringr::str_detect()}, usually specifying file extensions.
#' @param sw_fn_or_dat Return filenames or tibbles (data.frames)
#' @param sw_exclude_empty_dir T/F exclude empty directories
#' @param sw_dat_add_col_path_fn T/F for data, add two columns specifying the directory (\code{DIR__}) and filename (\code{FILE__})
#' @param sw_dat_print_fn_read T/F print file names and dimensions as the files are read
#' @param excel_sheets "all" for all sheets, or a list of numbers "\code{c(1, 2)}"; applies to all excel sheets. Passed to \code{e_read_data_files()}.
#' @param sw_clean_names For data, T/F to clean column names using \code{janitor::clean_names}
#' @param sw_list_or_flat Hierarical list or a "flat" 1-level list (if "fn" with "flat", then will prepend path to fn)
#' @param excel_range When reading Excel files, NULL reads entire sheet, a range is specified as in \code{readxl::read_xlsx}. Applies to all files.
#' @param excel_col_names Specified as in \code{readxl::read_xlsx}. Applies to all files.
#' @param sw_delim F if standard delim, otherwise delim character such as "|"
#' @param sw_read_package_csv_txt "readr" for \code{read_csv} and \code{read_delim}, "utils" for \code{read.csv} and \code{read.delim}
#'
#' @return fn_names Either a structured list of filenames or of tibbles
#' @import dplyr
#' @importFrom stringr str_detect
#' @export
#'
#' @examples
#' \dontrun{
#' # # all file names
#' # e_read_data_subdir_into_lists(
#' # fn_path = "./data-raw/dat_subdir"
#' # , fn_detect = NULL
#' # , sw_fn_or_dat = c("fn", "dat")[1]
#' # , sw_exclude_empty_dir = c(TRUE, FALSE)[1]
#' # )
#' # # selected file names
#' # e_read_data_subdir_into_lists(
#' # fn_path = "./data-raw/dat_subdir"
#' # , fn_detect = c("csv$", "xls$", "xlsx$")
#' # , sw_fn_or_dat = c("fn", "dat")[1]
#' # , sw_exclude_empty_dir = c(TRUE, FALSE)[1]
#' # )
#' # # selected data
#' # e_read_data_subdir_into_lists(
#' # fn_path = "./data-raw/dat_subdir"
#' # , fn_detect = c("csv$", "xls$", "xlsx$")
#' # , sw_fn_or_dat = c("fn", "dat")[2]
#' # , sw_exclude_empty_dir = c(TRUE, FALSE)[1]
#' # , sw_dat_add_col_path_fn = c(TRUE, FALSE)[1]
#' # , sw_dat_print_fn_read = c(TRUE, FALSE)[1]
#' # , sw_clean_names = c(TRUE, FALSE)[2]
#' # , sw_list_or_flat = c("list", "flat")[1]
#' # , sw_delim = c(FALSE, "|")[1]
#' # , sw_read_package_csv_txt = c("readr", "utils")[1]
#' # )
#' # # selected data, flatten the directory structure
#' # e_read_data_subdir_into_lists(
#' # fn_path = "./data-raw/dat_subdir"
#' # , fn_detect = c("csv$", "xls$", "xlsx$")
#' # , sw_fn_or_dat = c("fn", "dat")[2]
#' # , sw_exclude_empty_dir = c(TRUE, FALSE)[1]
#' # , sw_dat_add_col_path_fn = c(TRUE, FALSE)[1]
#' # , sw_dat_print_fn_read = c(TRUE, FALSE)[1]
#' # , sw_clean_names = c(TRUE, FALSE)[2]
#' # , sw_list_or_flat = c("list", "flat")[2]
#' # , sw_delim = c(FALSE, "|")[1]
#' # , sw_read_package_csv_txt = c("readr", "utils")[1]
#' # )
#' }
e_read_data_subdir_into_lists <-
function(
fn_path = "."
, fn_detect = c("csv$", "xls$", "xlsx$")
, sw_fn_or_dat = c("fn", "dat")[1]
, sw_exclude_empty_dir = c(TRUE, FALSE)[1]
, sw_dat_add_col_path_fn = c(TRUE, FALSE)[1]
, sw_dat_print_fn_read = c(TRUE, FALSE)[2]
, excel_sheets = "all"
, sw_clean_names = c(TRUE, FALSE)[2]
, sw_list_or_flat = c("list", "flat")[1]
, excel_range = NULL
, excel_col_names = TRUE
, sw_delim = c(FALSE, "|")[1]
, sw_read_package_csv_txt = c("readr", "utils")[1]
) {
# original idea
# https://stackoverflow.com/questions/27780593/read-nested-folder-and-file-names-as-nested-list/27783472#27783472
## fn_path = "D:/Dropbox/StatAcumen/consult/Rpackages/erikmisc/data-raw/dat_subdir" #/dir_a/dir_aa/dir_aaa"
## fn_path = "D:/Dropbox/StatAcumen/consult/Rpackages/erikmisc/data-raw/dat_subdir/dir_a/dir_aa" #/dir_aaa"
## fn_path = "D:/Dropbox/StatAcumen/consult/Rpackages/erikmisc/data-raw/dat_subdir/dir_a/dir_aa/dir_aaa"
## fn_path = "D:/Dropbox/StatAcumen/consult/Rpackages/erikmisc/data-raw/dat_subdir/dir_b" #/dir_aaa"
## fn_detect = "txt$" # NULL #c("csv$", "xls$", "xlsx$")
## sw_fn_or_dat = c("fn", "dat")[2]
## sw_exclude_empty_dir = c(TRUE, FALSE)[1]
## sw_delim = c(FALSE, "|")[2]
## dat_temp <-
## e_read_data_subdir_into_lists(
## fn_path = "D:/Dropbox/StatAcumen/consult/Rpackages/erikmisc/data-raw/dat_subdir" # /dir_a/dir_aa
## , fn_detect = "txt$" # c("csv$", "xls$", "xlsx$")
## , sw_fn_or_dat = c("fn", "dat")[1]
## , sw_exclude_empty_dir = c(TRUE, FALSE)[1]
## , sw_dat_add_col_path_fn = c(TRUE, FALSE)[1]
## , sw_dat_print_fn_read = c(TRUE, FALSE)[1]
## , excel_sheets = "all"
## , sw_clean_names = c(TRUE, FALSE)[2]
## , sw_list_or_flat = c("list", "flat")[1]
## , excel_range = NULL
## , excel_col_names = TRUE
## , sw_delim = c(FALSE, "|")[2]
## )
## dat_temp
## lapply(dat_temp, class)
# All files and directories
fn_names <-
list.files(
path = fn_path
, no.. = TRUE
)
# Only directory names
dir_names <-
list.dirs(
path = fn_path
, full.names = FALSE
, recursive = FALSE
)
# Determine files found, excluding directory names
fn_to_return <-
fn_names[!fn_names %in% dir_names]
# keep those matching fn_detect specification
if (!is.null(fn_detect)){
ind_dat <-
stringr::str_detect(
string = fn_to_return
, pattern = paste0(fn_detect, collapse = "|")
)
fn_to_return <-
fn_to_return[ind_dat]
}
fn_names <-
fn_to_return
# If there are directories
if(length(dir_names)) {
# recursion
fn_subdir <-
lapply(
file.path(fn_path, dir_names)
, e_read_data_subdir_into_lists
# include function arguments for recursion, otherwise takes function defaults
, fn_detect = fn_detect
, sw_fn_or_dat = sw_fn_or_dat
, sw_exclude_empty_dir = sw_exclude_empty_dir
, sw_dat_add_col_path_fn = sw_dat_add_col_path_fn
, sw_dat_print_fn_read = sw_dat_print_fn_read
, excel_sheets = excel_sheets
, sw_clean_names = sw_clean_names
, sw_list_or_flat = sw_list_or_flat
, excel_range = excel_range
, excel_col_names = excel_col_names
, sw_delim = sw_delim
, sw_read_package_csv_txt = sw_read_package_csv_txt
)
# Set names for the new list
names(fn_subdir) <-
dir_names
if (sw_exclude_empty_dir) {
ind_empty_dir <-
which(
unlist(
lapply(
fn_subdir
, is.null
)
)
)
if(length(ind_empty_dir)) {
for(i_dir in ind_empty_dir) {
fn_subdir[i_dir] <- NULL
}
}
} # if sw_exclude_empty_dir
if (sw_fn_or_dat == "fn") {
# Combine appropriate results for current list
if(length(fn_to_return)) {
if (sw_list_or_flat == c("list", "flat")[1]) {
fn_names <-
c(
# list() makes this a list of fn's instead of separate lists for each fn
list(fn_to_return)
, fn_subdir
)
}
if (sw_list_or_flat == c("list", "flat")[2]) {
# 11/11/2023 prepend path name to file names for flat
for (n_dir in dir_names) {
## n_dir = dir_names[1]
fn_subdir[[ n_dir ]] <-
file.path(n_dir, fn_subdir[[ n_dir ]])
} # n_dir
fn_names <-
c(
fn_to_return
, unlist(fn_subdir)
)
}
} else {
fn_names <-
fn_subdir
}
} # if sw_fn_or_dat "fn"
# read data
if (sw_fn_or_dat == "dat") {
if(length(fn_to_return)) {
dat_to_return <-
e_read_data_files(
read_fn_path = fn_path
, read_fn_names = fn_to_return
, sw_dat_add_col_path_fn = sw_dat_add_col_path_fn
, sw_dat_print_fn_read = sw_dat_print_fn_read
, excel_sheets = excel_sheets
, sw_clean_names = sw_clean_names
, excel_range = excel_range
, excel_col_names = excel_col_names
, sw_delim = sw_delim
, sw_read_package_csv_txt = sw_read_package_csv_txt
)
if (sw_list_or_flat == c("list", "flat")[1]) {
fn_names <-
c(
dat_to_return
, fn_subdir
)
}
if (sw_list_or_flat == c("list", "flat")[2]) {
fn_names <-
c(
dat_to_return
, unlist(fn_subdir, recursive = FALSE)
)
}
} else {
if (sw_list_or_flat == c("list", "flat")[1]) {
fn_names <-
fn_subdir
}
if (sw_list_or_flat == c("list", "flat")[2]) {
fn_names <-
unlist(fn_subdir, recursive = FALSE)
}
}
} # if sw_fn_or_dat "dat"
# end of directories
} else { # if length(dir_names)
if (sw_fn_or_dat == "dat") {
if(length(fn_names)) {
dat_to_return <-
e_read_data_files(
read_fn_path = fn_path
, read_fn_names = fn_names
, sw_dat_add_col_path_fn = sw_dat_add_col_path_fn
, sw_dat_print_fn_read = sw_dat_print_fn_read
, excel_sheets = excel_sheets
, sw_clean_names = sw_clean_names
, excel_range = excel_range
, excel_col_names = excel_col_names
, sw_delim = sw_delim
, sw_read_package_csv_txt = sw_read_package_csv_txt
)
# files, but no data files to read
if (length(dat_to_return) == 0) {
dat_to_return <- NULL
}
if (sw_list_or_flat == c("list", "flat")[1]) {
fn_names <-
dat_to_return
} # if list
if (sw_list_or_flat == c("list", "flat")[2]) {
#if ("list" %in% class(dat_to_return[[1]])) {
if (inherits(dat_to_return[[1]], "list")) {
fn_names <-
unlist(dat_to_return, recursive = FALSE)
} else {
fn_names <-
dat_to_return
}
} # if flat
} # if length(fn_names)
} # if sw_fn_or_dat
} # if length(dir_names)
if(length(fn_names)) {
return(fn_names)
} else {
return(NULL)
}
} # e_read_data_subdir_into_lists
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.