#' Vertically Merge Files in a Directory into a Single Large Dataset
#'
#' @description Vertically concatenates files containing data tables in a long
#' format into a single large dataset. In order for the function to work, all
#' files you wish to merge should be in the same format (either txt or csv).
#' This function is very useful for concatenating raw data files
#' of individual subjects in an experiment (in which each line corresponds to
#' a single observation in the experiment) to one raw data file that includes
#' all subjects.
#'
#' @usage file_merge(
#' folder_path = NULL
#' , has_header = TRUE
#' , new_header = c()
#' , raw_file_name = NULL
#' , raw_file_extension = NULL
#' , file_name = "dataset.txt"
#' , save_table = TRUE
#' , dir_save_table = NULL
#' , notification = TRUE
#' )
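#' @param folder_path A string with the path of the folder in which files to be
#' merged are searched. The search is recursive (i.e., it also looks in
#' subdirectories) only when \code{raw_file_name} and
#' \code{raw_file_extension} are provided; otherwise only the files located
#' directly in \code{folder_path} are merged. \code{folder_path} must be
#' provided. Default is \code{NULL}.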
#' @param has_header Logical. If \code{TRUE}, the function takes the first line
#' of the first file found as the header of the merged table. Default is
#' \code{TRUE}.
#' @param new_header String vector with names for columns of the merged table.
#' Default is \code{c()}. If used, \code{new_header} should be the same length
#' as the number of columns in the merged table.
#' @param raw_file_name A string with the name of the files to be searched
#' and then merged. File extension should NOT be included here (see
#' \code{raw_file_extension}). Must be provided together with
#' \code{raw_file_extension}; if both are \code{NULL}, all files located
#' directly in \code{folder_path} are merged. Default is \code{NULL}.
#' @param raw_file_extension A string with the format of the files (i.e.,
#' \code{csv} or \code{txt}) to be merged. Must be provided together with
#' \code{raw_file_name}; if both are \code{NULL}, all files located directly
#' in \code{folder_path} are merged. Default is \code{NULL}.
#' @param file_name A string with the name of the file of the merged table the
#' function creates in case \code{save_table} is \code{TRUE}. The extension of
#' the file can be txt or csv and should be included. Default is
#' \code{"dataset.txt"}.
#' @param save_table Logical. If \code{TRUE}, saves the merged table. Default is
#' \code{TRUE}.
#' @param dir_save_table A string with the path of the folder in which the
#' merged table is saved in case \code{save_table} is \code{TRUE}. Default is the
#' path provided in \code{folder_path}.
#' @param notification Logical. If \code{TRUE}, prints messages about the
#' progress of the function. Default is \code{TRUE}.
#' @return The merged table
file_merge <- function(folder_path = NULL, has_header = TRUE, new_header = c(),
                       raw_file_name = NULL, raw_file_extension = NULL,
                       file_name = "dataset.txt", save_table = TRUE,
                       dir_save_table = NULL, notification = TRUE) {
  # Vertically merges every matching txt/csv file under folder_path into one
  # data frame, optionally renames its columns, optionally writes it to disk,
  # and returns the merged data frame.

  ## Argument validation ----
  if (is.null(folder_path)) {
    stop("Oops! folder_path was not found. Must enter folder_path")
  }
  # The length check keeps the condition scalar so that NULL or a vector
  # yields this message instead of a cryptic error from `if`
  if (length(has_header) != 1 || !(has_header %in% c(TRUE, FALSE))) {
    stop("Oops! has_header is not logical. has_header must be logical")
  }
  # raw_file_name and raw_file_extension must be given together or not at all
  if (!is.null(raw_file_name) && is.null(raw_file_extension)) {
    stop("Oops! raw_file_extension is missing.\nPlease provide raw_file_extension")
  } else if (is.null(raw_file_name) && !is.null(raw_file_extension)) {
    stop("Oops! raw_file_name is missing.\nPlease provide raw_file_name")
  }
  if (!is.null(raw_file_extension) &&
      !(raw_file_extension %in% c("txt", "csv"))) {
    stop("Oops! raw_file_extension is not txt or csv.\nraw_file_extension must be txt or csv")
  }
  # Extension of the output file (last four characters); reused when saving
  extension <- substr(file_name, nchar(file_name) - 3, nchar(file_name))
  if (!(extension %in% c(".txt", ".csv"))) {
    stop(paste("Oops!", file_name, "must include txt or csv extension"))
  }

  # The output directory defaults to the input directory
  # NOTE(review): with this default and no raw_file_name, a previously saved
  # output file sits in folder_path and is picked up as input on a re-run.
  if (is.null(dir_save_table)) {
    dir_save_table <- folder_path
  }

  ## Collect the files to merge ----
  if (is.null(raw_file_name)) {
    # No name/extension given: take every file directly inside folder_path
    file_list <- list.files(path = folder_path, full.names = TRUE)
  } else {
    # Name and extension given: search recursively. The pattern is anchored
    # with `$` so that files such as "name.txt.bak" or "name.txt~" are not
    # picked up (they would abort the merge in the extension check below).
    file_list <- list.files(
      path = folder_path, recursive = TRUE, full.names = TRUE,
      pattern = paste0(raw_file_name, ".*\\.", raw_file_extension, "$")
    )
  }
  n_files <- length(file_list)
  if (n_files == 0) {
    stop("Oops! 0 files were found.\nPlease check folder_path, raw_file_name and raw_file_extension")
  }
  if (notification) {
    message(paste("Found", n_files, if (n_files == 1) "file" else "files"))
  }

  ## Validate the extensions of the files found ----
  # Last four characters of every path, computed in one vectorized call
  file_type <- substr(file_list, nchar(file_list) - 3, nchar(file_list))
  unsupported <- which(!(file_type %in% c(".txt", ".csv")))
  if (length(unsupported) > 0) {
    # Report the first offending file, as a sequential scan would
    bad_file <- file_list[unsupported[1]]
    stop(paste("Oops", bad_file, "in folder", basename(dirname(bad_file)),
               "is not in txt or csv format"))
  }
  if (".txt" %in% file_type && ".csv" %in% file_type) {
    stop("Oops! both txt and csv files were found.\nAll files to merge be should be in the same format")
  }

  ## Read and merge ----
  # All files share one extension at this point, so choose the reader once
  read_one <- if (file_type[1] == ".txt") read.table else read.csv
  tables <- lapply(file_list, function(path) {
    read_one(file = path, header = has_header)
  })
  # A single rbind over all tables avoids re-copying the growing data frame
  # on every iteration
  dataset <- do.call(rbind, tables)

  ## Column names ----
  n_cols <- ncol(dataset)
  if (length(new_header) > 0) {
    if (length(new_header) != n_cols) {
      stop("Oops! new_header should be the same length as the number of\ncolumns in the files to be merged")
    }
    if (notification) {
      if (has_header) {
        message("Found new_header. Replacing existing header with new_header")
      } else {
        message("Adding header according to new_header")
      }
    }
    colnames(dataset) <- new_header
  } else if (!has_header) {
    if (notification) {
      message(paste("Adding header according to v1:v", n_cols, sep = ""))
    }
    # Default names v1..vn when the files carry no header
    colnames(dataset) <- paste0("v", seq_len(n_cols))
  }

  ## Save ----
  if (save_table) {
    out_path <- file.path(dir_save_table, file_name)
    if (extension == ".txt") {
      write.table(dataset, file = out_path, row.names = FALSE)
    } else {
      write.csv(dataset, file = out_path, quote = FALSE, row.names = FALSE)
    }
    if (notification) {
      message(paste(n_files, "files were merged and saved into", file_name))
    }
  }

  # The closing message is a progress message too, so it honours notification
  if (notification) {
    message("file_merge() finished!")
  }
  dataset
} # End of file_merge
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.