#' Group Data
#'
#' Group primary single-stock data files in subdirectories, depending on what
#' columns each data file contains.
#'
#' @param dir directory containing the primary data files.
#' @param quiet whether to suppress screen output.
#'
#' @return
#' Files are copied into subdirectories. As a byproduct, a list is returned,
#' describing which subdirectories contain which data files.
#'
#' @note
#' A primary data file can have a filename such as
#' \file{Yellowtail_snapper_Mexico.csv} and columns such as
#' \code{stockid|scientificname|commonname|year|catch|stocklong}.
#'
#' In addition, the data file may have columns called \code{best_effort} and/or
#' \code{best_index}, containing the effort and/or index series to be used in
#' the SOFIA analysis.
#'
#' This function creates four subdirectories:
#' \enumerate{
#' \item \code{both} - for data files containing both effort and index data
#' \item \code{effort} - for data files containing effort data (and possibly
#' also index)
#' \item \code{index} - for data files containing index data (and possibly also
#' effort)
#' \item \code{neither} - for data files containing neither effort nor index
#' data
#' }
#'
#' The object returned has an attribute \code{count}, showing the number of data
#' files in each subdirectory. The number of original (unique) data files will
#' be:
#' \preformatted{
#' both + (effort-both) + (index-both) + neither
#' }
#'
#' The functions \code{groupData} and \code{convertData} are used together:
#' first group, then convert.
#'
#' @author Arni Magnusson.
#'
#' @seealso
#' \code{\link{convertData}} converts primary data to combined data.
#'
#' \code{\link{SOFIA-package}} gives an overview of the package.
#'
#' @examples
#' \dontrun{
#' groupData("Data_files_Area_31_3")
#' groupData("Data_files_Area_31_3", quiet=TRUE)
#' }
#'
#' @importFrom utils read.csv
#'
#' @export
groupData <- function(dir, quiet=FALSE)
{
if(!dir.exists(dir))
stop("'", dir, "' not found")
## 1 Import CSV files
files <- dir(dir, pattern="\\.csv$", full=TRUE)
if(length(files) == 0)
{
if(!quiet)
message("No CSV files found in '", dir, "'")
return()
}
csv <- lapply(files, read.csv)
names(csv) <- basename(files)
## 2 Create directories
unlink(file.path(dir, c("both","effort","index","neither")), recursive=TRUE)
dir.create(file.path(dir, "both"))
dir.create(file.path(dir, "effort"))
dir.create(file.path(dir, "index"))
dir.create(file.path(dir, "neither"))
## 3 Copy files into directories
for(i in seq_along(files))
{
n <- tolower(names(csv[[i]]))
if("best_effort" %in% n && "best_index" %in% n)
file.copy(files[i], file.path(dir, "both"))
if("best_effort" %in% n)
file.copy(files[i], file.path(dir, "effort"))
if("best_index" %in% n)
file.copy(files[i], file.path(dir, "index"))
if(!("best_effort" %in% n) && !("best_index" %in% n))
file.copy(files[i], file.path(dir, "neither"))
## Report when column names look suspicious
if(!("best_effort" %in% n) && any(grepl("effort", tolower(n))))
warning(basename(files[i]), "\n has effort data (",
paste(n[grep("effort", tolower(n))], collapse=", "),
") but no 'best_effort'")
if(!("best_index" %in% n) && any(grepl("index", tolower(n))))
warning(basename(files[i]), "\n has index data (",
paste(n[grep("index", tolower(n))], collapse=", "),
") but no 'best_index'")
}
## 4 Return list
out <- list(both=dir(file.path(dir, "both")),
effort=dir(file.path(dir, "effort")),
index=dir(file.path(dir, "index")),
neither=dir(file.path(dir, "neither")))
attr(out, "count") <- c(sapply(out, length), unique=length(files))
if(quiet)
invisible(out)
else
out
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.