#' Read data from exchange data
#' @title Read stomach contents data on exchange format
#' @param control Options \link{STOMcontrol-class}) for dplyr::selecting and collating stomach data.
#' @param delete_errors Logical for deleting records with errors.
#' if FALSE, the function will stop at the first error found,
#' if TRUE records with errors will be deleted, but the function continues if possible.
#' @param allow_alias_names Logical for using alias field names as specified in the stomach_format
#' @param keep_just_mandatory_fields Logical for just keeping mandatory field names as specified in the stomach_format
#' @param fileEncoding character string: if non-empty declares the encoding used on a file so the character data can be re-encoded.
#' @return Stomach data of class STOMobs.
#' @importFrom readr read_csv
#' @export
#' @examples \dontrun{read_exchange_data(control, fileEncoding = "latin1")}
read_exchange_data <- function(control, delete_errors = FALSE, allow_alias_names = FALSE, keep_just_mandatory_fields = FALSE,fileEncoding="") {
# delete_errors = FALSE; allow_alias_names = FALSE; keep_just_mandatory_fields = FALSE; fileEncoding=""
alias_1<-alias_2<-alias_3<-dataset<-field<-fish_id<-mandatory<-n_sample_id<-pred_l<-pred_ll<-pred_lu<-pred_name<-sample_id<-types<-year<-NULL
options(dplyr.summarise.inform = FALSE)
stom_dir <- control@stomach_dir
exchange_file <- control@dataSets
if (is.na(exchange_file[1]))
stop("\nError Input file name for stomach data on exchange format must be given in the control@datasets\n")
stomach_format <- eval(control@stomach_format)
b <- read.csv(file = stomach_format, stringsAsFactors = FALSE,fileEncoding=fileEncoding)
if (allow_alias_names)
b <- subset(b, select = c(field, types, mandatory, alias_1, alias_2, alias_3)) else b <- subset(b, select = c(field, types, mandatory))
all_fields <- b$field
mandatory_names <- subset(b, mandatory)$field
# read stomach data using type definitions
a <- lapply(exchange_file, function(x) {
# test x<-exchange_file[1]
cat("\nProcessing file_", file.path(stom_dir, x), "\n")
a <- readr::read_csv(file = file.path(stom_dir, x), n_max = 1, col_types = readr::cols())
name_in <- colnames(a)
name_in_org <- name_in
if (allow_alias_names) {
b$alias_4 <- b$field
any_name <- unique(c(b$alias_1, b$alias_2, b$alias_3, b$alias_4))
any_name <- any_name[any_name != ""]
# if (length(setdiff(name_in, any_name)) > 0) { cat(paste0('File: ', x, ' includes variable names\n', paste(setdiff(name_in, any_name), collapse = ', '), '\n
# which are not included as a valid variable name in file:', stomach_format,'. \n The invalid variable is not included in the resulting data set.\n')) }
found <- matrix(0, nrow = 4, ncol = length(name_in))
colnames(found) <- name_in
for (i in (1:4)) found[i, ] <- match(name_in, with(b, get(paste0("alias_", i))))
key <- colMeans(found, na.rm = TRUE)
name_in <- all_fields[key]
}
if (!setequal(mandatory_names, intersect(name_in, mandatory_names))) {
stop(paste0("File: ", x, " does not include the mandatory field:", setdiff(mandatory_names, intersect(name_in, mandatory_names)), "\n"))
}
if (length(setdiff(name_in, all_fields)) > 0) {
cat(paste0("File: ", x, " includes variable names:\n", paste(setdiff(name_in, all_fields), collapse = ", "),
"\n which are not considered as a valid variable name and is not included in file:", stomach_format),'\n')
# cat(paste0("File: ", x, " includes variable names:\n", paste(name_in_org[is.na(name_in)], collapse = ", "),
# "\n which are not considered as a valid variable name and are not included in file:", stomach_format),'\n')
if (!delete_errors)
stop("remove not included variables, or rerun with parameter delete_errors=TRUE")
cat("The invalid variable is not included in the resulting data set.\n")
}
if (!allow_alias_names) {
b <- subset(b, field %in% name_in)
key <- match(name_in, b$field)
}
types <- paste(b[key, "types"], collapse = "")
types <- gsub("NA", "-", types)
a <- readr::read_csv(file = file.path(stom_dir, x), col_types = types, na = c("", "NA", "NULL", "-999", "-9", "-99")) # read data with specified data type
prbl<-readr::problems(a)
if (dim(prbl)[[1]]>0) print(prbl)
coln <- b[key, "field"]
colnames(a) <- coln[!is.na(coln)]
return(a)
})
# combine them into one
a <- do.call(dplyr::bind_rows, a)
mis_pl <- is.na(a$pred_l) & is.na(a$pred_ll) & is.na(a$pred_lu)
if (any(mis_pl)) {
if (delete_errors) {
cat("Records with neither pred_l, pred_ll or pred_lu information have been deleted\n")
print(a[mis_pl, ])
a <- a[!mis_pl, ]
} else {
cat("Records with neither pred_l, pred_ll or pred_lu information\n")
print(a[mis_pl, ] %>%
dplyr::select(dataset, year, sample_id, pred_name, pred_l, pred_ll, pred_lu))
("read_exchange_data terminated due to errors in stomach data. Please corrct data or chose parameter delete_errors=TRUE")
}
}
if (keep_just_mandatory_fields)
a <- dplyr::select(a, dplyr::all_of(mandatory_names))
## convert all strings to factors
strings_to_factors <- function(x) {
x %>%
dplyr::mutate_if(is.character, as.factor)
}
strings_to_factors_NA <- function(x) {
x %>%
dplyr::mutate_if(is.character, forcats::fct_na_value_to_level)
}
a <- strings_to_factors_NA(a)
# divide into two data sets, for creation of class STOMobs
cols <- colnames(a)
b <- read.csv(file = stomach_format, stringsAsFactors = FALSE)
pred <- subset(b, PRED == TRUE, select = field)$field
incl <- intersect(cols, pred)
PRED <- a %>%
dplyr::select(dplyr::all_of(incl)) %>%
dplyr::distinct() %>%
dplyr::arrange(sample_id, fish_id)
a1 <- PRED %>%
dplyr::group_by(sample_id, fish_id) %>%
dplyr::summarise(n_sample_id = dplyr::n()) %>%
dplyr::filter(n_sample_id > 1)
if (dim(a1)[[1]] > 0) {
a3 <- dplyr::left_join(a1, PRED)
cat("the following records do not have a unique combination of sample_id and fish_id\n")
print(a3)
stop()
a3
}
prey <- subset(b, PREY == TRUE, select = field)$field
incl <- intersect(cols, prey)
PREY <- a %>%
dplyr::select(dplyr::all_of(incl)) %>%
dplyr::arrange(sample_id, fish_id)
a <- list(PRED = PRED, PREY = PREY)
## assign attributes
# attr(a,'PRED')<-names(a[['PRED']])
# attr(a,'PREY')<-names(a[['PREY']])
for (at in all_stom_attributes()) {
attr(a, at) <- FALSE
}
attr(a, "control") <- control
class(a) <- "STOMobs"
a<-update(a)
if (!is.na(control@predators[1])) a<-subset(a,pred_name %in% control@predators )
if (!is.na(control@years[1])) a<-subset(a,year %in% control@years )
if (!is.na(control@quarters[1])) a<-subset(a,quarter %in% control@quarters )
b <- check_unique_sample_id(a)
if (dim(b)[[1]] > 0) {
print(data.frame(b))
print(xtabs(~year + country, data = b))
stop("Duplicated combinations of sample_id and fish_id may have been found, please check!")
}
return(a)
}
#' Write data on exchange data
#' @title Write stomach contents data of class STOMobs on the exchange format.
#' @param stom Input stomach data set for writing
#' @param exchange_file File name for output data on exchange format.
#' @param fileEncoding character string: if non-empty declares the encoding used on a file so the character data can be re-encoded.
#' @return Returns the input stom invisibly.
#' @examples \dontrun{write_exchange_data(tst, exchange_file = 'cod_stomachs_2017.csv')}
#' @export
write_exchange_data <- function(stom, exchange_file = NA,fileEncoding = "") {
if (is.na(exchange_file))
stop("\nError Input file name for stomach data on exchange format must be given\n")
write.csv(as.data.frame(stom), file = exchange_file,fileEncoding = fileEncoding)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.