#' Checks if jobs were run with the same seed on older logs
#'
#' Checks every log file inside a folder for the record of the used seed and
#' returns duplicated seeds and corresponding job arrays.
#'
#' @inheritParams default_params_doc
#'
#' @return A data frame with four columns. Each row contains the information
#' of one result with a duplicated seed. There will be no rows if there are
#' no duplicated seeds in any of the logs.
#' Columns are as follows:
#' * `Data`: A character vector with the name of the data set where duplicates
#' were found.
#' * `Models`: A character vector with the name of the corresponding model.
#' * `Seeds`: A numeric with the corresponding seed that was duplicated.
#' * `Array_indices`: A numeric with corresponding array index.
#' @export
#' @author Pedro Santos Neves
#'
#' @details
#' Each log file must contain at least one line matching each of
#' `"Data name"`, `"Model name"`, `"Running analysis with array"` and
#' `"Running analysis with seed"`; otherwise an error naming the offending
#' file is raised. The value of a record is whatever follows the last `": "`
#' on its line. If a record occurs more than once in a file, the first
#' occurrence is used. Sub-directories of `logs_path` are ignored.
#'
#' @note
#' In this function, the lines containing the scraped values can be anywhere in
#' each file, as this occurred in older versions of the package. Checking is
#' thus less efficient and preference should be given to
#' [check_rep_seeds()] in most cases, unless older log files are being
#' checked. [check_rep_seeds()] will fail for most older log files.
#'
#' @seealso [check_rep_seeds()] for optimized checks on current logs.
#'
#' @examples
#' \dontrun{
#' repeated_seeds <- check_rep_seeds_depr(logs_path = "/logs/")
#' }
check_rep_seeds_depr <- function(logs_path) {
  message(
    "NOTE: Use check_rep_seeds() instead if checking logs generated by ",
    "current\nversions of DAISIEutils."
  )
  testit::assert(fact = "Folder exists", dir.exists(logs_path))
  logfiles <- list.files(logs_path, full.names = TRUE)
  # Non-recursive listings also return sub-directories, which readLines()
  # cannot open; keep regular entries only.
  logfiles <- logfiles[!dir.exists(logfiles)]
  testit::assert(fact = "Folder has logfiles", length(logfiles) >= 1)

  # Text identifying the line that holds each record of interest.
  record_patterns <- c(
    data = "Data name",
    model = "Model name",
    array = "Running analysis with array",
    seed = "Running analysis with seed"
  )

  n_logs <- length(logfiles)
  data_names <- character(n_logs)
  model_names <- character(n_logs)
  array_indices <- character(n_logs)
  seeds <- character(n_logs)

  for (i in seq_along(logfiles)) {
    log_lines <- readLines(logfiles[i])
    records <- vapply(
      record_patterns,
      function(pattern) {
        hits <- grep(pattern, log_lines, value = TRUE)
        if (length(hits) < 1) {
          return(NA_character_)
        }
        # Older logs may repeat a record; use the first occurrence explicitly
        # rather than relying on a (warning-raising) truncating assignment.
        sub(".*: ", "", hits[1])
      },
      character(1)
    )
    # Check validity of file: every record must be present.
    if (anyNA(records)) {
      stop("Invalid file found: ", logfiles[i])
    }
    data_names[i] <- records[["data"]]
    model_names[i] <- records[["model"]]
    array_indices[i] <- records[["array"]]
    seeds[i] <- records[["seed"]]
  }

  # Flag every run whose seed occurs more than once (all occurrences, not
  # only the second and later ones).
  is_repeated <- seeds %in% seeds[duplicated(seeds)]

  data.frame(
    "Data" = data_names[is_repeated],
    "Models" = model_names[is_repeated],
    "Seeds" = as.numeric(seeds[is_repeated]),
    "Array_indices" = as.numeric(array_indices[is_repeated])
  )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.