#' @title Validate all studies
#'
#' @description Run the validation checks on the metadata and manifest files
#' of each study found in the fileview. The checks for a single study are
#' delegated to `validate_study()`, which in turn relies on
#' `dccvalidator::check_all()`. The result is a list with one element per
#' study, named after the study.
#'
#' @param fileview The fileview table. Its `study` column provides the study
#'   names.
#' @param annotations A data frame of annotation definitions.
#'   Must contain at least three columns: key, value, and columnType.
#' @param syn Synapse client object.
#' @return Named list of check results, where the names of the list refer
#'   to the study. Returns NULL if no studies in fileview.
validate_all_studies <- function(fileview, annotations, syn) {
  # Distinct study names found in the fileview
  study_names <- unique(fileview$study)
  if (length(study_names) < 1) {
    return(NULL)
  }

  # Validate the latest set of files belonging to one study
  validate_one <- function(name) {
    latest_files <- filter_study_table_latest(fileview, name)
    validate_study(
      study_table = latest_files,
      annotations = annotations,
      syn = syn,
      study = name
    )
  }

  # One result per study, labelled with the study name
  stats::setNames(purrr::map(study_names, validate_one), study_names)
}
#' @title Validate a study
#'
#' @description Run validation checks for a single study and return the
#' results. Validation checks are run via `dccvalidator::check_all()`. It
#' is assumed that there is, at most, one row per metadataType in the
#' `study_table` passed in. Any of the required metadata types (manifest,
#' assay, biospecimen, individual) that is missing from the table is added as
#' a placeholder row before the checks are run.
#'
#' @importFrom dccvalidator check_all
#' @param study_table Tibble with fileview information for
#'   a single study. Expected columns are: 'metadataType',
#'   'file_data' (tibble column with data for each file), 'assay',
#'   'species', 'biospecimenType' (optional). Should have, at most, one row
#'   per metadataType.
#' @param annotations A data frame of annotation definitions.
#'   Must contain at least three columns: 'key', 'value', and 'columnType'.
#' @param syn Synapse client object.
#' @param study Name of the study.
#' @return Named list of check results. Returns `NULL` if `study_table`
#'   contains none of the required metadata types.
validate_study <- function(study_table, annotations, syn, study) {
  required <- c("manifest", "assay", "biospecimen", "individual")
  is_present <- required %in% study_table$metadataType

  # Nothing to validate when none of the required file types are present
  if (!any(is_present)) {
    return(NULL)
  }

  # If biospecimenType exists in the table, make sure it's a character column.
  # as.character() is the explicit conversion and yields the labels (not the
  # integer codes) should the column be a factor.
  if ("biospecimenType" %in% names(study_table)) {
    study_table$biospecimenType <- as.character(study_table$biospecimenType)
  }

  # dccvalidator::check_all() requires one row per metadata type; add a
  # "dummy" row (only metadataType filled in) for each type that is missing
  missing_types <- required[!is_present]
  if (length(missing_types) > 0) {
    study_table <- tibble::add_row(study_table, metadataType = missing_types)
  }

  # Grab all metadata templates from config
  study_table <- add_template_col(study_table = study_table)

  # Fall back to NA when no samples table is configured
  samples_table <- get_golem_config("samples_table")
  if (is.null(samples_table)) {
    samples_table <- NA
  }

  dccvalidator::check_all(
    data = study_table,
    annotations = annotations,
    syn = syn,
    study = study,
    samples_table = samples_table
  )
}
#' @title Add template column
#'
#' @description Add a template column to the `study_table`. The `study_table`
#' requires columns metadataType, assay, species, and allows optional
#' biospecimenType. Must have one row per `metadataType`: individual, assay,
#' biospecimen, manifest. Template ids are looked up with
#' `dccvalidator::gather_template_ids()` using the unique, non-missing
#' species, assay and biospecimen type values found in the table.
#'
#' @noRd
#' @inheritParams validate_study
#' @return The `study_table` with an additional `template` column. The value
#'   is `NA` for the assay row when the table has no assay value, and for the
#'   biospecimen row when more than one template matches.
add_template_col <- function(study_table) {
  # Ensure the needed columns exist
  needed <- c("metadataType", "assay", "species")
  if (!all(needed %in% names(study_table))) {
    stop("The study table should have the columns metadataType, assay, species")
  }

  # Unique biospecimen type(s); a single NA means "no usable type"
  biospecimen_type <- NA
  if ("biospecimenType" %in% names(study_table)) {
    biospecimen_type <- study_table[["biospecimenType"]]
    # Treat the string "NaN" as missing, then drop all missing values
    biospecimen_type[biospecimen_type %in% "NaN"] <- NA
    biospecimen_type <- unique(stats::na.omit(biospecimen_type))
    # The column may hold nothing but NaN/NA/NULL or blanks; check in case
    if (length(biospecimen_type) < 1 || any(biospecimen_type %in% "")) {
      biospecimen_type <- NA
    }
  }
  species <- unique(stats::na.omit(study_table[["species"]]))
  assay <- unique(stats::na.omit(study_table[["assay"]]))

  # Look up the template id for a single metadata type
  template_for <- function(type) {
    switch(type,
      manifest = dccvalidator::gather_template_ids("manifest"),
      individual = dccvalidator::gather_template_ids(
        "individual",
        species = species
      ),
      assay = {
        # `assay` is already NA-free, so only its length needs checking;
        # without any assay there is no template to look up
        if (length(assay) > 0) {
          dccvalidator::gather_template_ids(
            "assay",
            species = species,
            assay = assay
          )
        } else {
          NA
        }
      },
      biospecimen = {
        # anyNA() keeps the condition length one even if the study has
        # several biospecimen types
        if (!anyNA(biospecimen_type)) {
          template <- dccvalidator::gather_template_ids(
            "biospecimen",
            species = species,
            biospecimen_type = biospecimen_type
          )
        } else {
          template <- dccvalidator::gather_template_ids(
            "biospecimen",
            species = species
          )
        }
        # Should only have a single template, but if there was no biospecimen
        # type and the config has biospecimen types, then will have all
        # possible. Just return NA in that case
        if (length(template) > 1) {
          NA
        } else {
          template
        }
      }
    )
  }

  study_table[, "template"] <- unlist(
    lapply(study_table[["metadataType"]], template_for)
  )
  study_table
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.