# Prettify warnings etc. in notebook
# Source: https://selbydavid.com/2017/06/18/rmarkdown-alerts/
knitr::knit_hooks$set(
  error = function(x, options) {
    paste('\n\n<div class="alert alert-danger">',
          gsub('##', '\n', gsub('^#*\ *Error', '**Error:**', x)),
          '</div>', sep = '\n')
  },
  warning = function(x, options) {
    paste('\n\n<div class="alert alert-warning">',
          gsub('##', '\n', gsub('^#*\ *Warning:', '**Warning:**', x)),
          '</div>', sep = '\n')
  },
  message = function(x, options) {
    if (grepl(pattern = '^#*\ *Error:', x = x)){
      paste('\n\n<div class="alert alert-danger">',
            gsub('##', '\n', gsub('^#*\ *Error', '**Error:**', x)),
            '</div>', sep = '\n')
    } else if (grepl(pattern = '^#*\ *Warning:', x = x)){
      paste('\n\n<div class="alert alert-warning">',
            gsub('##', '\n', gsub('^#*\ *Warning:', '**Warning:**', x)),
            '</div>', sep = '\n')
    } else if (grepl(pattern = '^#*\ *Success:', x = x)){
      paste('\n\n<div class="alert alert-success">',
            gsub('##', '\n', gsub('^#*\ *Success:', '', x)),
            '</div>', sep = '\n')
    } else {
      paste('\n\n<div class="alert alert-info">',
            gsub('##', '\n', x),
            '</div>', sep = '\n')
    }
  }
)


# Set chunk/code evaluation control variables defaults
CONTINUE <- TRUE
MULTIBATCH <- FALSE
AVAILABLE_BIOLOGICAL <- FALSE
AVAILABLE_POOLED_QC <- FALSE
AVAILABLE_REFERENCE_QC <- FALSE
ENOUGH_BIOLOGICAL <- FALSE
ENOUGH_POOLED_QC <- FALSE
ENOUGH_REFERENCE_QC <- FALSE
NORMALIZED <- FALSE
IS_BIOCRATES <- FALSE


# Parse and validate parameters

# Initiate some dynamic global variables
init_dynamic_global_variables()

# Kit selection
if ("kit" %in% names(params)) {
  assert_that(params$kit %in% KITS)
  if (params$kit %in% KITS_BIOCRATES){
    IS_BIOCRATES = TRUE
  }
} else {
  stop(paste0(
    "No kit selected. Currently supported: '",
    paste(KITS, collapse = "', '"), "'"))
}

# Data files
if ("data_files" %in% names(params)){
  if (IS_BIOCRATES){
    assert_that(class(params$data_files) == "list")
  } else { # Generic data
    # Check if list (separate batches) or vector (one or include batches)
    assert_that(
      class(params$data_files) == "list"
      || class(params$data_files) == "character"
    )
  }
}

if (params$kit == KIT_GENERIC_DATA) {
  # Generic data types
  assertthat::assert_that("generic_data_types" %in% names(params))
  assertthat::assert_that(all(names(params$generic_data_types) %in% MANIFESTATION_VARIABLES))
  # Compound index for generic data
  assertthat::assert_that("generic_index_first_compound" %in% names(params))
  assertthat::assert_that(class(params$generic_index_first_compound) == "numeric")
  assertthat::assert_that(length(params$generic_index_first_compound) == 1)
}

# Regex sample filter need to be one string
if ("filter_regex" %in% names(params)){
  assert_that(class(params$filter_regex) == "character" &&
                length(params$filter_regex) == 1)
}

# Study_variables needs to be a list, including only lists and strings
if ("study_variables" %in% names(params)){
  STUDY_VARIABLES <- params$study_variables

  is_lists_and_strings <- function(l){
    all(lapply(l, function(x){if(is.list(x)){ is_lists_and_strings(x) } else if (is.character(x)) { TRUE  } else { FALSE }}))
  }

  make.names.recursive <- function(l){
    l <- lapply(l, function(x){if(is.list(x)){ make.names.recursive(x) } else { make.names(x) }})
    names(l) <- make.names(names(l))
    return(l)
  }

  if (length(STUDY_VARIABLES) > 0){
    assert_that(class(STUDY_VARIABLES) == "list")
    STUDY_VARIABLES <- make.names.recursive(STUDY_VARIABLES)
    if(!is_lists_and_strings(STUDY_VARIABLES)){
      stop(paste(
        "Invalid data in study variables (use strings or named lists of strings):\n",
        paste(dput(STUDY_VARIABLES), collapse = ", ")))
    }
  }
}

# profiling_variables needs to be a vector, including only strings
if ("profiling_variables" %in% names(params)){
  PROFILING_VARIABLES <- params$profiling_variables
  if (length(PROFILING_VARIABLES) > 0){
    assert_that(class(PROFILING_VARIABLES) == "character")
    PROFILING_VARIABLES <- make.names(PROFILING_VARIABLES)
  }
}

# replicate_variables needs to be a vector, including only strings
if ("replicate_variables" %in% names(params)){
  REPLICATE_VARIABLES <- params$replicate_variables
  if (length(REPLICATE_VARIABLES) > 0){
    assert_that(class(REPLICATE_VARIABLES) == "character")
    REPLICATE_VARIABLES <- make.names(REPLICATE_VARIABLES)
  }
}

# Pool indicator needs to be one string (column name)
if ("pool_indicator" %in% names(params)){
  POOL_INDICATOR <- params$pool_indicator
  if (is.na(POOL_INDICATOR) || is.null(POOL_INDICATOR) || POOL_INDICATOR == ""){
    POOL_INDICATOR <- "Sample.Identification"
  }
  else {
    assert_that(class(POOL_INDICATOR) == "character" &&
                  length(POOL_INDICATOR) == 1)
  }
} else {
  POOL_INDICATOR <- "Sample.Identification"
}
POOL_INDICATOR <- make.names(POOL_INDICATOR)

# Additional normalization supported: None, PQN
if ("additional_normalization" %in% names(params)){
  assert_that(params$additional_normalization %in% c("None", "PQN"))
}

# MetIDQ statuses
assertthat::assert_that("preproc_keep_status" %in% names(params))
if (IS_BIOCRATES){
  assertthat::assert_that(all(params$preproc_keep_status %in% METIDQ_STATUSES))
}

PREPROC_Q500_URINE_LIMITS <- FALSE
if (!is.null(params$preproc_q500_urine_limits)) {
  if (!is.logical(params$preproc_q500_urine_limits)) {
    stop(paste("preproc_q500_urine_limits needs to be TRUE or FALSE"))
  }
  if (params$preproc_q500_urine_limits &&
      (params$kit != KIT_BIOCRATES_Q500 ||
       params$measurement_type != "LC")) {
    stop(paste("Urine quantification limits update only available for",
               KIT_BIOCRATES_Q500, "- LC data."))
  }
  PREPROC_Q500_URINE_LIMITS <- params$preproc_q500_urine_limit
}


# Filter thresholds
if (!is.null(params$filter_compound_qc_ref_max_mv_ratio)) {
  assertthat::assert_that(
    0 <= params$filter_compound_qc_ref_max_mv_ratio && params$filter_compound_qc_ref_max_mv_ratio <= 1
  )
}
if (!is.null(params$filter_compound_qc_ref_max_rsd)) {
  assertthat::assert_that(
    0 <= params$filter_compound_qc_ref_max_rsd && params$filter_compound_qc_ref_max_rsd <= 100
  )
}
if (!is.null(params$filter_compound_qc_pool_max_mv_ratio)) {
  assertthat::assert_that(
    0 <= params$filter_compound_qc_pool_max_mv_ratio && params$filter_compound_qc_pool_max_mv_ratio <= 1
  )
}
if (!is.null(params$filter_compound_qc_pool_max_rsd)) {
  assertthat::assert_that(
    0 <= params$filter_compound_qc_pool_max_rsd && params$filter_compound_qc_pool_max_rsd <= 100
  )
}
if (!is.null(params$filter_compound_bs_max_mv_ratio)) {
  assertthat::assert_that(
    0 <= params$filter_compound_bs_max_mv_ratio && params$filter_compound_bs_max_mv_ratio <= 1
  )
}
if (!is.null(params$filter_sample_max_mv_ratio)) {
  assertthat::assert_that(
    0 <= params$filter_sample_max_mv_ratio && params$filter_sample_max_mv_ratio <= 1
  )
}

# Data tables visibility
if (!is.null(params$data_tables)) {
  assertthat::assert_that(params$data_tables %in% c("all", "stats", "none"))
  assign("TABLE_DISPLAY", params$data_tables, ENV)
}

# Data tables export
if (!is.null(params$data_export_long)) {
  assertthat::assert_that(is.logical(params$data_export_long))
}
if (!is.null(params$data_export_wide)) {
  assertthat::assert_that(is.logical(params$data_export_wide))
}
if (!is.null(params$data_export_prefix)) {
  assertthat::assert_that(is.character(params$data_export_prefix))
}

# Optional metadata import and modifications
ANY_METADATA_MODS <- FALSE
if (!is.null(params$metadata_import)) {
  assertthat::assert_that(file.exists(params$metadata_import))
  assertthat::assert_that(length(params$metadata_import_overlap) == 1)
  assertthat::assert_that(
    params$metadata_import_overlap %in% c("rename", "replace", "omit")
  )
  ANY_METADATA_MODS <- TRUE
}
if (!is.null(params$metadata_name_mods_org)) {
  assertthat::assert_that(length(params$metadata_name_mods_org) > 0)
  assertthat::assert_that(class(params$metadata_name_mods_org) == "character")
  assertthat::assert_that(!is.null(names(params$metadata_name_mods_org)))
  assertthat::assert_that(!any(is.na(names(params$metadata_name_mods_org))))
  ANY_METADATA_MODS <- TRUE
}
if (!is.null(params$metadata_name_mods_add)) {
  assertthat::assert_that(length(params$metadata_name_mods_add) > 0)
  assertthat::assert_that(class(params$metadata_name_mods_add) == "character")
  assertthat::assert_that(!is.null(names(params$metadata_name_mods_add)))
  assertthat::assert_that(!any(is.na(names(params$metadata_name_mods_add))))
  ANY_METADATA_MODS <- TRUE
}
if (!is.null(params$metadata_value_mods)) {
  assertthat::assert_that(length(params$metadata_value_mods) > 0)
  assertthat::assert_that(is.list(params$metadata_value_mods))
  assertthat::assert_that(!is.null(names(params$metadata_value_mods)))
  assertthat::assert_that(!any(is.na(names(params$metadata_value_mods))))
  for (value_mods in params$metadata_value_mods){
    assertthat::assert_that(length(value_mods) > 0)
    assertthat::assert_that(!is.null(names(value_mods)))
    assertthat::assert_that(!any(is.na(names(value_mods))))
    assertthat::assert_that(
      class(value_mods) %in% c("character", "complex", "integer", "logical", "numeric")
    )
  }
  ANY_METADATA_MODS <- TRUE
}

# Optional low concentration analysis
LOWCON <- FALSE
LOWCON_CONDITIONS <- params$lowcon_conditions
LOWCON_SD_OUTLIER_REM <- params$lowcon_sd_outlier_removal
LOWCON_MINIMUM_INTENSITY <- params$lowcon_minimum_intensity
if (!is.null(LOWCON_CONDITIONS)){
  assertthat::assert_that(length(LOWCON_CONDITIONS) > 0)
  assertthat::assert_that(class(LOWCON_CONDITIONS) == "character")
  assertthat::assert_that(!any(is.na(LOWCON_CONDITIONS)))
  LOWCON_CONDITIONS <- make.names(LOWCON_CONDITIONS)
  assertthat::assert_that(LOWCON_SD_OUTLIER_REM %in% c(TRUE, FALSE))
  assertthat::assert_that(LOWCON_MINIMUM_INTENSITY >= 0)
  LOWCON <- TRUE
}


# Collect all supplied study variables
# (add to global ENV for access as default in functions)
assign(
  "ALL_VARIABLES",
  unique(c(
    PROFILING_VARIABLES,
    unlist(STUDY_VARIABLES, recursive = TRUE),
    REPLICATE_VARIABLES,
    LOWCON_CONDITIONS
  )),
  ENV
)


bihealth/metaquac documentation built on Aug. 7, 2021, 8:40 a.m.