# R/core-datacheck-functions.R

#' Check completeness of dataset attributes
#'
#' @description Raises an error if any column required for the dataset is
#'   missing from `ds_col_names`; otherwise returns `NULL` invisibly. The list
#'   of missing columns is obtained from `get.ds_missing_cols()`.
#'
#' @export
#'
#' @param ds_name dataset name
#' @param ds_col_names vector of columns from dataset that needs to be checked
#'   for completeness
#'
#' @return `NULL` (invisibly) when no required column is missing. Otherwise an
#'   error is raised whose message lists the missing columns, comma-separated.
check.ds_cols <- function(ds_name, ds_col_names) {
  missing_cols <- get.ds_missing_cols(ds_name, ds_col_names)
  if (length(missing_cols) > 0) {
    stop(
      paste(
        "Dataset", ds_name, "is incomplete. Missing columns:",
        paste(missing_cols, collapse = ",")
      )
    )
  }
  # Explicit success value; an `if` without `else` already yields this.
  invisible(NULL)
}

#' Find list of missing columns
#'
#' @description Function returns vector of missing columns (among given columns
#'   as an input parameter) in a given dataset. If all columns are present then
#'   function returns empty vector. Requirements are taken from the package's
#'   internal dataset \link{data.dataset.req.attrs}, using its `Dataset` and
#'   `Attribute` columns.
#'
#' @export
#'
#' @param ds_name Dataset Name
#' @param ds_col_names vector of Dataset columns
#'
#' @return The required attributes of `ds_name` that do not appear in
#'   `ds_col_names`; empty when nothing is missing.
get.ds_missing_cols <- function(ds_name, ds_col_names) {
  # NOTE(review): this previously read `pacta::dataset.req.attrs`, which was
  # flagged as non-existent; the documented dataset name is used instead.
  # Confirm the dataset is reachable via `pacta::` (i.e. lazy-loaded data).
  requirements <- pacta::data.dataset.req.attrs

  # Standard subsetting instead of `subset()`: no non-standard evaluation, so
  # the comparison can never be captured by a column that happens to be called
  # `ds_name`. `which()` drops NA comparisons, exactly as `subset()` did.
  rows <- which(requirements[["Dataset"]] == ds_name)
  required <- requirements[["Attribute"]][rows]

  # required attributes which are not among the columns being checked
  required[!required %in% ds_col_names]
}

#' Substitute parts of string
#'
#' Replaces every `{name}` placeholder in `str` with the matching value from
#' `subs`. Substitution is repeated until the string stops changing, so a
#' replacement value may itself contain placeholders.
#'
#' @param str string to modify
#' @param subs list of substitutions in format `list(name = value)`
#'   where `name` is pattern and `value` is replacement.
#'
#' @return `str` with all known placeholders replaced. Placeholders whose name
#'   is not in `subs` are left untouched. An error is raised if the
#'   substitutions do not settle (e.g. a value that contains its own
#'   placeholder).
susbtitute.string <- function(str, subs) {
  # NOTE(review): the function name is misspelled ("susbtitute"); it is kept
  # unchanged so existing callers of this name keep working.
  keys <- names(subs)
  keys <- keys[!is.na(keys) & nzchar(keys)]

  # One pass replaces the placeholders of every key, literally (no regex).
  apply_once <- function(text) {
    Reduce(
      f = function(acc, key) {
        gsub(paste0("{", key, "}"), subs[[key]], acc, fixed = TRUE)
      },
      x = keys,
      init = text
    )
  }

  # Iterate to a fixed point instead of recursing; the cap turns a
  # self-referential substitution into an error rather than an endless loop.
  max_passes <- 100L
  current <- str
  for (pass in seq_len(max_passes)) {
    updated <- apply_once(current)
    # identical() is safe for vectors and NA, unlike `==` inside `if`.
    if (identical(updated, current)) {
      return(updated)
    }
    current <- updated
  }
  stop(
    "Substitutions did not settle after ", max_passes,
    " passes; check for self-referential placeholders.",
    call. = FALSE
  )
}

#' List of files for each dataset from requirements
#'
#' Placeholder: not implemented yet. The arguments are currently ignored and
#' the function always returns `NULL`.
#'
#' @param ds_name dataset name
#' @param files vector of file names
#' @param substitutions list of substitutions (list(keyword = value)), so
#'   {keyword} entries can be replaced in vector of filenames
#'
#' @return `NULL`, until the function is implemented.
get.ds_file_names <- function(ds_name, files, substitutions) {
  # TODO: implement; use dirname(), basename()
  NULL
}

#' Number of files for each dataset from requirements
#'
#' Intended to check a vector of given files (with full path) and report how
#' many of them provide data for the specified dataset. Placeholder: not
#' implemented yet, so the arguments are ignored and `NULL` is returned.
#'
#' @param ds_name Dataset name
#' @param files list of files to check if they serve as a source for dataset.
#'
#' @return `NULL`, until the function is implemented.
get.ds_file_count <- function(ds_name, files) {
  # TODO: implement the count described above.
  NULL
}
# 2DegreesInvesting/pacta documentation built on June 17, 2019, 12:04 p.m.