R/getStudiesSTSTDTC.R

Defines functions getStudiesSTSTDTC

Documented in getStudiesSTSTDTC

################################################################################
## The function getStudiesSTSTDTC.
##
## History:
## -----------------------------------------------------------------------------
## Date         Programmer            Note
## ----------   --------------------  ------------------------------------------
## 2020-12-04   Bo Larsen             Initial version
################################################################################

#' Extract a list of SEND studies with study start date within a specified
#' interval - or just add actual study start date for each study
#'
#' Returns a data table with the list of study ids from TS where the value of
#' TSVAL for the TSPARMCD 'STSTDTC' is within a a given date interval.\cr
#' If the \code{fromDTC} and\code{toDTC} are empty (null, na or empty string)
#' - all rows for the TSPARMCD 'STSTDTC' are returned.
#'
#' Extracts the set of study ids from TS where the value of TSVAL for the
#' TSPARMCD 'STSTDTC' falls within a specified start/end date interval in IS8601
#' format (input parameters \code{fromDTC}/\code{toDTC}).\cr
#'
#' Both complete and incomplete input start/end dates can be handled.
#' \itemize{
#'   \item If only a year is specified - the date set to the first of January that
#'    year.
#'   \item If only a year and month is specified - the date set to the first day
#'   in that month.
#'   \item If a time part is included in a specified input start/end date, it is
#'   ignored.
#' }
#'
#' If both a start and end input date are specified - all the STUDYID values
#' from TS where TSVAL for TSPARMCD 'STSTDTC' is with the interval of the
#' specified start/end date interval are extracted and returned - including the
#' values equal to the start/end dates. are included.\cr
#'
#' If only a start input date is specified - all the STUDYID values from TS
#' where TSVAL for TSPARMCD 'STSTDTC' is equal to or later than the input date
#' are extracted and returned.\cr
#'
#' If only an end date is specified - all the STUDYID values from TS where TSVAL
#' for TSPARMCD 'STSTDTC' is equal to or earlier than the are date are extracted
#' and returned.\cr
#'
#' If a data table with a list of studies is specified in \code{studyList}, only
#' the subset of studies included in that set is processed.\cr
#'
#' If input \code{inclUncertain} is TRUE, uncertain studies are included in the
#' output set. These uncertain situations are identified and reported (in column
#' UNCERTAIN_MSG):
#' \itemize{
#' \item TS contains now row for TSPARMCD='STSTDTC'
#' \item TSVAL contains an invalid ISO8601 date format for TSPARMCD='STSTDTC'
#' }
#' The same checks are performed and reported in column NOT_VALID_MSG if
#' \code{fromDTC} and \code{toDTC} are empty and
#' \code{noFilterReportUncertain=TRUE}.
#'
#' @param dbToken Mandatory.\cr
#'   Token for the open database connection (see \code{\link{initEnvironment}}).
#' @param studyList Optional.\cr
#'  A data.table with the list of studies to process. If empty, all studies in
#'  the data base are processed \cr
#'  The table must include at least a column named 'STUDYID'.
#' @param fromDTC  Optional (either or both of \code{fromDTC} and \code{toDTC}
#'   must be filled).\cr
#'   The start of the date interval to extract - must be in ISO8601 date format.
#' @param toDTC Optional (either or both of \code{fromDTC} and \code{toDTC} must be filled).\cr
#'   The end of the date interval to extract - must be in ISO8601 date format.
#' @param inclUncertain Mandatory, boolean.\cr
#'   Indicates whether study ids with STSTDTC which are are missing or wrong
#'   shall be included or not in the output data table.
#' @param noFilterReportUncertain  Mandatory, boolean\cr
#'  Only relevant if the \code{fromDTC} and\code{toDTC} are empty.\cr
#'  Indicates if the reason should be included if the STSTDTC cannot be
#'  confidently decided for an animal.
#'
#' @return The function return a data.table with columns:
#'   \itemize{
#'   \item STUDYID       (character)
#'   \item Additional columns contained in the \code{studyList} table (if such an input
#'   table is given)
#'   \item STSTDTC       (character - ISO8601 format)
#'   \item UNCERTAIN_MSG (character)\cr
#' Only included when parameter \code{inclUncertain=TRUE}.\cr
#' Contains indication of whether STSTDTC is missing of has wrong
#' format.\cr
#' Is NA for rows where SDESIGN is valid.\cr
#' A non-empty UNCERTAIN_MSG value generated by this function is merged with
#' non-empty UNCERTAIN_MSG values which may exist in the optional input set of
#' studies specified in \code{studyList} - separated by '|'.
#'   \item NOT_VALID_MSG (character)\cr
#' Included when parameter \code{noFilterReportUncertain=TRUE}.\cr
#' In case the STSTDTC cannot be confidently decided, the column contains an
#' indication of the reason.\cr
#' Is NA for rows where STSTDTC can be confidently decided.\cr
#' A non-empty NOT_VALID_MSG value generated by this function is merged with
#' non-empty NOT_VALID_MSG values which may exist in the input set of studies
#' specified in \code{studyList} - separated by '|'.
#' }
#'
#' @export
#'
#' @examples
#' \dontrun{
#' GetStudyListSTSTDTC(myDbToken, allSTudies, '2018','2020')
#' }

getStudiesSTSTDTC <- function(dbToken,
                              studyList=NULL,
                              fromDTC=NULL,
                              toDTC=NULL,
                              inclUncertain=FALSE,
                              noFilterReportUncertain = TRUE) {
  # Evaluate input parameters
  if ((is.null(fromDTC) | isTRUE(is.na(fromDTC)) | isTRUE(fromDTC=="")) & (is.null(toDTC) | isTRUE(is.na(toDTC)) | isTRUE(toDTC=="")))
    execFilter <- FALSE
  else {
    execFilter <- TRUE
    if ((!(is.null(fromDTC) | isTRUE(is.na(fromDTC)) | isTRUE(fromDTC==""))
         & isTRUE(is.na(parsedate::parse_iso_8601(fromDTC))))
        | (!(is.null(toDTC) | isTRUE(is.na(toDTC)) | isTRUE(toDTC==""))
           & isTRUE(is.na(parsedate::parse_iso_8601(toDTC)))))
      stop("The value(s) specified for fromDTC and/or toDTC is not a valid ISO8601 date")
  }

  if (execFilter & !(inclUncertain %in% c(TRUE,FALSE))) {
    stop("Parameter inclUncertain must be either TRUE or FALSE")
  }
  if (!execFilter & !(noFilterReportUncertain %in% c(TRUE,FALSE))) {
    stop("Parameter noFilterReportUncertain must be either TRUE or FALSE")
  }

  studyListIncl<-FALSE
  if (data.table::is.data.table(studyList)) {
    # An initial list of studies is included
    studyListIncl<-TRUE
  }

  # Extract TS parameter STSTDTC
  # - include a row for each for study which may miss a STSTDTC parameter
  tsSTSTDTC <-
    genericQuery(dbToken, "select ts0.studyid,
                                  case
                                    when ts1.tsval = '' then null
                                    else ts1.tsval
                                  end as STSTDTC
                             from (select distinct STUDYID from ts) ts0
                             left join ts ts1
                               on ts0.studyid = ts1.studyid
                              and ts1.tsparmcd = 'STSTDTC'")

  if (studyListIncl) {
    # Limit to the set of studies given as input
    tsSTSTDTC<-data.table::merge.data.table(tsSTSTDTC, studyList[,c('STUDYID')], by='STUDYID')
  }

  # Check if a message column for uncertainties shall be included
  msgCol =''
  if (execFilter & inclUncertain)
    msgCol = 'UNCERTAIN_MSG'
  else {
    if (!execFilter & noFilterReportUncertain)
      msgCol = 'NOT_VALID_MSG'
  }

  if (msgCol != '') {
    # Check STSTDTC value for uncertainty for each extracted row,
    # i.e. is missing or has invalid ISO8601 format
    tsSTSTDTC[, MSG :=  ifelse(is.na(parsedate::parse_iso_8601(STSTDTC)),
                                                ifelse(is.na(STSTDTC),
                                                       'STSTDTC: TS parameter STSTDTC is missing',
                                                       'STSTDTC: TS parameter STSTDTC has wrong format'),
                                                as.character(NA))]
    # Rename MSG col to correct name
    data.table::setnames(tsSTSTDTC, 'MSG' ,msgCol)
  }

  if (execFilter) {
    # Execute filtering

    # Construct the statement to apply the specified date interval
    dtcFilter<-NA
    if (!(is.null(fromDTC) | isTRUE(is.na(parsedate::parse_iso_8601(fromDTC))))) {
      # The filter condition for the fromDTC
      dtcFilter<-"as.Date(parsedate::parse_iso_8601(STSTDTC)) >= as.Date(parsedate::parse_iso_8601(fromDTC))"
    }
    if (!(is.null(toDTC) | isTRUE(is.na(parsedate::parse_iso_8601(toDTC))))) {
      # Check the granularity if the specified toDTC and a 1 year/month/day to end of the interval to extract
      if (nchar(toDTC)==4) {
        # Only year has been specified + add 1 year to the date
        toDTCdate<-DescTools::AddMonths(as.Date(parsedate::parse_iso_8601(toDTC)),12)
      }
      else if (nchar(toDTC)==7) {
        # Only year and month has been specified -  add 1 month to the date
        toDTCdate<-DescTools::AddMonths(as.Date(parsedate::parse_iso_8601(toDTC)),1)
      }
      else {
        # A full date has been specified - add one day
        toDTCdate<-as.Date(parsedate::parse_iso_8601(toDTC))+1
      }
      if (is.na(dtcFilter)) {
        # only toDTC filter part
        dtcFilter<-"as.Date(parsedate::parse_iso_8601(STSTDTC)) < toDTCdate"
      }
      else {
        # Add this filter part to the fromDTC filter part
        dtcFilter<-paste(dtcFilter," & as.Date(parsedate::parse_iso_8601(STSTDTC)) < toDTCdate",sep="")
      }
    }
    if (inclUncertain)
      # Include condition for inclusion of identified uncertain rows
      dtcFilter<-paste(paste("(", dtcFilter), ") | ! is.na(UNCERTAIN_MSG)")

    # Build the statement to extract studies fulfilling the condition(s) and execute
    foundStudies<-eval(parse(text=paste0('tsSTSTDTC[',dtcFilter,']')))
  }
  else
    foundStudies <- tsSTSTDTC

  if (studyListIncl) {
    # Merge the list of extracted studies with the input set of studies to keep
    # any additional columns from the input table
    foundStudies<-data.table::merge.data.table(studyList, foundStudies, by='STUDYID')

    # Do final preparation of set of found studies and return
    prepareFinalResults(foundStudies, names(studyList), c('STSTDTC'))
  }
  else
    # Initial list if extracted studies
    # Do final preparation of set of found studies and return
    prepareFinalResults(foundStudies, '', c('STUDYID', 'STSTDTC'))
}

Try the sendigR package in your browser

Any scripts or data that you put into this service are public.

sendigR documentation built on Aug. 18, 2022, 9:07 a.m.