R/extract_complex_dependencies.R

Defines functions extract_complex_dependencies

Documented in extract_complex_dependencies

#' Extract complex dependencies.
#'
#' @param this_data The tibble to be checked for missing data.
#' @param this_glossary The glossary dictionary with parameter visit/test battery etc information.
#' @param id_var A quosure representing the column that contains the patient label.
#' @param site_var A quosure representing the column that contains the site name for that patient.
#' @param these_dependencies A dependencies object (likely generated by parse_MACRO_DSD_file_for_glossary_and_dependencies()).
#'
#' @importFrom dplyr pull
#' @importFrom dplyr filter
#' @importFrom dplyr bind_rows
#' @importFrom dplyr mutate
#' @importFrom dplyr rename
#' @importFrom dplyr %>%
#' @importFrom rlang .data
#' @importFrom rlang sym
#'
#' @return Missing data percentages in a tibble
#' @export
extract_complex_dependencies = function( this_data,
                                         this_glossary,
                                         id_var,
                                         site_var,
                                         these_dependencies ) {

  ##################################################################
  ##################################################################
  ### FOR THE EXPECTED DATA (IE THOSE DATA WITHOUT DEPENDENCIES) ###
  ##################################################################
  ##################################################################

  patient_list    = this_data %>% pull( !!id_var ) %>% unique %>% as.character

  dependency_info = these_dependencies %>%
    filter( .data$param_dependencies!="NA" | .data$param_dependencies_manual!="NA" ) %>%
    select( .data$column_name, .data$param_dependencies, .data$param_dependencies_manual, .data$param_closedvocab_list )

  missing_data.output = tibble(
    !!id_var    :=character(),
    !!site_var  :=character(),
    column_name  =character(),
    value        =character()
  )

  for ( i in 1:nrow(dependency_info) ) {

    ### For those simple, one
    this.dependency_string        = dependency_info[ i, "param_dependencies" ]
    this.dependency_string_manual = dependency_info[ i, "param_dependencies_manual" ]
    this.column_name              = dependency_info[ i, "column_name"        ]

    this.column_exists = ( this_glossary %>%
                             filter( .data$column_name == this.column_name ) %>%
                             nrow ) != 0

    if ( this.column_exists ) {

      #cat( sprintf( "BEFORE [%d] (%s)\n", i, this.dependency_string ) )


      this.dependency_string.updated = resolve_individual_filter_statement( this.dependency_string,
                                                                            this.column_name,
                                                                            this_glossary,
                                                                            these_dependencies )

      this.dependency_string_manual.updated = resolve_individual_filter_statement( this.dependency_string_manual,
                                                                                   this.column_name,
                                                                                   this_glossary,
                                                                                   these_dependencies )

      #cat( sprintf( "AFTER [%d] (%s)\n", i, this.dependency_string.updated ) )

      this.dependency_string.combined = c( this.dependency_string.updated,
                                           this.dependency_string_manual.updated ) %>%
        discard( function(x) x %in% c( "NA", "" ) ) %>%
        sprintf(fmt="( %s )") %>%
        paste( collapse=" & " )

      if ( !is.na( this.dependency_string.combined ) & this.dependency_string.combined != "" ) {

        tmp.data = this_data %>%
          filter(eval(rlang::parse_expr(this.dependency_string.combined))) %>%
          select ( {{id_var}}, {{site_var}}, !!sym(this.column_name) ) %>%
          mutate( column_name = this.column_name ) %>%
          rename( value = {{this.column_name}}) %>%
          select( {{id_var}}, {{site_var}}, .data$column_name, .data$value ) %>%
          mutate_all( as.character )

        missing_data.output = missing_data.output %>%
          bind_rows( tmp.data )

      }
    } else {
      print_warning( c( sprintf( "Dependency string is placed on a column [%s]",
                                 this.column_name ),
                        "that doesn't exist in the database.",
                        "Dependency is removed.") )
    }

  }

  # if ( str_count( this.dependency_string, "=" ) == 1 &
  #      !str_detect( this.dependency_string, " and " ) &
  #      !str_detect( this.dependency_string, " or " ) ) {
  #   #print_message( sprintf("[%d] Uni-dimensional dependency for %s",
  #   #                       i,
  #   #                       this.column_name ) )
  #
  #   tmp.split = str_split( this.dependency_string, "=") %>% unlist
  #
  #   this.field = tmp.split[1]
  #   this.value = tmp.split[2]
  #   this.vocab = these_dependencies %>%
  #     filter( column_name == this.field ) %>%
  #     pull( param_closedvocab_list ) %>%
  #     unlist %>% rev
  #
  #   ### Might it be "<>" - replace these with !=
  #
  #   ### It might be does not equal (!=)
  #   negate.flag = str_detect( this.field, "!$" )
  #   if ( negate.flag ) { this.field = str_replace( this.field, "!$", "" ) }
  #
  #   value_of_interest = this.value
  #   if ( !is.null(this.vocab) ) {
  #     value_of_interest = this.vocab[[this.value]]
  #   }
  #
  #   this.mask = this_data[,this.field] == value_of_interest
  #   if ( negate.flag ) { this.mask = !this.mask }
  #   this.mask = this.mask %>% replace_na( FALSE )
  #
  #   tmp.data = this_data[this.mask,] %>%
  #     select ( Label, Hospital, !!sym(this.column_name) ) %>%
  #     mutate( column_name = this.column_name ) %>%
  #     rename( value = {{this.column_name}}) %>%
  #     select( Label, Hospital, column_name, value ) %>%
  #     mutate_all( as.character )
  #
  #   missing_data.output = missing_data.output %>%
  #     bind_rows( tmp.data )
  #
  # } else {
  #   print_message( sprintf("[%d] Multi-dimensional dependency for %s",
  #                          i,
  #                          this.column_name ) )

  #   s.resolved = this.dependency_string
  #
  #   tmp.split_list = str_split( this.dependency_string, " and | or " ) %>% unlist
  #
  #   for ( this.split.i in 1:length(tmp.split_list) ) {
  #
  #     this.split = tmp.split_list[this.split.i]
  #
  #     if ( str_detect( this.split, "=" )  ) {
  #
  #       this.operator = "="
  #
  #       tmp.split = str_split( this.split, "=") %>% unlist
  #
  #       this.field = tmp.split[ 1 ]
  #       this.value = tmp.split[ 2 ]
  #       this.vocab = these_dependencies %>%
  #         filter( column_name == this.field ) %>%
  #         pull( param_closedvocab_list ) %>%
  #         unlist %>% rev
  #
  #       ### Check whether we are actually looking at
  #       ### != and not =
  #       if ( str_detect( this.field, "!$" ) ) {
  #         this.operator = "!="
  #         this.field    = str_replace( this.field, "!$", "" )
  #       }
  #
  #       ### Do we have vocabulary that we can refer to?
  #       if ( !is.null(this.vocab) ) {
  #
  #         new_string = ""
  #
  #         if ( this.value %in% names(this.vocab) ) {
  #           new_string = sprintf( "%s='%s'",
  #                                 this.field,
  #                                 this.vocab[[this.value]] )
  #         } else {
  #           print_warning( c( sprintf( "Illegal vocabulary for field [%s]",
  #                                      this.field ),
  #                             sprintf( "provided in dependency for [%s].",
  #                                      this.column_name ),
  #                             "Dependency will be removed." ) )
  #         }
  #
  #         s.resolved = str_replace( s.resolved,
  #                                                       this.split,
  #                                                       new_string )
  #       }
  #     }
  #
  #   }
  #
  #   s.resolved = tidy_logical_expression( s.resolved )
  #
  #   tmp.data = this_data %>%
  #     filter(eval(rlang::parse_expr(s.resolved))) %>%
  #     select ( Label, Hospital, !!sym(this.column_name) ) %>%
  #     mutate( column_name = this.column_name ) %>%
  #     rename( value = {{this.column_name}}) %>%
  #     select( Label, Hospital, column_name, value ) %>%
  #     mutate_all( as.character )
  #
  #   missing_data.output = missing_data.output %>%
  #     bind_rows( tmp.data )
  #
  #}

  # }


  return( missing_data.output )

}
LisaHopcroft/CTutils documentation built on Oct. 7, 2021, 11:08 p.m.