R/expand_dependency_locations_in_glossary.R

Defines functions expand_dependency_locations_in_glossary

Documented in expand_dependency_locations_in_glossary

#' Expand dependency locations in glossary
#'
#' Not all dependences in the Definitions Study Document
#' are fully defined - some are missing their visit/section
#' labels.  To link these dependencies properly, we need to
#' fill in the missing information.  This function takes an
#' existing glossary object and adds the additional dependency
#' information.
#'
#' @param glossary A glossary object (likely generated by parse_MACRO_DSD_file())
#'
#' @importFrom dplyr pull
#' @importFrom dplyr n
#' @importFrom dplyr filter
#' @importFrom dplyr mutate
#' @importFrom dplyr bind_rows
#' @importFrom dplyr %>%
#' @importFrom rlang .data
#' @importFrom stringr str_count
#' @importFrom stringr str_replace
#' @importFrom stringr str_detect
#' @importFrom stringr str_extract
#' @importFrom stringr str_extract_all
#' @importFrom stringr str_remove_all
#' @importFrom stringr str_trim
#' @importFrom tidyr fill
#' @importFrom xml2 read_xml
#' @importFrom xml2 xml_find_all
#' @importFrom xml2 xml_children
#' @importFrom xml2 xml_name
#' @importFrom xml2 xml_text
#'
#' @return The glossary with dependencies updated.
#' @export
expand_dependency_locations_in_glossary = function( glossary ) {

  glossary.tmp = glossary %>%
    mutate( rowid = 1:n() ) %>%
    mutate( param_dependencies.SAVED = .data$param_dependencies ) %>%
    mutate( amended = FALSE )

  ################################################################
  ### Check dependencies that include a visit check            ###
  ### I want to remove dependencies that are in the wrong week ###
  ################################################################

  visit.dependencies.list = glossary.tmp %>%
    filter( str_detect( .data$param_dependencies, "visit\\s*=\\s*\\(" ) ) %>%
    pull( .data$rowid )

  for ( this.dependency.i in visit.dependencies.list ) {
    this.dependency.string = glossary.tmp[ this.dependency.i, "param_dependencies" ] %>% unlist

    ### Which visit is referred to in the dependency?
    this.visit_in_dependency.phrase = str_extract( this.dependency.string,
                                            "visit\\s*=\\s*\\(.*?\\)")
    this.visit_in_dependency.string = this.visit_in_dependency.phrase %>%
      ### What is in the brackets?
      str_extract_all( "(?<=\\().+?(?=\\))" ) %>%
      unlist %>%
      str_remove_all("'") %>%
      str_trim()

    this.visit_in_glossary.string = glossary.tmp[ this.dependency.i,
                                                  "param_visit" ] %>% unlist

    glossary.tmp[ this.dependency.i,
                  "param_dependencies" ] = str_replace( this.dependency.string,
                                                        sprintf( "\\Q%s\\E",
                                                                 this.visit_in_dependency.phrase ),
                                                        as.character( this.visit_in_dependency.string == this.visit_in_glossary.string ) )
    glossary.tmp[ this.dependency.i,
                  "amended" ] = TRUE
      }


  ################################################################
  ### Change the isknown() elements of the dependencies        ###
  ################################################################

  visit.isknown.list = glossary.tmp %>%
    filter( str_detect( .data$param_dependencies, "isknown\\(" ) ) %>%
    filter( !str_detect( .data$param_dependencies, "\\(\\s*(this|previous|next)\\s*\\)" ) ) %>%
    pull( .data$rowid )

  for ( this.dependency.i in visit.isknown.list ) {
    this.dependency.string = glossary.tmp[ this.dependency.i, "param_dependencies" ] %>% unlist

    ### Which visit is referred to in the dependency?
    this.isknown_in_dependency.phrase = str_extract( this.dependency.string,
                                                   "isknown\\s*\\(.*?\\)")
    this.isknown_in_dependency.string = this.isknown_in_dependency.phrase %>%
      ### What is in the brackets?
      str_extract_all( "(?<=\\().+?(?=\\))" ) %>%
      unlist %>%
      str_remove_all("'") %>%
      str_trim()

    glossary.tmp[ this.dependency.i,
                  "param_dependencies" ] = str_replace( this.dependency.string,
                                                        sprintf( "\\Q%s\\E",
                                                                 this.isknown_in_dependency.phrase ),
                                                        sprintf( "%s!=''",
                                                                 this.isknown_in_dependency.string ) )
    glossary.tmp[ this.dependency.i,
                  "amended" ] = TRUE
  }

  ################################################################
  ### Find the dependencies that are not fully specified       ###
  ################################################################

  these.dependencies = glossary.tmp %>%
    filter( !is.na( .data$param_dependencies ) ) %>%
    ### Remove the previous or next dependencies for now
    filter( !str_detect( .data$param_dependencies, "\\(\\s*previous\\s*\\)")) %>%
    filter( !str_detect( .data$param_dependencies, "\\(\\s*next\\s*\\)")) %>%
    filter( !str_detect( .data$param_dependencies, "\\(\\s*this\\s*\\)")) %>%
    pull( .data$rowid )

  for ( this.dependency.i in these.dependencies ) {

    this.dependency.text = glossary.tmp %>%
      filter( .data$rowid==this.dependency.i ) %>%
      pull( .data$param_dependencies ) %>%
      as.character( )

    this.dependency.params = this.dependency.text %>%
      ### What parameters are there in this dependency?
      str_extract_all( "[^\\s]+\\s?=\\s?[^\\s]+" ) %>%
      unlist #%>%
      #str_replace( "\\s*=\\s*", "=")

    params.colon_count = this.dependency.params %>% str_count(":")

    this.dependency.params_corrected = this.dependency.params

    if ( any( params.colon_count < 2 ) ) {

      params.to_expand = which( params.colon_count < 2 )

      for ( this.expand.i in params.to_expand ) {

        this.v = glossary.tmp[ this.dependency.i, "param_visit"       ] %>% unlist
        this.t = glossary.tmp[ this.dependency.i, "param_testbattery" ] %>% unlist
        this.p = this.dependency.params[ this.expand.i ]

        if ( params.colon_count[ this.expand.i ]==0 ) {

          this.dependency.params_corrected[ this.expand.i ] = paste(c(this.v,
                                                                 this.t,
                                                                 this.p),
                                                               collapse=":" )

        } else if ( params.colon_count[this.expand.i]==1 ) {
          this.dependency.params_corrected[ this.expand.i ] = paste( c( this.v,
                                                                   this.p ),
                                                                collapse=":")

        }


        glossary.tmp[ this.dependency.i,
                      "param_dependencies" ] = str_replace( glossary.tmp[ this.dependency.i,
                                                                          "param_dependencies" ],
                                                            this.p,
                                                            this.dependency.params_corrected[ this.expand.i ]  )


        glossary.tmp[ this.dependency.i,
                      "amended" ] = TRUE

      }
    }
  }


  glossary.new = glossary.tmp

  return( glossary.new )

}
LisaHopcroft/CTutils documentation built on Oct. 7, 2021, 11:08 p.m.