psHarmonize Summary Report

# Chunk defaults for the whole report: do not echo source code, and suppress
# warnings and messages in the rendered output.
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)

library(dplyr)
library(ggplot2)
library(knitr)
library(janitor)
library(kableExtra)
library(glue)

# Alias for `long_dataset`; the variable and cohort lists further down are
# derived from this copy.
# NOTE(review): `long_dataset` is not created in this file -- presumably it is
# supplied by whatever renders the report; confirm against the caller.
harmonized_long_dataset <- long_dataset
## Helper functions that each render one "child" R Markdown document.
## They are called once per variable ("element") within each cohort to
## produce that variable's section of the report.


# cont_one_visit

cont_one_visit <- function() {

  # Render the child document "./RMarkdown_child/cont_one_visit.Rmd" and write
  # the result to the report with cat().
  #
  # Takes no arguments. `current_cat` is read as a free variable (it is not
  # passed in) and is only used to build the fallback message.
  #
  # Returns NULL invisibly (the value of the final cat() call).

  # One handler serves both conditions: an error or a warning raised while
  # knitting abandons the child output and yields this message instead.
  fallback_message <- function(cond) {
    paste0('Output for the variable ', current_cat, ' cannot be created.\n\n')
  }

  rendered <- tryCatch(
    knitr::knit_child("./RMarkdown_child/cont_one_visit.Rmd", quiet = TRUE),
    error = fallback_message,
    warning = fallback_message
  )

  # Trailing spaces plus blank lines separate this section from what follows.
  cat(rendered, '   ', sep = '\n\n')

}

# cont_multi_visit

cont_multi_visit <- function() {

  # Render the child document "./RMarkdown_child/cont_multi_visit.Rmd" and
  # write the result to the report with cat().
  #
  # Takes no arguments; `current_cat` is read as a free variable and appears
  # only in the fallback message. Returns NULL invisibly (from cat()).

  child_path <- "./RMarkdown_child/cont_multi_visit.Rmd"

  # Both a warning and an error while knitting replace the child output with
  # a short "cannot be created" notice for the current variable.
  on_failure <- function(condition) {
    paste0('Output for the variable ', current_cat, ' cannot be created.\n\n')
  }

  section_text <- tryCatch(
    {
      knitr::knit_child(child_path, quiet = TRUE)
    },
    warning = on_failure,
    error = on_failure
  )

  cat(section_text, '   ', sep = '\n\n')

}

# cat_multi_visit

cat_multi_visit <- function() {

  # Render the child document "./RMarkdown_child/cat_multi_visit.Rmd" and
  # write the result to the report with cat().
  #
  # Takes no arguments; `current_cat` is read as a free variable and is used
  # only when the child cannot be rendered. Returns NULL invisibly.

  # Produces the placeholder text shown when knitting the child fails.
  placeholder <- function(cnd) {
    paste0('Output for the variable ', current_cat, ' cannot be created.\n\n')
  }

  render_child <- function() {
    knitr::knit_child("./RMarkdown_child/cat_multi_visit.Rmd", quiet = TRUE)
  }

  # A warning is handled exactly like an error: the first one raised stops
  # the rendering and the placeholder is emitted instead.
  child_output <- tryCatch(render_child(), error = placeholder, warning = placeholder)

  cat(child_output, '   ', sep = '\n\n')

}

# cat_one_visit
# (No `cat_one_visit()` function is defined in this part of the file.)

# Select the variables to report on: every column of the harmonized dataset
# except the ID, cohort and visit columns (matched case-insensitively) and
# any column whose name starts with "source_" (source variables are not
# reported at the moment).
dataset_columns <- names(harmonized_long_dataset)

is_excluded <- grepl('^cohort$|^ID$|^visit$|^source_', dataset_columns, ignore.case = TRUE)

categories <- dataset_columns[!is_excluded]

# Distinct cohort labels, in order of first appearance.
cohorts <- unique(harmonized_long_dataset$cohort)

# {.tabset}

# Main report loop: one level-2 tab per cohort and, inside it, one level-3 tab
# per variable. For each (cohort, variable) pair the loop:
#   1. writes the markdown heading for the tab,
#   2. works out how many distinct values and visits the variable has,
#   3. looks up optional units and coding notes in `harmonization_sheet`,
#   4. prepares `data_intermediate` and renders the matching child document,
#   5. prints the coding notes (if any) and a version footer.
#
# The variables assigned here (`units`, `coding_notes`, `data_intermediate`,
# `num_unique_val`, `num_visits`, `visit_to_use`) keep their names and are set
# before the child document is rendered.
# NOTE(review): the child documents are not visible here; presumably they read
# some of these variables -- confirm before renaming any of them.
for (current_cohort in cohorts) {

  cat('\n')
  cat(paste0('## ', current_cohort, ' {.tabset}'))
  cat('\n')
  cat('<hr>')
  cat('\n\n')

  for (current_cat in categories) {

    cat('\n')
    cat(paste0('### ', current_cat, ' {.tabset}'))
    cat('\n')
    cat('<hr>')
    cat('\n\n')


    ## Identify type of variable in harmonized dataset
    ## and display the appropriate output

    # Distinct values are counted over the whole dataset (all cohorts); NA
    # counts as one value because unique() keeps it.
    num_unique_val <- length(unique(long_dataset[[current_cat]]))

    # Visits at which this cohort has at least one non-missing value.
    has_value_in_cohort <- !is.na(long_dataset$cohort) &
      long_dataset$cohort == current_cohort &
      !is.na(long_dataset[[current_cat]])

    num_visits <- length(unique(long_dataset[has_value_in_cohort, ][['visit']]))


    ## Obtaining units (if present)

    # Default label; replaced only when the sheet yields exactly one usable
    # unit for this cohort/variable.
    units <- 'value'

    if ('final_units' %in% names(harmonization_sheet)) {

      found_units <- harmonization_sheet %>%
        filter(study == current_cohort & item == current_cat) %>%
        filter(!is.na(final_units) & !(final_units == '') & !(final_units == ' ')) %>%
        distinct(final_units) %>%
        pull(final_units)

      # The filter above already removed NA, '' and ' ', so the only thing
      # left to check is that the unit is unambiguous.
      if (length(found_units) == 1) {
        units <- found_units
      }

    }


    ## Obtaining coding notes (if present)

    # Default is "no notes"; replaced only when exactly one distinct,
    # non-blank note exists for this cohort/variable.
    coding_notes <- ''

    if ('coding_notes' %in% names(harmonization_sheet)) {

      # Inside filter()/distinct()/pull(), `coding_notes` refers to the sheet
      # column (data masking), not to the variable assigned just above.
      found_notes <- harmonization_sheet %>%
        filter(study == current_cohort & item == current_cat) %>%
        filter(!is.na(coding_notes) & !(coding_notes == '') & !(coding_notes == ' ')) %>%
        distinct(coding_notes) %>%
        pull(coding_notes)

      if (length(found_notes) == 1) {
        coding_notes <- found_notes
      }

    }


    ## Preparing data for output
    # `{{ }}` on these ordinary variables injects their current values: the
    # cohort label into the comparison and the column name into rename(), so
    # the variable being reported is always available as `value`.
    data_intermediate <- long_dataset %>%
      filter(cohort == {{ current_cohort }}) %>%
      rename(value = {{ current_cat }})


    ## Choosing the output type
    # A variable is treated as continuous when it is numeric and has more
    # than 10 distinct values; everything else is treated as categorical.
    is_continuous <- num_unique_val > 10 && is.numeric(long_dataset[[current_cat]])

    if (is_continuous && num_visits > 1) {

      ## Creating output for continuous, multiple visits
      cont_multi_visit()

    } else if (is_continuous && num_visits == 1) {

      ## Creating output for continuous, one visit

      # Identify which visit
      visit_to_use <- data_intermediate %>%
        filter(cohort == current_cohort & !is.na(value)) %>%
        distinct(visit) %>%
        pull(visit)

      # Limit to visit
      data_intermediate <- data_intermediate %>%
        filter(visit == visit_to_use)

      cont_one_visit()

    } else {

      ## Creating output for categorical variables (one or multiple visits).
      ## Both cases are rendered with cat_multi_visit(); this branch is also
      ## reached by continuous variables with no non-missing visit in this
      ## cohort.
      cat_multi_visit()

    }


    ## Coding notes are printed once, after whichever output was produced
    if (coding_notes != '') {

      cat('\n\n')
      cat('**Notes:**')
      cat('\n\n')
      cat(coding_notes)
      cat('\n\n')

    }

    # Footer; emitted at the end of every variable section.
    cat('<br>\n\n')
    cat(paste0("Created using `psHarmonize` version: ", packageVersion('psHarmonize'), '\n\n'))

  }

}


Try the psHarmonize package in your browser

Any scripts or data that you put into this service are public.

psHarmonize documentation built on April 4, 2025, 1:50 a.m.