# Original values --------
#
# Writes a bold heading, builds a per-cohort / per-visit summary table of the
# `source_value` column of `data_intermediate`, and draws a histogram of
# `source_value` for the rows whose `cohort` equals `current_cohort`.
# `data_intermediate`, `current_cat`, `current_cohort` and `source_units` are
# not defined in this section; they must already exist when it runs.

  cat(glue('**Original values of {current_cat}**'))

  cat('\n')
  cat('\n')

  ## Table --------

  table_summ <- data_intermediate %>%
    group_by(cohort, visit) %>%
    summarise(
      n = n(),
      n_missing = sum(is.na(source_value)),
      # min()/max() with na.rm = TRUE return Inf/-Inf (and warn) when a group
      # has no non-missing values; report NA for such groups instead.
      min = if (all(is.na(source_value))) {
        NA_real_
      } else {
        min(source_value, na.rm = TRUE)
      },
      Q1 = quantile(source_value, probs = c(0.25), na.rm = TRUE),
      median = median(source_value, na.rm = TRUE),
      Q3 = quantile(source_value, probs = c(0.75), na.rm = TRUE),
      max = if (all(is.na(source_value))) {
        NA_real_
      } else {
        max(source_value, na.rm = TRUE)
      },
      mean = mean(source_value, na.rm = TRUE),
      std = sd(source_value, na.rm = TRUE),
      # Drop all grouping here: avoids the "grouped output by" message and
      # makes a separate ungroup() step unnecessary.
      .groups = 'drop'
    ) %>%
    # Counts become comma-separated strings; statistics are rounded to one
    # decimal place for display.
    mutate(n = format(n, big.mark = ','),
           n_missing = format(n_missing, big.mark = ','),
           min = round(min, digits = 1),
           Q1 = round(Q1, digits = 1),
           median = round(median, digits = 1),
           Q3 = round(Q3, digits = 1),
           max = round(max, digits = 1),
           mean = round(mean, digits = 1),
           std = round(std, digits = 1))

  kable(table_summ) %>%
    kable_styling()


  cat('\n')
  cat('\n')


  ## Plot --------

  # Unlike the table (all cohorts), the histogram is restricted to the rows of
  # `current_cohort`.
  data_intermediate %>%
    filter(cohort == current_cohort) %>%
    ggplot(aes(x = source_value)) +
    # na.rm = TRUE removes missing values without a warning; the number of
    # missing values is already reported in the table's n_missing column.
    geom_histogram(bins = 30, na.rm = TRUE) +
    ggtitle(glue('Original values for {current_cohort}')) +
    theme_bw() +
    xlab(source_units)

  cat('\n')
  cat('\n')




  # Harmonized values --------
  #
  # Writes a bold heading, builds a per-cohort / per-visit summary table of the
  # `value` column of `data_intermediate`, and draws a histogram of `value` for
  # the rows whose `cohort` equals `current_cohort`.
  # `data_intermediate`, `current_cat`, `current_cohort` and `units` are not
  # defined in this section; they must already exist when it runs.

  cat(glue('**Harmonized values of {current_cat}**'))

  cat('\n')
  cat('\n')

  ## Table ---------

  table_summ <- data_intermediate %>%
    group_by(cohort, visit) %>%
    summarise(
      n = n(),
      n_missing = sum(is.na(value)),
      # min()/max() with na.rm = TRUE return Inf/-Inf (and warn) when a group
      # has no non-missing values; report NA for such groups instead.
      min = if (all(is.na(value))) {
        NA_real_
      } else {
        min(value, na.rm = TRUE)
      },
      Q1 = quantile(value, probs = c(0.25), na.rm = TRUE),
      median = median(value, na.rm = TRUE),
      Q3 = quantile(value, probs = c(0.75), na.rm = TRUE),
      max = if (all(is.na(value))) {
        NA_real_
      } else {
        max(value, na.rm = TRUE)
      },
      mean = mean(value, na.rm = TRUE),
      std = sd(value, na.rm = TRUE),
      # Drop all grouping here: avoids the "grouped output by" message and
      # makes a separate ungroup() step unnecessary.
      .groups = 'drop'
    ) %>%
    # Counts become comma-separated strings; statistics are rounded to one
    # decimal place for display.
    mutate(n = format(n, big.mark = ','),
           n_missing = format(n_missing, big.mark = ','),
           min = round(min, digits = 1),
           Q1 = round(Q1, digits = 1),
           median = round(median, digits = 1),
           Q3 = round(Q3, digits = 1),
           max = round(max, digits = 1),
           mean = round(mean, digits = 1),
           std = round(std, digits = 1))

  kable(table_summ) %>%
    kable_styling()


  cat('\n')
  cat('\n')


  ## Plot ----------

  # Unlike the table (all cohorts), the histogram is restricted to the rows of
  # `current_cohort`.
  data_intermediate %>%
    filter(cohort == current_cohort) %>%
    ggplot(aes(x = value)) +
    # na.rm = TRUE removes missing values without a warning; the number of
    # missing values is already reported in the table's n_missing column.
    geom_histogram(bins = 30, na.rm = TRUE) +
    ggtitle(glue('Harmonized values for {current_cohort}')) +
    theme_bw() +
    xlab(units)

  cat('\n')
  cat('\n')


# Try the psHarmonize package in your browser

# Any scripts or data that you put into this service are public.

# psHarmonize documentation built on April 4, 2025, 1:50 a.m.