R/plot_missing_data.R

Defines functions plot_missing_data

Documented in plot_missing_data

#' Calculate how much data is missing for each patient, visit and test battery.
#'
#' @param missing_data_percentages The tibble generated by calculate_missing_data.
#' @param id_var A quosure representing the column that contains the patient label.
#' @param site_var A quosure representing the column that contains the site name for that patient.
#' @param patient_annotations The tibble representing attendance at visits (containing the patient label, hospital, visit annotation and all dates).
#' @param annotation_threshold The threshold of percentage missing data (expressed as a number between 0 and 100) for annotation.  The default is -1 (therefore showing all percentages).
#' @param width.expansion A coefficient to control the width of the panels for each site
#' @param first_column.width A measurement (using unit()) for the width of the row labels
#' @param panel_sizes The relative sizes (heights) of the two panels
#'
#' @importFrom cowplot plot_grid
#' @importFrom dplyr pull
#' @importFrom dplyr filter
#' @importFrom dplyr mutate
#' @importFrom dplyr %>%
#' @importFrom ggplot2 facet_grid
#' @importFrom ggplot2 geom_text
#' @importFrom ggplot2 geom_tile
#' @importFrom ggplot2 geom_bar
#' @importFrom ggplot2 ggplot_build
#' @importFrom ggplot2 ggplot_gtable
#' @importFrom ggplot2 theme
#' @importFrom ggplot2 element_text
#' @importFrom ggplot2 element_blank
#' @importFrom ggplot2 scale_alpha_manual
#' @importFrom ggplot2 scale_fill_distiller
#' @importFrom forcats fct_rev
#' @importFrom grid grid.draw
#' @importFrom grid unit
#' @importFrom RColorBrewer brewer.pal
#'
#' @export
plot_missing_data = function( missing_data_percentages,
                              id_var,
                              site_var,
                              patient_annotations,
                              annotation_threshold = -1,
                              width.expansion = 15,
                              first_column.width = unit(4,"cm"),
                              panel_sizes=c(1,1,1) ) {

  panel_widths = missing_data_percentages %>%
    group_by( !!site_var ) %>%
    summarise( n_patients = length(unique( !!id_var ) ) )

  ### Patient annotations
  patient_annotations.d = patient_annotations %>%
    pivot_longer( -c( {{id_var  }},
                      {{site_var}} ),
                  names_to  = "variable",
                  values_to = "value" )

  patient_annotations_plot = ggplot( patient_annotations.d,
          aes( x={{id_var}},
               y=.data$variable,
               fill=.data$value ) ) +
    geom_tile( col  ="white",
               size =1,
               na.rm=TRUE) +
    xlab( "" ) +
    ylab( "" ) +
    facet_grid( {{site_var}}, scales="free" ) +
    scale_fill_manual( values=c( "TRUE"= "black",
                                 "FALSE" = NA),
                       guide=FALSE ) +
    theme_bw( ) +
    theme( axis.text.x = element_blank() ) +
    theme( plot.margin = unit(c(0, 5.5, 0, 5.5), "pt")) # TOP RIGHT BOTTOM LEFT
    #theme( axis.text.x = element_text(angle=90) )

  patient_annotations_plot.corrected = rejig_panel_widths( p=patient_annotations_plot,
                                                     w=panel_widths$n_patients,
                                                     w.expansion = width.expansion )

  ### Patient level plot (over all missing data)

  patient_plot.d = missing_data_percentages %>%
    select( {{id_var}}, {{site_var}}, .data$perc_missing ) %>% unique %>%
    mutate( perc_present = 100-.data$perc_missing ) %>%
    pivot_longer( -c({{id_var}}, {{site_var}}),
                  names_to = "variable",
                  values_to = "value" )

  patient_plot = ggplot( patient_plot.d,
                               aes( x={{id_var}},
                                    y=.data$value,
                                    alpha=.data$variable ) ) +
    geom_bar( fill=brewer.pal(3,"Paired")[2],
              stat="identity" ) +
    xlab( "" ) +
    ylab( "% data present (overall)" ) +
    facet_grid( {{site_var}}, scales="free" ) +
    scale_alpha_manual( values=c( "perc_missing" = 0.3,
                                  "perc_present" = 1 ),
                        guide=FALSE ) +
    theme_bw( ) +
    theme( axis.text.x = element_blank() ) +
    theme( plot.margin = unit(c(0, 5.5, 0, 5.5), "pt")) # TOP RIGHT BOTTOM LEFT

  patient_plot.corrected = rejig_panel_widths( p=patient_plot,
                                               w=panel_widths$n_patients,
                                               w.expansion = width.expansion )


  patient_visit_plot.d = missing_data_percentages %>%
    select( {{id_var}}, {{site_var}}, .data$param_visit, .data$Label_Visit_perc_missing ) %>%
    unique %>%
    mutate( perc_missing = .data$Label_Visit_perc_missing ) %>%
    select( -.data$Label_Visit_perc_missing ) %>%
    mutate( perc_present = 100-.data$perc_missing ) %>%
    pivot_longer( -c({{id_var}}, {{site_var}}, .data$param_visit),
                  names_to = "variable",
                  values_to = "value" ) %>%
    filter( !is.na( .data$param_visit ) ) %>%
    filter( .data$variable == "perc_missing" ) %>%
    mutate( param_visit = fct_rev(.data$param_visit) ) #%>%
    #mutate( value = na_if( value, 0 ) )

  # patient_visit_plot =
  # ggplot( patient_visit_plot.d,
  #         aes( x=Label,
  #              y=value,
  #              alpha=variable,
  #              fill=Hospital ) ) +
  #   geom_bar( stat="identity" ) +
  #   facet_grid(param_visit~.) +
  #   scale_alpha_manual( values=c( perc_missing = 0.3,
  #                                 perc_present = 1 ) ) +
  #   theme( axis.text.x = element_text( angle=90 ) )


  patient_visit_plot = ggplot( patient_visit_plot.d ,
                               aes( x    ={{id_var}},
                                    y    =.data$param_visit,
                                    fill =.data$value,
                                    label=sprintf( "%d", round(.data$value) ) ) ) +
    geom_tile( col  ="white",
               size =1,
               na.rm=TRUE) +
    geom_text( data=patient_visit_plot.d %>% filter( .data$value>=annotation_threshold )  ) +
    theme_bw( ) +
    facet_grid( {{site_var}}, scales="free" ) +
    xlab( "" ) +
    ylab( "% data present (at each visit)" ) +
    scale_fill_distiller(palette = "YlOrRd",
                         direction=1,
                         na.value = "white",
                         guide=FALSE ) +
    # scale_colour_manual( values=c("TRUE"="black",
    #                               "FALSE"="white")) +
    theme( axis.text.x = element_text( angle=90 ) ) +
    theme( plot.margin = unit(c(0, 5.5, 0, 5.5), "pt")) # TOP RIGHT BOTTOM LEFT



  patient_visit_plot.corrected = rejig_panel_widths( p=patient_visit_plot,
                                                     w=panel_widths$n_patients,
                                                     w.expansion = width.expansion )



  final_plot = plot_grid( patient_plot.corrected ,
                          patient_annotations_plot.corrected,
                          patient_visit_plot.corrected,
                          ncol=1,
                          rel_heights=panel_sizes)

  plot(final_plot)
}
LisaHopcroft/CTutils documentation built on Oct. 7, 2021, 11:08 p.m.