#' Calculate how much data is missing for each patient, visit and test battery.
#'
#' @param missing_data_percentages The tibble generated by calculate_missing_data.
#' @param id_var A quosure representing the column that contains the patient label.
#' @param site_var A quosure representing the column that contains the site name for that patient.
#' @param patient_annotations The tibble representing attendance at visits (containing the patient label, hospital, visit annotation and all dates).
#' @param annotation_threshold The threshold of percentage missing data (expressed as a number between 0 and 100) for annotation. The default is -1 (therefore showing all percentages).
#' @param width.expansion A coefficient to control the width of the panels for each site
#' @param first_column.width A measurement (using unit()) for the width of the row labels
#' @param panel_sizes The relative sizes (heights) of the two panels
#'
#' @importFrom cowplot plot_grid
#' @importFrom dplyr pull
#' @importFrom dplyr filter
#' @importFrom dplyr mutate
#' @importFrom dplyr %>%
#' @importFrom ggplot2 facet_grid
#' @importFrom ggplot2 geom_text
#' @importFrom ggplot2 geom_tile
#' @importFrom ggplot2 geom_bar
#' @importFrom ggplot2 ggplot_build
#' @importFrom ggplot2 ggplot_gtable
#' @importFrom ggplot2 theme
#' @importFrom ggplot2 element_text
#' @importFrom ggplot2 element_blank
#' @importFrom ggplot2 scale_alpha_manual
#' @importFrom ggplot2 scale_fill_distiller
#' @importFrom forcats fct_rev
#' @importFrom grid grid.draw
#' @importFrom grid unit
#' @importFrom RColorBrewer brewer.pal
#'
#' @export
plot_missing_data = function( missing_data_percentages,
id_var,
site_var,
patient_annotations,
annotation_threshold = -1,
width.expansion = 15,
first_column.width = unit(4,"cm"),
panel_sizes=c(1,1,1) ) {
panel_widths = missing_data_percentages %>%
group_by( !!site_var ) %>%
summarise( n_patients = length(unique( !!id_var ) ) )
### Patient annotations
patient_annotations.d = patient_annotations %>%
pivot_longer( -c( {{id_var }},
{{site_var}} ),
names_to = "variable",
values_to = "value" )
patient_annotations_plot = ggplot( patient_annotations.d,
aes( x={{id_var}},
y=.data$variable,
fill=.data$value ) ) +
geom_tile( col ="white",
size =1,
na.rm=TRUE) +
xlab( "" ) +
ylab( "" ) +
facet_grid( {{site_var}}, scales="free" ) +
scale_fill_manual( values=c( "TRUE"= "black",
"FALSE" = NA),
guide=FALSE ) +
theme_bw( ) +
theme( axis.text.x = element_blank() ) +
theme( plot.margin = unit(c(0, 5.5, 0, 5.5), "pt")) # TOP RIGHT BOTTOM LEFT
#theme( axis.text.x = element_text(angle=90) )
patient_annotations_plot.corrected = rejig_panel_widths( p=patient_annotations_plot,
w=panel_widths$n_patients,
w.expansion = width.expansion )
### Patient level plot (over all missing data)
patient_plot.d = missing_data_percentages %>%
select( {{id_var}}, {{site_var}}, .data$perc_missing ) %>% unique %>%
mutate( perc_present = 100-.data$perc_missing ) %>%
pivot_longer( -c({{id_var}}, {{site_var}}),
names_to = "variable",
values_to = "value" )
patient_plot = ggplot( patient_plot.d,
aes( x={{id_var}},
y=.data$value,
alpha=.data$variable ) ) +
geom_bar( fill=brewer.pal(3,"Paired")[2],
stat="identity" ) +
xlab( "" ) +
ylab( "% data present (overall)" ) +
facet_grid( {{site_var}}, scales="free" ) +
scale_alpha_manual( values=c( "perc_missing" = 0.3,
"perc_present" = 1 ),
guide=FALSE ) +
theme_bw( ) +
theme( axis.text.x = element_blank() ) +
theme( plot.margin = unit(c(0, 5.5, 0, 5.5), "pt")) # TOP RIGHT BOTTOM LEFT
patient_plot.corrected = rejig_panel_widths( p=patient_plot,
w=panel_widths$n_patients,
w.expansion = width.expansion )
patient_visit_plot.d = missing_data_percentages %>%
select( {{id_var}}, {{site_var}}, .data$param_visit, .data$Label_Visit_perc_missing ) %>%
unique %>%
mutate( perc_missing = .data$Label_Visit_perc_missing ) %>%
select( -.data$Label_Visit_perc_missing ) %>%
mutate( perc_present = 100-.data$perc_missing ) %>%
pivot_longer( -c({{id_var}}, {{site_var}}, .data$param_visit),
names_to = "variable",
values_to = "value" ) %>%
filter( !is.na( .data$param_visit ) ) %>%
filter( .data$variable == "perc_missing" ) %>%
mutate( param_visit = fct_rev(.data$param_visit) ) #%>%
#mutate( value = na_if( value, 0 ) )
# patient_visit_plot =
# ggplot( patient_visit_plot.d,
# aes( x=Label,
# y=value,
# alpha=variable,
# fill=Hospital ) ) +
# geom_bar( stat="identity" ) +
# facet_grid(param_visit~.) +
# scale_alpha_manual( values=c( perc_missing = 0.3,
# perc_present = 1 ) ) +
# theme( axis.text.x = element_text( angle=90 ) )
patient_visit_plot = ggplot( patient_visit_plot.d ,
aes( x ={{id_var}},
y =.data$param_visit,
fill =.data$value,
label=sprintf( "%d", round(.data$value) ) ) ) +
geom_tile( col ="white",
size =1,
na.rm=TRUE) +
geom_text( data=patient_visit_plot.d %>% filter( .data$value>=annotation_threshold ) ) +
theme_bw( ) +
facet_grid( {{site_var}}, scales="free" ) +
xlab( "" ) +
ylab( "% data present (at each visit)" ) +
scale_fill_distiller(palette = "YlOrRd",
direction=1,
na.value = "white",
guide=FALSE ) +
# scale_colour_manual( values=c("TRUE"="black",
# "FALSE"="white")) +
theme( axis.text.x = element_text( angle=90 ) ) +
theme( plot.margin = unit(c(0, 5.5, 0, 5.5), "pt")) # TOP RIGHT BOTTOM LEFT
patient_visit_plot.corrected = rejig_panel_widths( p=patient_visit_plot,
w=panel_widths$n_patients,
w.expansion = width.expansion )
final_plot = plot_grid( patient_plot.corrected ,
patient_annotations_plot.corrected,
patient_visit_plot.corrected,
ncol=1,
rel_heights=panel_sizes)
plot(final_plot)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.