# Documentation
#' Format PROBAST data prior to plotting
#' @description Format PROBAST data prior to plotting
#' @param data PROBAST data in format as specified in "example_probast.csv".
#' @param id String specifying the column name containing the identifier for the specific paper/model.
#' @param type String specifying the column name containing the type of model ("d"=derivation, "dv" = "derivation and validation", "v" = validation)
#' @return Tibble of PROBAST data formatted for a PROBAST plot / table
#' @import tibble
#' @import dplyr
#' @import tidyr
#' @export
# Function:
probast_format <- function(data, id = "id", type = "type"){
# Clean-----------------
rob = c("rob_1_1", "rob_1_2", "rob_2_1", "rob_2_2", "rob_2_3", "rob_3_1", "rob_3_2",
"rob_3_3", "rob_3_4", "rob_3_5", "rob_3_6", "rob_4_1", "rob_4_2", "rob_4_3",
"rob_4_4", "rob_4_5", "rob_4_6", "rob_4_7", "rob_4_8", "rob_4_9")
app = c("app_1", "app_2", "app_3")
columns <- c("id", "type", rob, app)
data <- data %>%
dplyr::mutate(id = pull(., id),
type = pull(., type)) %>%
dplyr::select(id, type, starts_with("rob_"), starts_with("app_")) %>%
dplyr::select(all_of(columns))
# Check only 5 types
check_type <- data %>% dplyr::filter(! type %in% c("d", "dvi", "dve", "dvie", "ve"))
if(nrow(check_type)>0){stop(paste0("Please ensure all studies have one of 5 types assigned: d, dvi, dve, dvie, ve"))}
long <- suppressWarnings(data %>%
dplyr::mutate(across(everything(), function(x){as.character(x)})) %>%
tidyr::pivot_longer(cols = -all_of(c("id", "type")), names_to = "name") %>%
tidyr::separate(col = "name", into = c("domain", "subdomain", "item"),sep ="_")) %>%
dplyr::mutate(item = factor(as.numeric(item)),
subdomain = factor(subdomain, levels = c("5", "1","2", "3", "4"),
labels = c( "Overall","Participants", "Predictors", "Outcome", "Analysis")),
domain = factor(domain, levels = c("rob", "app"), labels = c("Risk of Bias", "Applicability")),
type = factor(type, levels = c("d", "dvi", "dve", "dvie", "ve"),
labels = c("Derivation",
"Derivation +\nValidation (Internal)",
"Derivation +\nValidation (External)",
"Derivation +\nValidation (Both)",
"Validation\n(External)")))
data_rob <- long %>%
dplyr::filter(domain == "Risk of Bias") %>%
# Filter for items relevant for specific study design
dplyr::filter(! (type=="Validation (External)"&subdomain=="Analysis"&item %in% c("5", "8", "9"))) %>%
dplyr::mutate(value = tolower(value),
value = case_when(value %in% c("y", "yes", "yes (y)") ~ "Y",
value %in% c("py", "probably yes", "probably yes (py)") ~ "PY",
value %in% c("ni", "no information", "no information (ni)") ~ "NI",
value %in% c("pn", "probably no", "probably no (pn)") ~ "PN",
value %in% c("n", "no", "no (n)") ~ "N")) %>%
dplyr::group_by(id, type, domain, subdomain) %>%
dplyr::mutate(n_y = sum(value %in% c("Y", "PY")),
n_n = sum(value %in% c("N", "PN")),
n_ni = sum(value %in% c("NI")),
assess = case_when(n_y>0&n_n==0&n_ni==0 ~ "Low",
n_y>=0&n_n==0&n_ni>0 ~ "Unclear",
n_y>=0&n_n>0&n_ni>=0 ~ "High"),
assess = factor(assess, levels = c("High", "Unclear", "Low"))) %>%
dplyr::ungroup() %>%
dplyr::select(-starts_with("n_"))
data_rob_overall <- data_rob %>%
dplyr::distinct(id,type, domain, subdomain, assess) %>%
dplyr::group_by(id,type, domain) %>%
dplyr::summarise(n_low = sum(assess=="Low", na.rm = T),
n_high = sum(assess=="High", na.rm = T),
n_unclear = sum(assess=="Unclear", na.rm = T),
assess = case_when(n_low>0&n_high==0&n_unclear==0 ~ "Low",
n_low>=0&n_high==0&n_unclear>0 ~ "Unclear",
n_low>=0&n_high>0&n_unclear>=0 ~ "High",
TRUE ~ NA_character_),
.groups = "drop") %>%
dplyr::mutate(subdomain = factor("Overall", levels = levels(data_rob$subdomain))) %>%
dplyr::select(-starts_with("n_"))
data_app <- long %>%
filter(domain == "Applicability") %>%
dplyr::mutate(value = tolower(value),
value = case_when(value %in% c("low", "l") ~ "Low",
value %in% c("high", "high") ~ "High",
value %in% c("unclear", "u") ~ "Unclear"),
assess = factor(value, levels = c("High", "Unclear", "Low")))
data_app_overall <- data_app %>%
dplyr::distinct(id,type, domain, subdomain, assess) %>%
dplyr::group_by(id,type, domain) %>%
dplyr::summarise(n_low = sum(assess=="Low", na.rm = T),
n_high = sum(assess=="High", na.rm = T),
n_unclear = sum(assess=="Unclear", na.rm = T),
assess = case_when(n_low>0&n_high==0&n_unclear==0 ~ "Low",
n_low>=0&n_high==0&n_unclear>0 ~ "Unclear",
n_low>=0&n_high>0&n_unclear>=0 ~ "High",
TRUE ~ NA_character_),
.groups = "drop") %>%
dplyr::mutate(subdomain = factor("Overall", levels = levels(data_rob$subdomain))) %>%
dplyr::select(-starts_with("n_"))
out <- dplyr::bind_rows(data_rob, data_rob_overall, data_app, data_app_overall) %>%
dplyr::select(id:subdomain, assess, item, value) %>%
dplyr::mutate(assess = factor(assess, levels = c("High", "Unclear", "Low")))
return(out)}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.