#' Interactively create a UK Biobank dataset
#'
#' Launches a shiny app to interactively create a UK Biobank dataset
#'
#' @inheritParams ukbwranglr::read_ukb
#' @inheritParams shiny::shinyApp
#' @param ... Additional parameters passed on to \code{\link[shiny]{shinyApp}}
#' @import shiny
#' @import shinyFiles
#'
#' @return NULL
#'
#' @export
#' @examples
#' \dontrun{
#' runMakeDataset()
#' }
runMakeDataset <- function(ukb_data_dict = ukbwranglr::get_ukb_data_dict(),
ukb_codings = ukbwranglr::get_ukb_codings(),
options = list(launch.browser = TRUE),
...) {
# SETUP -------------------------------------------------------------------
selected_data_dict_cols <- c(
"Field",
"FieldID",
"instance",
"array",
"Path",
"Category",
"Participants",
"Items",
"Stability",
"ValueType",
"Units",
"ItemType",
"Strata",
"Sexed",
"Instances",
"Array",
"Coding",
"Notes",
"Link",
"descriptive_colnames",
"colheaders_raw",
"colheaders_processed"
)
derived_variables_list <- list(
"Date of birth" = list(
derive_function = ukbwranglr::derive_dob,
required_fids = ukbwranglr::derive_dob(.details_only = TRUE)$required_field_ids
),
"Ethnicity, simplified" = list(
derive_function = ukbwranglr::derive_ethnic_background_simplified,
required_fids = ukbwranglr::derive_ethnic_background_simplified(.details_only = TRUE)$required_field_ids
)
)
derived_variables_required_fids_df <- derived_variables_list %>%
purrr::map("required_fids") %>%
purrr::map(toString) %>%
tibble::as_tibble() %>%
t() %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "Derived variable") %>%
dplyr::rename("Required FieldIDs" = .data[["V1"]])
# UI ----------------------------------------------------------------------
# Conditional UI elements -------------------------------------------------
download_data_dict_tabs <- tabsetPanel(
id = "download_data_dict_inputs",
type = "hidden",
tabPanel("empty"),
tabPanel(
"download_data_dict_inputs",
tags$hr(),
h4("Download data dictionary"),
downloadButton("download_data_dict_full",
label = "All variables"),
downloadButton("download_data_dict_selected",
label = "Selected variables")
)
)
make_dataset_tabs <- tabsetPanel(
id = "make_dataset_inputs",
type = "hidden",
tabPanel("empty"),
tabPanel(
"make_dataset_inputs",
tags$hr(),
h4("Create dataset"),
numericInput(
"nrows",
"N rows to include",
value = 510000,
min = 0,
max = 510000
),
checkboxInput("descriptive_colnames", "Descriptive column names", value = TRUE),
checkboxInput("labelled", "Labelled", value = TRUE),
uiOutput("derived_variables_input"),
selectInput(
inputId = "summarise_numerical_variables",
"Summarise numerical variables",
choices = c("mean", "min", "max", "sum", "n_values"),
multiple = TRUE
),
textInput(
"ukb_dataset_outname",
"Dataset file name",
value = paste0("my_ukb_dataset_", Sys.Date(), ".rds")
),
downloadButton("ukb_dataset",
label = "Make dataset"),
)
)
clinical_events_tabs <- tabsetPanel(
id = "clinical_events_inputs",
type = "hidden",
tabPanel("empty"),
tabPanel(
"clinical_events_inputs",
tags$hr(),
h4("Clinical events"),
numericInput(
"nrows_clinical_events",
"N rows to include",
value = 510000,
min = 0,
max = 510000
),
textInput(
"clinical_events_outname",
"Clinical events file name",
value = paste0("clinical_events_", Sys.Date(), ".csv")
),
downloadButton("download_clinical_events",
label = "Extract clinical events"),
)
)
# Main UI -----------------------------------------------------------------
ui <- fluidPage(# Application title
titlePanel("Make a UK Biobank dataset"),
# Sidebar
sidebarLayout(
sidebarPanel(
width = 2,
h4("Input file paths"),
shinyFilesButton(
"ukb_main_dataset_filepath",
label = "UKB main dataset",
title = "Please select a UK Biobank main dataset file",
multiple = FALSE,
viewtype = "detail"
),
shinyFilesButton(
"data_dict_preselected",
label = "Data dictionary (pre-selected)",
title = "Please select a UK Biobank main dataset data dictionary file",
multiple = FALSE,
viewtype = "detail"
),
download_data_dict_tabs,
make_dataset_tabs,
clinical_events_tabs
),
# Show the data dictionary
mainPanel(
tabsetPanel(
type = "pills",
tabPanel(
"Interactive data dictionary",
reactable::reactableOutput("data_dict")
),
tabPanel(
"Selected variables",
reactable::reactableOutput("data_dict_selected")
),
tabPanel(
"Selected input filepaths",
h4("UKB main dataset"),
verbatimTextOutput("ukb_main_dataset_filepath"),
h4("Data dictionary (pre-selected)"),
verbatimTextOutput("data_dict_preselected_filepath"),
),
tabPanel(
"Required FieldIDs for derived variables",
reactable::reactableOutput("derived_variables_required_fids")
)
),
width = 9
)
))
# SERVER ------------------------------------------------------------------
server <- function(input, output, session) {
# DERIVED VARIABLE REQUIRED FIDS REACTABLE --------------------------------
output$derived_variables_required_fids <-
reactable::renderReactable(
reactable::reactable(
derived_variables_required_fids_df,
filterable = TRUE,
showPageSizeOptions = TRUE,
pageSizeOptions = c(10, 25, 50, 100, 200),
paginationType = "jump"
)
)
# SHINYFILES SETUP --------------------------------------------------------
volumes <-
c(Home = fs::path_home(),
"R Installation" = R.home(),
getVolumes()())
# Main dataset ------------------------------------------------------------
# main dataset file
shinyFileChoose(input,
"ukb_main_dataset_filepath",
roots = volumes,
session = session)
# print UKB main dataset file selection to console
observe({
cat("\ninput$ukb_main_dataset_filepath value:\n\n")
print(input$ukb_main_dataset_filepath)
})
# print file selection to browser
output$ukb_main_dataset_filepath <- renderPrint({
if (is.integer(input$ukb_main_dataset_filepath)) {
cat("Please select a UKB main dataset")
} else {
parseFilePaths(volumes, input$ukb_main_dataset_filepath)$datapath %>%
stringr::str_split(" \\u2022 ") %>%
.[[1]] %>%
as.list() %>%
do.call(file.path, .)
}
})
# get filepath to main dataset
ukb_main_dataset_filepath <- reactive({
if (is.integer(input$ukb_main_dataset_filepath)) {
cat("No files have been selected (shinyFileChoose)")
} else {
parseFilePaths(volumes, input$ukb_main_dataset_filepath)$datapath %>%
stringr::str_split(" \\u2022 ") %>%
.[[1]] %>%
as.list() %>%
do.call(file.path, .)
}
})
# Data dictionary (pre-selected) ------------------------------------------
# pre-selected data dictionary
shinyFileChoose(input,
"data_dict_preselected",
roots = volumes,
session = session)
# print selection to console
observe({
cat("\ninput$data_dict_preselected value:\n\n")
print(input$data_dict_preselected)
})
# print file selection to browser
output$data_dict_preselected_filepath <- renderPrint({
if (is.integer(input$data_dict_preselected)) {
cat("Please select a data dictionary (optional)")
} else {
parseFilePaths(volumes, input$data_dict_preselected)$datapath %>%
stringr::str_split(" \\u2022 ") %>%
.[[1]] %>%
as.list() %>%
do.call(file.path, .)
}
})
# get filepath to selected UKB data dictionary file (of pre-selected variables)
data_dict_preselected_filepath <- reactive({
if (is.integer(input$data_dict_preselected)) {
cat("No files have been selected (shinyFileChoose)")
} else {
parseFilePaths(volumes, input$data_dict_preselected)$datapath %>%
stringr::str_split(" \\u2022 ") %>%
.[[1]] %>%
as.list() %>%
do.call(file.path, .)
}
})
# DATA DICTIONARY REACTABLE TABLE -----------------------------------------
# Full data dictionary reactable ------------------------------------------
# create data dict
ukb_main_dataset_filepath_ext <- reactive({
req(ukb_main_dataset_filepath())
ext <- ukbwranglr:::extract_file_ext(ukb_main_dataset_filepath())
if (!ext %in% c("csv", "tsv", "txt", "tab", "dta")) {
validate(
"Invalid file extension for main dataset; Please upload a comma (.csv), tab delimited (.tsv/.txt/.tab) or STATA (.dta) file"
)
}
ext
})
data_dict <- reactive({
req(ukb_main_dataset_filepath(),
ukb_main_dataset_filepath_ext())
switch(
ukb_main_dataset_filepath_ext(),
csv = ukbwranglr::make_data_dict(
ukb_main_dataset_filepath(),
delim = ",",
ukb_data_dict = ukb_data_dict
) %>%
dplyr::select(tidyselect::all_of(selected_data_dict_cols)),
tsv = ukbwranglr::make_data_dict(
ukb_main_dataset_filepath(),
delim = "\t",
ukb_data_dict = ukb_data_dict
) %>%
dplyr::select(tidyselect::all_of(selected_data_dict_cols)),
txt = ukbwranglr::make_data_dict(
ukb_main_dataset_filepath(),
delim = "\t",
ukb_data_dict = ukb_data_dict
) %>%
dplyr::select(tidyselect::all_of(selected_data_dict_cols)),
tab = ukbwranglr::make_data_dict(
ukb_main_dataset_filepath(),
delim = "\t",
ukb_data_dict = ukb_data_dict
) %>%
dplyr::select(tidyselect::all_of(selected_data_dict_cols)),
dta = ukbwranglr::make_data_dict(
ukb_main_dataset_filepath(),
delim = "\t",
ukb_data_dict = ukb_data_dict
) %>%
dplyr::select(tidyselect::all_of(selected_data_dict_cols))
)
})
# read data dictionary of pre-selected variables (optional)
data_dict_preselected <- reactive({
req(data_dict_preselected_filepath())
ext <- ukbwranglr:::extract_file_ext(data_dict_preselected_filepath())
if (ext != "csv") {
validate("Invalid file extension for data dicionary, must be a csv file")
}
readr::read_csv(data_dict_preselected_filepath())
})
# display data dictionary for selected UKB file
output$data_dict <- reactable::renderReactable({
req(data_dict())
reactable::reactable(
# data,
data_dict(),
filterable = TRUE,
searchable = TRUE,
showPageSizeOptions = TRUE,
pageSizeOptions = c(10, 25, 50, 100, 200),
groupBy = "Field",
selection = "multiple",
onClick = "select",
defaultSelected = 1,
paginationType = "jump",
theme = reactable::reactableTheme(
rowSelectedStyle = list(backgroundColor = "#eee", boxShadow = "inset 2px 0 0 0 #ffa62d")
)
)
})
# selected rows
selected <-
reactive(reactable::getReactableState("data_dict", "selected"))
# Update selected variables from pre-selected data dict upload ------------
# update selected rows to include those in data_dict_preselected
observeEvent(data_dict_preselected(), {
# currently selected
currently_selected_rowids <-
reactable::getReactableState("data_dict", "selected")
# get row indices to update
preselected_rowids <- data_dict() %>%
tibble::rowid_to_column() %>%
dplyr::filter(.data[["colheaders_raw"]] %in% data_dict_preselected()$colheaders_raw) %>%
dplyr::pull(.data[["rowid"]])
# union
updated_selection_rowids <-
unique(c(currently_selected_rowids, preselected_rowids))
# update
reactable::updateReactable("data_dict", selected = updated_selection_rowids)
})
# Selected variables reactable --------------------------------------------
# print selected variables as reactable
output$data_dict_selected <- reactable::renderReactable({
req(data_dict())
req(selected())
reactable::reactable(
# data,
data_dict()[selected(),],
filterable = TRUE,
searchable = TRUE,
showPageSizeOptions = TRUE,
pageSizeOptions = c(10, 25, 50, 100, 200),
groupBy = "Field",
paginationType = "jump"
)
})
# print selected rows in console
observe({
print(data_dict()[selected(),])
})
# DYNAMIC UI ELEMENTS -----------------------------------------------------
# Download and make dataset tabs ------------------------------------------
observeEvent(data_dict(), {
if (!is.null(data_dict())) {
updateTabsetPanel(inputId = "download_data_dict_inputs", selected = "download_data_dict_inputs")
updateTabsetPanel(inputId = "make_dataset_inputs", selected = "make_dataset_inputs")
updateTabsetPanel(inputId = "clinical_events_inputs", selected = "clinical_events_inputs")
} else {
updateTabsetPanel(inputId = "download_data_dict_inputs", selected = "empty")
updateTabsetPanel(inputId = "make_dataset_inputs", selected = "empty")
updateTabsetPanel(inputId = "clinical_events_inputs", selected = "empty")
}
})
# Derived variables -------------------------------------------------------
derived_variables_list_available <- reactive({
req(selected())
selected_fieldids <- data_dict()[selected(),]$FieldID
derived_variables_list %>%
purrr::keep( ~ all(.x$required_fids %in% selected_fieldids))
})
output$derived_variables_input <- renderUI({
selectInput(
inputId = "derived_variables_input",
label = "Select variables to derive",
multiple = TRUE,
choices = names(derived_variables_list_available())
)
})
# DOWNLOADS ---------------------------------------------------------------
# Data dictionary - full --------------------------------------------------
output$download_data_dict_full <- downloadHandler(
filename = function() {
("data_dict_full.csv")
},
content = function(file) {
readr::write_csv(data_dict(), file, na = "")
}
)
# Data dictionary - selected rows only ------------------------------------
output$download_data_dict_selected <- downloadHandler(
filename = function() {
("data_dict_selected.csv")
},
content = function(file) {
readr::write_csv(data_dict()[selected(),], file, na = "")
}
)
# UKB dataset -------------------------------------------------------------
output$ukb_dataset <- downloadHandler(
filename = function() {
input$ukb_dataset_outname
},
content = function(file) {
# setup
ext <- ukbwranglr:::extract_file_ext(file)
if (!ext %in% c("rds", "dta", "csv", "tsv")) {
showNotification(
"Invalid file type. File extensionn must be one of '.rds', '.dta', '.csv' or '.tsv'. Please click 'cancel' in dialog box",
type = "error"
)
stop(
"Invalid file type. File extensionn must be one of '.rds', '.dta', '.csv' or '.tsv'"
)
}
req(data_dict())
delim <- switch(
ukb_main_dataset_filepath_ext(),
csv = ",",
tsv = "\t",
txt = "\t",
tab = "\t",
dta = ""
)
# read selected variables
notify <- function(msg, id = NULL) {
showNotification(msg,
id = id,
duration = NULL,
closeButton = FALSE)
}
message("Reading UKB data")
id <- notify("Reading data...")
on.exit(removeNotification(id), add = TRUE)
ukb_main <-
ukbwranglr::read_ukb(
path = ukb_main_dataset_filepath(),
delim = delim,
ukb_data_dict = ukb_data_dict,
ukb_codings = ukb_codings,
data_dict = data_dict()[selected(),],
descriptive_colnames = input$descriptive_colnames,
label = input$labelled,
nrows = input$nrows
)
# TODO - get ethnic background working
# derive variables
if (length(input$derived_variables_input > 0)) {
notify("Deriving variables...", id = id)
for (variable in input$derived_variables_input) {
ukb_main <-
derived_variables_list_available()[[variable]]$derive_function(ukb_main,
ukb_data_dict = ukb_data_dict)
}
}
# summarise numerical variables
if (length(input$summarise_numerical_variables > 0)) {
notify("Summarising numerical variables...", id = id)
for (summary_function in input$summarise_numerical_variables) {
try(ukb_main <- ukbwranglr::summarise_numerical_variables(ukb_main = ukb_main,
summary_function = summary_function))
}
}
if (ext == "dta") {
# STATA colheaders cannot contain '.' and must not be too long
message("STATA output: renaming columns...\n")
notify("STATA output: renaming columns...", id = id)
names(ukb_main) <- ukbwranglr::make_data_dict(ukb_main,
ukb_data_dict = ukb_data_dict)$colheaders_processed
# names(ukb_main) <- data_dict()[selected(), ]$colheaders_processed
}
# write dataset
message("Writing data\n")
notify("Writing data...", id = id)
switch(
ext,
rds = saveRDS(ukb_main, file = file),
dta = haven::write_dta(ukb_main, path = file),
csv = readr::write_csv(ukb_main, file = file, na = ""),
tsv = readr::write_tsv(ukb_main, file = file, na = "")
)
message("Finished writing data")
showNotification("Dataset succesfully created!", duration = 10)
}
)
# Clinical events ---------------------------------------------------------
output$download_clinical_events <- downloadHandler(
filename = function() {
input$clinical_events_outname
},
content = function(file) {
# TODO - extract this to function (currently c+p from ukb dataset section)
# setup
ext <- ukbwranglr:::extract_file_ext(file)
if (!ext %in% c("rds", "dta", "csv", "tsv")) {
showNotification(
"Invalid file type. File extensionn must be one of '.rds', '.dta', '.csv' or '.tsv'. Please click 'cancel' in dialog box",
type = "error"
)
stop(
"Invalid file type. File extensionn must be one of '.rds', '.dta', '.csv' or '.tsv'"
)
}
req(data_dict())
delim <- switch(
ukb_main_dataset_filepath_ext(),
csv = ",",
tsv = "\t",
txt = "\t",
tab = "\t",
dta = ""
)
# get available clinical events FIDs
available_clinical_events <-
ukbwranglr:::CLINICAL_EVENTS_FIELD_IDS %>%
purrr::keep(~ all(.x %in% data_dict()$FieldID))
available_clinical_events_fids <- available_clinical_events %>%
purrr::reduce(c)
message(paste0(
"Available clinical events: ",
stringr::str_c(
names(available_clinical_events),
sep = "",
collapse = ", "
)
))
clinical_events_data_dict <- data_dict() %>%
dplyr::filter(.data[["FieldID"]] %in% c("eid",
available_clinical_events_fids))
# read clinical events variables
notify <- function(msg, id = NULL) {
showNotification(msg,
id = id,
duration = NULL,
closeButton = FALSE)
}
message("Reading UKB data")
id <- notify("Reading clinical events data...")
on.exit(removeNotification(id), add = TRUE)
result <-
ukbwranglr::read_ukb(
path = ukb_main_dataset_filepath(),
delim = delim,
ukb_data_dict = ukb_data_dict,
ukb_codings = ukb_codings,
data_dict = clinical_events_data_dict,
descriptive_colnames = TRUE,
label = FALSE,
nrows = input$nrows_clinical_events
)
# tidy
# write dataset
message("Tidying clinical events data\n")
notify("Tidying clinical events...", id = id)
result <- ukbwranglr::tidy_clinical_events(
ukb_main = result,
ukb_data_dict = ukb_data_dict,
ukb_codings = ukb_codings,
strict = FALSE
) %>%
dplyr::bind_rows()
# write dataset
message("Writing data\n")
notify("Writing data...", id = id)
readr::write_csv(result, file = file, na = "")
message("Finished writing data")
showNotification("Dataset succesfully created!", duration = 10)
}
)
# RUN APPLICATION ---------------------------------------------------------
}
shinyApp(ui = ui,
server = server,
options = options,
...)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.