#' Retrieve data from central
#'
#' Retrieve data from Central server
#' @param fids Form IDs for which data should be retrieved. If NULL, all
#' @param except_fids Form IDs to exclude from retrieval
#' @param clean_column_names Whether to clean the column names
#' @param handle_sefull_split Whether to smartly handle the sefull split by combining all the sefull forms into one
#' @return A list
#' @export
#' @import ruODK
#' @import yaml
#' @import dplyr
#' @import data.table
retrieve_data_from_central <- function(fids = NULL,
except_fids = NULL,
clean_column_names = TRUE,
handle_sefull_split = FALSE){
# Make sure environment variables are sufficient
environment_variables <- Sys.getenv()
ok <- 'bohemia_credentials' %in% names(environment_variables)
if(!ok){
stop('You need to define a bohemia_credentials environment variable. Do this by runnning credentials_check("path/to/bohemia_credentials.yaml")')
}
bohemia_credentials <- Sys.getenv('bohemia_credentials')
# Actually read in the credentials
creds <- yaml::yaml.load_file(bohemia_credentials)
# Set up some parameters
ruODK::ru_setup(
fid = NULL,
# fid = 'ntd',
url = creds$url,
un = creds$un,
pw = creds$pw,
verbose = TRUE,
tz = 'UTC'
)
project_name <- creds$project_name
# Redefine project_list since ruODK function broke
isodt_to_local <- function(datetime_string,
orders = c("YmdHMS", "YmdHMSz"),
tz = get_default_tz()) {
datetime_string %>%
lubridate::parse_date_time(orders = orders) %>%
lubridate::with_tz(., tzone = tz)
}
get_project_list <- function (url = get_default_url(), un = get_default_un(), pw = get_default_pw(),
retries = get_retries(), orders = c("YmdHMS", "YmdHMSz",
"Ymd HMS", "Ymd HMSz", "Ymd", "ymd"), tz = get_default_tz()) {
require(ruODK)
# yell_if_missing(url, un, pw)
httr::RETRY("GET", httr::modify_url(url, path = glue::glue("v1/projects")),
httr::add_headers(Accept = "application/xml", `X-Extended-Metadata` = "true"),
httr::authenticate(un, pw), times = retries) %>%
# yell_if_error(., url, un, pw) %>%
httr::content(.) %>% tibble::tibble(.) %>%
tidyr::unnest_wider(".", names_repair = "universal") %>%
janitor::clean_names(.) %>% dplyr::mutate_at(dplyr::vars("last_submission",
"created_at",
"updated_at"#,
# "deleted_at" # this is the only change
), ~isodt_to_local(.,
orders = orders, tz = tz)) %>% {
if ("archived" %in% names(.)) {
dplyr::mutate(., archived = tidyr::replace_na(archived,
FALSE))
}
else {
.
}
}
}
# List projects on the server
# projects <- ruODK::project_list()
projects <- get_project_list()
# Define which project to use
pid <- projects$id[projects$name == project_name]
ruODK::ru_setup(pid = pid)
# # Get a list of forms in the project
# fl <- get_form_list()
# fl <- ruODK::form_list() # this function stopped working in newer versions,
# replacing here
gfl <- function(pid = get_default_pid(),
url = get_default_url(),
un = get_default_un(),
pw = get_default_pw(),
retries = get_retries(),
orders = c(
"YmdHMS",
"YmdHMSz",
"Ymd HMS",
"Ymd HMSz",
"Ymd",
"ymd"
),
tz = get_default_tz()) {
httr::RETRY(
"GET",
httr::modify_url(url, path = glue::glue("v1/projects/{pid}/forms")),
httr::add_headers(
"Accept" = "application/xml",
"X-Extended-Metadata" = "true"
),
httr::authenticate(un, pw),
times = retries
) %>%
httr::content(.) %>%
tibble::tibble(.) %>%
tidyr::unnest_wider(".", names_repair = "universal") %>%
# tidyr::unnest_wider(
# "reviewStates",
# names_repair = "universal", names_sep = "_"
# ) %>%
# tidyr::unnest_wider(
# "createdBy",
# names_repair = "universal", names_sep = "_"
# ) %>%
janitor::clean_names() %>%
dplyr::mutate_at(
dplyr::vars(dplyr::contains("_at")), # assume datetimes are named "_at"
~ isodt_to_local(., orders = orders, tz = tz)
) %>%
dplyr::mutate(fid = xml_form_id)
}
fl <- gfl()
# Cut down to only the form IDs which are relevant
if(!is.null(fids)){
fl <- fl %>% filter(fid %in% fids)
}
if(!handle_sefull_split){
fl <- fl %>% filter(!fid %in% c('sefullv1', 'sefullv2', 'sefullv3', 'sefullv4', 'sefullv5'))
}
# Remove the except ones
if(!is.null(except_fids)){
message('Not retrieving any data for: ', except_fids)
fl <- fl %>% filter(!fid %in% except_fids)
}
if(nrow(fl) > 0){
# Loop through each form ID and get the submission
out_list <- list()
for(i in 1:nrow(fl)){
this_fid <- fl$fid[i]
message('Form ', i, ' of ', nrow(fl), ': ', this_fid)
# # Get the schema for the form
# schema <- form_schema_ext(fid = this_fid)
# schema_df <- schema %>% dplyr::select(ruodk_name, name, type)
# New zip method
td <- paste0('/tmp/odk/')
if(dir.exists(td)){
unlink(td, recursive = TRUE)
}
dir.create(td)
ruODK::submission_export(
local_dir = td,
overwrite = TRUE,
media = FALSE,
repeats = TRUE,
fid = this_fid,
verbose = TRUE
)
# unzip the downloaded files
ed <- paste0(td, 'unzipped/')
zip_path <- paste0(td, this_fid, '.zip')
unzip(zipfile = zip_path, exdir = ed)
# Read in the downloaded files
file_names <- dir(ed)
data_list <- list()
fid_list <- c()
for(f in 1:length(file_names)){
this_file_name <- file_names[f]
this_sub_form <- gsub('.csv', '', this_file_name)
# See if this is the main submission form or not
is_main <- !grepl('-', this_sub_form)
this_sub_form <- unlist(lapply(strsplit(this_sub_form, split = '-'), function(x){x[length(x)]}))
if(is_main){
this_sub_form <- 'Submissions'
}
fid_list <- c(fid_list, this_sub_form)
file_path <- paste0(ed, this_file_name)
this_data <- readr::read_csv(file_path, guess_max = Inf)
# Clean the column names
if(clean_column_names){
names(this_data) <- unlist(lapply(strsplit(names(this_data), '-'), function(a){a[length(a)]}))
}
this_data$id <- this_data$KEY
this_data <- janitor::clean_names(this_data)
this_data <- this_data[,!duplicated(names(this_data))]
data_list[[f]] <- this_data
}
names(data_list) <- fid_list
out_list[[i]] <- data_list
}
names(out_list) <- fl$fid
# Handle the sefull split
if(handle_sefull_split){
message('Handling the sefull split')
if('sefullv1' %in% names(out_list) | 'sefull' %in% names(out_list)){
for(j in 1:length(names(out_list$sefull))){
message(j)
this_name <- names(out_list$sefull)[j]
df0 <- out_list$sefull[[this_name]]
df1 <- out_list$sefullv1[[this_name]]
df2 <- out_list$sefullv2[[this_name]]
df3 <- out_list$sefullv3[[this_name]]
df4 <- out_list$sefullv4[[this_name]]
df5 <- out_list$sefullv5[[this_name]]
# Handle the possibly empty sefull form
if(nrow(df0) < 1){
df0 <- df1 %>% head(0)
}
if('anc_when' %in% names(df3)){
fix_anc_when <- function(d){
d %>% mutate(anc_when = as.Date(anc_when))
}
df0 <- df0 %>% fix_anc_when()
df1 <- df1 %>% fix_anc_when()
df2 <- df2 %>% fix_anc_when()
df3 <- df3 %>% fix_anc_when()
df4 <- df4 %>% fix_anc_when()
df5 <- df5 %>% fix_anc_when()
}
if('drug_swallow_date' %in% names(df3)){
fix_drug_swallow_date <- function(d){
d %>% mutate(drug_swallow_date = as.Date(drug_swallow_date))
}
df0 <- df0 %>% fix_drug_swallow_date()
df1 <- df1 %>% fix_drug_swallow_date()
df2 <- df2 %>% fix_drug_swallow_date()
df3 <- df3 %>% fix_drug_swallow_date()
df4 <- df4 %>% fix_drug_swallow_date()
df5 <- df5 %>% fix_drug_swallow_date()
}
if('drug_swallow_date_alt' %in% names(df3)){
fix_drug_swallow_date_alt <- function(d){
d %>% mutate(drug_swallow_date_alt = as.POSIXct(drug_swallow_date_alt, tz = 'Europe/Madrid'))
}
df0 <- df0 %>% fix_drug_swallow_date_alt()
df1 <- df1 %>% fix_drug_swallow_date_alt()
df2 <- df2 %>% fix_drug_swallow_date_alt()
df3 <- df3 %>% fix_drug_swallow_date_alt()
df4 <- df4 %>% fix_drug_swallow_date_alt()
df5 <- df5 %>% fix_drug_swallow_date_alt()
}
if('irs_past12_check' %in% names(df5)){
fix_irs_past12_check <- function(d){
d %>% mutate(irs_past12_check = as.Date(irs_past12_check))
}
df0 <- df0 %>% fix_irs_past12_check()
df1 <- df1 %>% fix_irs_past12_check()
df2 <- df2 %>% fix_irs_past12_check()
df3 <- df3 %>% fix_irs_past12_check()
df4 <- df4 %>% fix_irs_past12_check()
df5 <- df5 %>% fix_irs_past12_check()
}
if('sec8_q1_dob_1' %in% names(df3)){
fix_sec8_q1_dob_1 <- function(d){
d %>% mutate(sec8_q1_dob_1 = as.POSIXct(sec8_q1_dob_1, tz = 'Europe/Madrid'))
}
df0 <- df0 %>% fix_sec8_q1_dob_1()
df1 <- df1 %>% fix_sec8_q1_dob_1()
df2 <- df2 %>% fix_sec8_q1_dob_1()
df3 <- df3 %>% fix_sec8_q1_dob_1()
df4 <- df4 %>% fix_sec8_q1_dob_1()
df5 <- df5 %>% fix_sec8_q1_dob_1()
}
if('sec8_q1_dob_2' %in% names(df3)){
fix_sec8_q1_dob_2 <- function(d){
d %>% mutate(sec8_q1_dob_2 = as.POSIXct(sec8_q1_dob_2, tz = 'Europe/Madrid'))
}
df0 <- df0 %>% fix_sec8_q1_dob_2()
df1 <- df1 %>% fix_sec8_q1_dob_2()
df2 <- df2 %>% fix_sec8_q1_dob_2()
df3 <- df3 %>% fix_sec8_q1_dob_2()
df4 <- df4 %>% fix_sec8_q1_dob_2()
df5 <- df5 %>% fix_sec8_q1_dob_2()
}
if('sec8_q2_dob_1' %in% names(df3)){
fix_sec8_q2_dob_1 <- function(d){
d %>% mutate(sec8_q2_dob_1 = as.POSIXct(sec8_q2_dob_1, tz = 'Europe/Madrid'))
}
df0 <- df0 %>% fix_sec8_q2_dob_1()
df1 <- df1 %>% fix_sec8_q2_dob_1()
df2 <- df2 %>% fix_sec8_q2_dob_1()
df3 <- df3 %>% fix_sec8_q2_dob_1()
df4 <- df4 %>% fix_sec8_q2_dob_1()
df5 <- df5 %>% fix_sec8_q2_dob_1()
}
if('sec8_q2_dob_2' %in% names(df3)){
fix_sec8_q2_dob_2 <- function(d){
d %>% mutate(sec8_q2_dob_2 = as.POSIXct(sec8_q2_dob_2, tz = 'Europe/Madrid'))
}
df0 <- df0 %>% fix_sec8_q2_dob_2()
df1 <- df1 %>% fix_sec8_q2_dob_2()
df2 <- df2 %>% fix_sec8_q2_dob_2()
df3 <- df3 %>% fix_sec8_q2_dob_2()
df4 <- df4 %>% fix_sec8_q2_dob_2()
df5 <- df5 %>% fix_sec8_q2_dob_2()
}
# if('ind_dob' %in% names(df0)){
# fix_ind_dob <- function(d){
# d %>% mutate(ind_dob = as.Date(ind_dob))
# }
# df0 <- df0 %>% fix_ind_dob()
# df1 <- df1 %>% fix_ind_dob()
# df2 <- df2 %>% fix_ind_dob()
# df3 <- df3 %>% fix_ind_dob()
# df4 <- df4 %>% fix_ind_dob()
# df5 <- df5 %>% fix_ind_dob()
# }
# if('ind_dob2' %in% names(df0)){
# fix_ind_dob2 <- function(d){
# d %>% mutate(ind_dob2 = as.Date(ind_dob2))
# }
# df0 <- df0 %>% fix_ind_dob2()
# df1 <- df1 %>% fix_ind_dob2()
# df2 <- df2 %>% fix_ind_dob2()
# df3 <- df3 %>% fix_ind_dob2()
# df4 <- df4 %>% fix_ind_dob2()
# df5 <- df5 %>% fix_ind_dob2()
# }
# if('dobirth' %in% names(df0)){
# fix_dobirth<- function(d){
# d %>% mutate(dobirth = as.Date(dobirth))
# }
# df0 <- df0 %>% fix_dobirth()
# df1 <- df1 %>% fix_dobirth()
# df2 <- df2 %>% fix_dobirth()
# df3 <- df3 %>% fix_dobirth()
# df4 <- df4 %>% fix_dobirth()
# df5 <- df5 %>% fix_dobirth()
# }
# if('dob_metadata' %in% names(df0)){
# fix_dob_metadata<- function(d){
# d %>% mutate(dob_metadata = as.Date(dob_metadata))
# }
# df0 <- df0 %>% fix_dob_metadata()
# df1 <- df1 %>% fix_dob_metadata()
# df2 <- df2 %>% fix_dob_metadata()
# df3 <- df3 %>% fix_dob_metadata()
# df4 <- df4 %>% fix_dob_metadata()
# df5 <- df5 %>% fix_dob_metadata()
# }
x <-
tibble(data.table::rbindlist(
list(df0, df1, df2, df3, df4, df5)
))
out_list$sefull[[this_name]] <- x
}
}
out_list$sefullv1 <- NULL
out_list$sefullv2 <- NULL
out_list$sefullv3 <- NULL
out_list$sefullv4 <- NULL
out_list$sefullv5 <- NULL
}
data_list <- out_list
message('Returning a list of length ', length(data_list))
return(data_list)
} else {
message('There are no forms with the IDs supplied. Returning an empty list')
return(list())
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.