## First this is to loop through folder and bringing in file one at a time and
## creating the data dictionary. Then append the file with the next file in the
## folder. - Jeff F
library (tidyverse)
library (lubridate)
library (stringr)
library (readxl)
library (xlsx)
library (hablar)
rm(list = ls())
# First lets create the overview page.
setwd("Z:/Data Files/Practice Data")
tab1 <- read_xlsx (path="DataDictionarySchema.xlsx", col_names = FALSE)
setwd("Z:/Data Files/Practice Data/Data Dictionary")
## Now lets load all the files in the folder and the proper sheets - Jeff F
temp = list.files(pattern="*.xlsx")
temp5 = list.files(pattern="*.csv")
for (i in 1:length(temp)) {
assign(paste0("survey_", temp[i]), readxl::read_excel(temp[i], sheet = "survey"))
}
for (i in 1:length(temp)) {
assign(paste0("choices_", temp[i]), readxl::read_excel(temp[i], sheet = "choices"))
}
for (i in 1:length(temp)) {
assign(paste0("settings_", temp[i]), readxl::read_excel(temp[i], sheet = "settings"))
}
for (i in 1:length(temp5)) {
assign(temp5[i], read_csv(file = temp5[i]))
}
## Next lets create the data dictionary... gonna have to loop through all the
## different survey forms and input into just 1 file - Jeff
temp2 <- mget(ls(pattern="settings_*"))
temp3 <- mget(ls(pattern="survey_*"))
temp4 <- mget(ls(pattern ="choices_*"))
temp6 <- mget(ls(pattern ="*.csv"))
test <- NA
tab2_temp <- temp3
tab2_temp2 <- temp3
tab2 <- temp3
tab3 <- temp4
for (i in 1:length(temp)) {
#fix column 3 name (it sometimes comes in differently):
colnames(temp3[[i]])[3] <- 'label:English'
#grab form title from temp2
test <- temp2[[i]]$form_title
#make start_date and end_date dates if the file exists:
j <- match(paste0(temp2[[i]]$form_title, ".csv"), temp5)
mintoday <- ifelse(is.na(j), NA, min(temp6[[j]]$today))
maxtoday <- ifelse(is.na(j), NA, max(temp6[[j]]$today))
#Lets create each table at a time
tab2_temp[[i]] <- temp3[[i]] %>%
mutate (survey_name = test, var_type = type, start_date = mintoday,
end_date = maxtoday, comment = NA) %>%
rename(var_label = type, variable_name = name, description = 'label:English') %>%
select (survey_name,
variable_name, var_label, var_type, description,
start_date, end_date, comment) %>%
add_row(survey_name = 'SurveyCTO name',
variable_name = 'Name column in "survey" tab', var_label = 'in "type" column', var_type = 'factor, date, numeric, text …', description = 'label:English column - just paste in the question', start_date = 'Date column was initiated', end_date = 'Date column was discontinued', comment = 'Additional information for user', .before = 1)
#Then move that table into a group of tables called tab2
tab2[[i]] <- tab2_temp[[i]] %>% filter (var_label != 'note' & var_label != 'end group' &
var_label != 'begin group' & var_label != 'end repeat' &
var_label != 'begin repeat') %>%
mutate (var_type = ifelse(var_type == 'start', 'date', var_type),
var_type = ifelse(var_type == 'end', 'date', var_type),
var_type = ifelse(var_type == 'calculate', 'numeric', var_type),
var_type = ifelse(var_type == 'integer', 'numeric', var_type),
var_type = ifelse(var_type == 'calculate', 'numeric', var_type),
var_type = ifelse(var_type == 'text', 'string', var_type),
var_type = ifelse(grepl('select_one', var_type), 'categorical (select one)', var_type),
var_type = ifelse(grepl('select_multiple', var_type), 'categorical (select multiple)', var_type),
var_type = ifelse(var_type == 'barcode', 'string', var_type)) %>%
mutate (var_type = ifelse(variable_name == 'today', 'date', var_type))
}
## Need to do tab 3: the factor values next:
for (i in length(temp)) {
colnames(temp4[[i]])[3] <- 'label:English'
tab3[[i]] <- temp4[[i]] %>%
mutate (start_date = NA, end_date = NA) %>%
rename (dictionary_id = list_name, factor_value = value, factor_label = 'label:English') %>%
select (dictionary_id, factor_value, factor_label, start_date, end_date)
}
## Finally all I need to do is output the data dictionary! Almost done!
## wow this runs super slow... took 45 minutes to complete 6 files (network is
## running slow today though)
for (i in seq_along(temp) ) {
write.xlsx(as.data.frame(tab1), file = paste0("Z:/Data Files/Practice Data/Data Dictionary/Output/Data_dictionary_", tab2[[i]]$survey_name[2], ".xlsx"), sheetName = "Data Dictionary-overview", row.names = FALSE, col.names = FALSE, showNA=FALSE)
write.xlsx(as.data.frame(tab2[[i]]), file = paste0("Z:/Data Files/Practice Data/Data Dictionary/Output/Data_dictionary_", tab2[[i]]$survey_name[2], ".xlsx"), sheetName = "Data Dictionary-1", append=TRUE, row.names = FALSE, showNA=FALSE)
write.xlsx(as.data.frame(tab3[[i]]), file = paste0("Z:/Data Files/Practice Data/Data Dictionary/Output/Data_dictionary_", tab2[[i]]$survey_name[2], ".xlsx"), sheetName = "Data Dictionary-2", append=TRUE, row.names = FALSE, showNA=FALSE)
}
# Extra: need to do formatting but will probably not have enough time for that
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.