#' fbs_databases: Generate all desired databases for the Food and Brain Study
#'
#' This function will generate up-to-date databases for the Food and Brain Study. The following databases are will be created unless only specific databases are specified:
#' 1) Demographics database: includes child (e.g., age, sex, pubertal status), parent characteristics (e.g., age, sex, education, alcohol use/abuse), and household characteristics (e.g., food insecurity, income)
#' 2) Anthropometrics: includes child and parent height/weight and child adiposity (DXA), actigraphy, physical activity, and sleep
#' 3) Intake: includes child liking ratings, Freddy Fullness, and intake for all eating paradigms
#' 4) Food Related Behaviors and Traits: includes all questionnaires assessing child or parent food related behaviors and/or traits (e.g., CEBQ, CFQ, TFEQ, etc.) and body image
#' 5) (Neuro)Psychological Assessments: includes the WASI and all questionnaires assessing the child's cognitive and/or psychosocial functioning (e.g., BRIEF, BIS/BAS, anxiety, etc.)
#' 6) Delay Discounting: includes the modeling results for delay discounting
#' 7) Interoception: includes all data related to the heart beat interoception task
#' 8) Notes: this is a reference databases that includes any research and/or parent notes or updates from each visit
#' 9) Prefer Not to Answer: this is a reference databases that includes any questions the 'prefer not to answer' option was marked. Only saved in case there is a desire to determine the number of responses on an item that were missing versus 'prefer not to answer'.
#' 10) Microstrucure: includes databases related to microstructure coding of meals
#'
#' To process the raw data, the raw databases from Qualtrics MUST follow the naming convention: Child_V1_YYYY-MM-DD.sav, Child_V1_Home_YYY-MM-DD.sav, Child_V1_Lab_YYY-MM-DD.sav, and Parent_V1_YYY-MM-DD.sav. The databases must all be in the SAME directory to be processed if the data_path is not entered and the directory organization does not follow the structure laid out in the DataManual.
#'
#' @param databases (optional) list of strings to indicate which databases to process. If not entered, all databases will be generated. Options include: 1) 'demo' for Demographic, 2) 'anthro' for Anthropometrics, 3) 'intake' for Intake, 4) 'food_qs' for food-related questionnaires, 5) 'psych_qs' for cognitive and psych related data, 6) 'dd' for Delay Discounting, 7) 'intero' for Interoception data, 8) 'notes' for Notes, 9) 'pna' for Prefer Not to Answer, and 10) 'micro' for microstructure data (note - microstructure will always be in separate databases)
#' @param model_DD Indicate if delay discounting data should be modeled. This will take an addition 3-5 minutes of processing time. Default = FALSE. The Delay Discounting database will only be generate if set to TRUE.
#' @param write_dat indicate whether to write databases. Default is TRUE.
#' @param write_path (optional) a string with the path indicating where to save the generated databases if write_dat is TRUE (default option). If no path is given, databases will be written to working directory.
#' @param return_data indicate whether to return databases to the environment. Default is FALSE.
#' @inheritParams util_fbs_parent_v1dat
#' @param child_file_pattern (optional) This is only needed if file naming deviates from the standard naming: e.g., Child_V#_*sav'. Only enter the string indicating a respondent (e.g., 'Child').
#' @param parent_file_pattern (optional) This is only needed if file naming deviates from the standard naming: e.g., Parent_V#_*sav'. Only enter the string indicating a respondent (e.g., 'Child').
#' @param visit_file_pattern (optional) This is only needed if file naming deviates from the standard naming for visits: e.g., V# . Only enter the string indicating a visit (e.g., 'V' for 'V1' or 'Visit_' for 'Visit_1').
#'
#' @return A list containing all databases that were generated
#'
#' @examples
#' #if in same working directory as data, want all databases, and the databases to be written to working directory:
#' fbs_data_proc <- fbs_databases(model_DD = TRUE)
#'
#' #if only want the Demographics and Intake databases written to working directory:
#' fbs_data_proc <- fbs_databases(databases = c('demo', 'intake'))
#'
#' #if want to work with the Demographics database but not write it to a file:
#' fbs_data_proc <- fbs_databases(databases = 'demo', write_dat = FALSE)
#'
#' \dontrun{
#' #databases must be strings. The following will not run:
#' fbs_data_proc <- fbs_databases(databases = c(demo, intake))
#'
#' }
#'
#' @seealso Raw data from Qualtrics is processed using the following scripts: \code{\link{util_fbs_merge_v1dat}}, \code{\link{util_fbs_merge_v2dat}}, \code{\link{util_fbs_merge_v3dat}}, \code{\link{util_fbs_merge_v4dat}}, \code{\link{util_fbs_merge_v5dat}}, \code{\link{util_fbs_merge_v6dat}}, \code{\link{util_fbs_merge_v7dat}}
#'
#'
#' @export
fbs_databases <- function(databases, model_DD = FALSE, write_dat = TRUE, write_path, data_path, return_data = FALSE, child_file_pattern, parent_file_pattern, visit_file_pattern) {
#### 1. Set up/initial checks #####
# check the file patterns
c_filepat_arg <- methods::hasArg(child_file_pattern)
if (isTRUE(c_filepat_arg)) {
if (!is.character(child_file_pattern)) {
stop("child_file_pattern must be entered as a string: e.g., 'Child'")
} else {
child_fp <- child_file_pattern
}
} else if (isFALSE(c_filepat_arg)) {
child_fp <- 'Child'
}
p_filepat_arg <- methods::hasArg(parent_file_pattern)
if (isTRUE(p_filepat_arg)) {
if (!is.character(parent_file_pattern)) {
stop("parent_file_pattern must be entered as a string: e.g., 'Parent'")
} else {
parent_fp <- parent_file_pattern
}
} else if (isFALSE(p_filepat_arg)) {
parent_fp <- 'Parent'
}
v_filepat_arg <- methods::hasArg(visit_file_pattern)
if (isTRUE(p_filepat_arg)) {
if (!is.character(visit_file_pattern)) {
stop("parent_file_pattern must be entered as a string: e.g., 'V' or 'Visit_'")
} else {
visit_fp <- visit_file_pattern
}
} else if (isFALSE(p_filepat_arg)) {
visit_fp <- 'V'
}
# check datapath
datapath_arg <- methods::hasArg(data_path)
if (isTRUE(datapath_arg)) {
if (!is.character(data_path)) {
stop("data_path must be entered as a string: e.g., '.../Participant_Data/untouchedRaw/")
}
#make universal to 'Untouched_Raw'
if (grepl('Qualtrics_Raw', data_path, fixed = TRUE)){
data_path <- gsub('Qualtrics_Raw', '', data_path)
} else if (grepl('Microstructure_Raw', data_path, fixed = TRUE)){
data_path <- gsub('Microstructure_Raw', '', data_path)
}
}
# check databases argument
databases_arg <- methods::hasArg(databases)
database_options <- c('demo', 'anthro', 'intake', 'food_qs', 'psych_qs', 'dd', 'intero', 'notes', 'pna', 'micro')
if (isTRUE(databases_arg)) {
databases_string <- sapply(databases, FUN = is.character)
ndatabases <- length(databases)
if (sum(databases_string) != ndatabases) {
stop("Not all items listed in databases are strings. All databases must be entered as strings and matach the following options: 'demo', 'anthro', 'intake', 'food_qs', 'psych_qs', 'dd', 'intero', 'notes', 'pna'.")
}
#check if entered databases match database_options
if (sum(databases %in% database_options) != ndatabases){
stop("Not all items listed in databases match available options. Options include: 'demo', 'anthro', 'intake', 'food_qs', 'psych_qs', 'dd', 'intero', 'notes', 'pna', 'micro'.")
}
} else {
databases <- NA
}
# check write_path
writepath_arg <- methods::hasArg(write_path)
if (isTRUE(writepath_arg)) {
if (!is.character(write_path)) {
stop("write_path must be entered as a string: e.g., '.../Participant_Data/Databases/")
}
}
#### 2. Get Visit Databases ####
## Visit 1 data - need for all databases
if (isTRUE(datapath_arg)){
v1_data <- util_fbs_merge_v1(child_file_pattern = paste0(child_fp, '_', visit_fp, '1'), parent_file_pattern = paste0(parent_fp, '_', visit_fp, '1'), data_path = paste0(data_path, '/Qualtrics_Raw/'))
} else {
v1_data <- util_fbs_merge_v1(child_file_pattern = paste0(child_fp, '_', visit_fp, '1'), parent_file_pattern = paste0(parent_fp, '_', visit_fp, '1'))
}
## Visit 2 - need for Anthroprometrics, food/eating behavior, and cog/psych databases
# requires the V4 parent database so check both v2_datestr_arg and v4_datestr_arg
if (isFALSE(databases_arg) | 'anthro' %in% databases | 'food_qs' %in% databases | 'psych_qs' %in% databases | 'intake' %in% databases | 'notes' %in% databases | 'pna' %in% databases){
if (isTRUE(datapath_arg)){
v2_data <- util_fbs_merge_v2(child_file_pattern = paste0(child_fp, '_', visit_fp, '2'), parent_file_pattern = paste0(parent_fp, '_', visit_fp, '2'), parentV4_file_pattern = paste0(parent_fp, '_', visit_fp, '4'), data_path = paste0(data_path, '/Qualtrics_Raw/'))
} else {
v2_data <- util_fbs_merge_v2(child_file_pattern = paste0(child_fp, '_', visit_fp, '2'), parent_file_pattern = paste0(parent_fp, '_', visit_fp, '2'), parentV4_file_pattern = paste0(parent_fp, '_', visit_fp, '4'))
}
}
## Visit 3 data - need for the food/eating behavior, cog/psych, and delay discounting databases
if (isFALSE(databases_arg) | 'food_qs' %in% databases | 'psych_qs' %in% databases | 'dd' %in% databases | 'intake' %in% databases | 'notes' %in% databases | 'pna' %in% databases){
if (isTRUE(datapath_arg)){
v3_data <- util_fbs_merge_v3(child_file_pattern = paste0(child_fp, '_', visit_fp, '3'), parent_file_pattern = paste0(parent_fp, '_', visit_fp, '3'), data_path = paste0(data_path, '/Qualtrics_Raw/'), model_DD = model_DD)
} else {
v3_data <- util_fbs_merge_v3(child_file_pattern = paste0(child_fp, '_', visit_fp, '3'), parent_file_pattern = paste0(parent_fp, '_', visit_fp, '3'), model_DD = model_DD)
}
}
## Visit 4 data - need for the demographics, food/eating behavior, and cog/psych databases
if (isFALSE(databases_arg) | 'demo' %in% databases | 'food_qs' %in% databases | 'psych_qs' %in% databases | 'intake' %in% databases | 'notes' %in% databases | 'pna' %in% databases){
if (isTRUE(datapath_arg)){
v4_data <- util_fbs_merge_v4(child_file_pattern = paste0(child_fp, '_', visit_fp, '4'), parent_file_pattern = paste0(parent_fp, '_', visit_fp, '4'), data_path = paste0(data_path, '/Qualtrics_Raw/'))
} else {
v4_data <- util_fbs_merge_v4(child_file_pattern = paste0(child_fp, '_', visit_fp, '4'), parent_file_pattern = paste0(parent_fp, '_', visit_fp, '4'))
}
}
## Visit 5 data - need for the demographics database
if (isFALSE(databases_arg) | 'demo' %in% databases | 'intake' %in% databases | 'food_qs' %in% databases | 'notes' %in% databases | 'pna' %in% databases){
if (isTRUE(datapath_arg)){
v5_data <- util_fbs_merge_v5(child_file_pattern = paste0(child_fp, '_', visit_fp, '5'), parent_file_pattern = paste0(parent_fp, '_', visit_fp, '5'), data_path = paste0(data_path, '/Qualtrics_Raw/'))
} else {
v5_data <- util_fbs_merge_v5(child_file_pattern = paste0(child_fp, '_', visit_fp, '5'), parent_file_pattern = paste0(parent_fp, '_', visit_fp, '5'))
}
}
## Visit 6 data - need for the fMRI database
if (isFALSE(databases_arg) | 'food_qs' %in% databases | 'notes' %in% databases | 'pna' %in% databases){
if (isTRUE(datapath_arg)){
v6_data <- util_fbs_merge_v6(child_file_pattern = paste0(child_fp, '_', visit_fp, '6'), parent_file_pattern = paste0(parent_fp, '_', visit_fp, '6'), data_path = paste0(data_path, '/Qualtrics_Raw/'))
} else {
v6_data <- util_fbs_merge_v6(child_file_pattern = paste0(child_fp, '_', visit_fp, '6'), parent_file_pattern = paste0(parent_fp, '_', visit_fp, '6'))
}
}
## Visit 7 data - need for the demographics, anthroprometrics, food/eating behavior, and cog/psych databases
if (isFALSE(databases_arg) | 'demo' %in% databases | 'anthro' %in% databases | 'food_qs' %in% databases | 'psych_qs' %in% databases | 'intake' %in% databases | 'notes' %in% databases | 'pna' %in% databases){
if (isTRUE(datapath_arg)){
v7_data <- util_fbs_merge_v7(child_file_pattern = paste0(child_fp, '_', visit_fp, '7'), parent_file_pattern = paste0(parent_fp, '_', visit_fp, '7'), data_path = paste0(data_path, '/Qualtrics_Raw/'))
} else {
v7_data <- util_fbs_merge_v7(child_file_pattern = paste0(child_fp, '_', visit_fp, '7'), parent_file_pattern = paste0(parent_fp, '_', visit_fp, '7'))
}
}
## Microstructure data
if (isFALSE(databases_arg) | 'micro' %in% databases){
if (isTRUE(datapath_arg)){
micro_data <- util_fbs_merge_micro(data_path = paste0(data_path, '/Microstructure_Raw/'))
} else {
micro_data <- util_fbs_merge_micro()
}
}
#### 3. Make Databases #####
#empty list
database_return <- list()
#get cross-database variables
common_demo_data <- v1_data[['data']][c(1:13, 20:21, 60:64, 94:97, 232:233)]
common_demo_labels <- v1_data[['dict']][c(1:13, 20:21, 60:64, 94:97, 232:233)]
names(common_demo_data)[2] <- 'v1_date'
names(common_demo_labels)[2] <- 'v1_date'
## 3b) Demographics ####
if (isFALSE(databases_arg) | 'demo' %in% databases){
#visit 1
v1_demo_data <- v1_data[['data']][c(1:13, 20:21, 60:61, 63, 94:97, 232:233, 14:19, 22:59, 65:87)]
v1_demo_labels <- v1_data[['dict']][c(1:13, 20:21, 60:61, 63, 94:97, 232:233, 14:19, 22:59, 65:87)]
names(v1_demo_data)[2] <- 'v1_date'
names(v1_demo_labels)[2] <- 'v1_date'
#visit 4
v4_demo_data <- v4_data[['data']][c(1:2, 6:99)]
v4_demo_labels <- v4_data[['dict']][c(1:2, 6:99)]
names(v4_demo_data)[2] <- 'v4_date'
names(v4_demo_labels)[2] <- 'v4_date'
#visit 5
v5_demo_data <- v5_data[['data']][c(1:34)]
v5_demo_labels <- v5_data[['dict']][c(1:34)]
names(v5_demo_data)[2] <- 'v5_date'
names(v5_demo_labels)[2] <- 'v5_date'
#visit 7
v7_demo_data <- v7_data[['data']][c(1:2, 5:120)]
v7_demo_labels <- v7_data[['dict']][c(1:2, 5:120)]
names(v7_demo_data)[2] <- 'v7_date'
names(v7_demo_labels)[2] <- 'v7_date'
#re-name variables with 'v7_' and add 'Visit 7 - ' to labels
for (v in 3:ncol(v7_demo_data)){
#names
var_name <- names(v7_demo_data)[v]
v7_name <- paste0('v7_', var_name)
names(v7_demo_data)[v] <- v7_name
#labels
v7_demo_labels[[var_name]] <- paste0('Visit 7 - ', v7_demo_labels[[var_name]])
}
#make names match
names(v7_demo_labels) <- names(v7_demo_data)
## merge databases
#weird issue - couldn't rbind due to some `labels` to logical issue - brute force fix
set_attr_hfssm_hh2 <- attributes(v4_demo_data[['hfssm_hh2']])
set_attr_hfssm_hh3 <- attributes(v4_demo_data[['hfssm_hh3']])
set_attr_hfssm_hh4 <- attributes(v4_demo_data[['hfssm_hh4']])
set_attr_hfssm_ch1 <- attributes(v4_demo_data[['hfssm_ch1']])
set_attr_hfssm_ch2 <- attributes(v4_demo_data[['hfssm_ch2']])
set_attr_hfssm_ch3 <- attributes(v4_demo_data[['hfssm_ch3']])
set_attr_hfssm_ch5a <- attributes(v4_demo_data[['hfssm_ch5a']])
set_attr_hfias_category <- attributes(v4_demo_data[['hfias_category']])
v4_demo_data[c(5:7, 15:17, 20, 50)] <- sapply(v4_demo_data[c(5:7, 15:17, 20, 50)], as.numeric)
demo_v1v4_data <- merge(v1_demo_data, v4_demo_data, by = 'id', all = TRUE)
demo_v1v4v5_data <- merge(demo_v1v4_data, v5_demo_data, by = 'id', all = TRUE)
demo_v1v4v5v7_data <- merge(demo_v1v4v5_data, v7_demo_data, by = 'id', all = TRUE)
#reset labels due to brute force fix
attributes(demo_v1v4v5v7_data[['hfssm_hh2']]) <- set_attr_hfssm_hh2
attributes(demo_v1v4v5v7_data[['hfssm_hh3']]) <- set_attr_hfssm_hh3
attributes(demo_v1v4v5v7_data[['hfssm_hh4']]) <- set_attr_hfssm_hh4
attributes(demo_v1v4v5v7_data[['hfssm_ch1']]) <- set_attr_hfssm_ch1
attributes(demo_v1v4v5v7_data[['hfssm_ch2']]) <- set_attr_hfssm_ch2
attributes(demo_v1v4v5v7_data[['hfssm_ch3']]) <- set_attr_hfssm_ch3
attributes(demo_v1v4v5v7_data[['hfssm_ch5a']]) <- set_attr_hfssm_ch5a
attributes(demo_v1v4v5v7_data[['hfias_category']]) <- set_attr_hfias_category
class(demo_v1v4v5v7_data[["hfssm_hh2"]]) <- c("haven_labelled", "vctrs_vctr", "double")
class(demo_v1v4v5v7_data[["hfssm_hh3"]]) <- c("haven_labelled", "vctrs_vctr", "double")
class(demo_v1v4v5v7_data[["hfssm_hh4"]]) <- c("haven_labelled", "vctrs_vctr", "double")
class(demo_v1v4v5v7_data[["hfssm_ch1"]]) <- c("haven_labelled", "vctrs_vctr", "double")
class(demo_v1v4v5v7_data[["hfssm_ch2"]]) <- c("haven_labelled", "vctrs_vctr", "double")
class(demo_v1v4v5v7_data[["hfssm_ch3"]]) <- c("haven_labelled", "vctrs_vctr", "double")
class(demo_v1v4v5v7_data[["hfssm_ch5a"]]) <- c("haven_labelled", "vctrs_vctr", "double")
class(demo_v1v4v5v7_data[["hfias_category"]]) <- c("haven_labelled", "vctrs_vctr", "double")
#get labels
demographic_labels <- c(v1_demo_labels, v4_demo_labels[2:length(v4_demo_labels)], v5_demo_labels[2:length(v5_demo_labels)], v7_demo_labels[2:length(v7_demo_labels)])
# ensure labels are up to date
demographic_data = sjlabelled::set_label(demo_v1v4v5v7_data, label = matrix(unlist(demographic_labels, use.names = FALSE)))
# add to list
database_return <- c(database_return, list(demo_data = demographic_data, demo_dict = demographic_labels))
# write out
if (isTRUE(write_dat)){
#data dictionary
demo_dict <- labelled::generate_dictionary(demographic_data, details = TRUE)
demo_dict$label <- matrix(unlist(demographic_labels, use.names = FALSE))
names(demo_dict)[1] <- 'column'
#interprets the value_labels as list so need to make everything a character
demo_dict_write <- sapply(demo_dict[c(1:3, 6:8, 12:13)], FUN = as.character)
if (isTRUE(writepath_arg)){
haven::write_sav(demographic_data, path = paste0(write_path, 'demographics_data.sav'))
write.csv(demo_dict_write, file = paste0(write_path, 'dict-demographics_data.csv'), row.names = FALSE)
} else {
haven::write_sav(demographic_data, path = 'demographics_data.sav')
write.csv(demo_dict_write, file = 'dict-demographics_data.csv', row.names = FALSE)
}
}
}
## 3b) Anthropometrics ####
if (isFALSE(databases_arg) | 'anthro' %in% databases){
#visit 1
v1_anthro_data <- v1_data[['data']][c(1, 88:336)]
v1_anthro_labels <- v1_data[['dict']][c(1, 88:336)]
#visit 2
v2_anthro_data <- v2_data[['data']][c(1:2, 4:29)]
v2_anthro_labels <- v2_data[['dict']][c(1:2, 4:29)]
names(v2_anthro_data)[2] <- 'v2_date'
names(v2_anthro_labels)[2] <- 'v2_date'
#visit 7
v7_anthro_data <- v7_data[['data']][c(1:2, 121:369, 376:398)]
v7_anthro_labels <- v7_data[['dict']][c(1:2, 121:369, 376:398)]
names(v7_anthro_data)[2] <- 'v7_date'
names(v7_anthro_labels)[2] <- 'v7_date'
#re-name variables with 'v7_' and add 'Visit 7 - ' to labels
for (v in 3:ncol(v7_anthro_data)){
#names
var_name <- names(v7_anthro_data)[v]
v7_name <- paste0('v7_', var_name)
names(v7_anthro_data)[v] <- v7_name
#labels
v7_anthro_labels[[var_name]] <- paste0('Visit 7 - ', v7_anthro_labels[[var_name]])
}
#make names match
names(v7_anthro_labels) <- names(v7_anthro_data)
## merge databases from v1
anthro_demov1_data <- merge(common_demo_data[1:20], v1_anthro_data, by = 'id', all = TRUE)
## merge databases - set all.x = FALSE so only get the participants with data at later visits/have not been screened out
anthro_demov1v2_data <- merge(anthro_demov1_data, v2_anthro_data, by = 'id', all.x = FALSE, all.y = TRUE)
## other merges - set all = TRUE so get all participants in visits 2-7
anthro_demov1v2v7_data <- merge(anthro_demov1v2_data, v7_anthro_data, by = 'id', all = TRUE)
#get labels
anthroprometric_labels <- c(common_demo_labels[1:20], v1_anthro_labels[2:length(v1_anthro_labels)], v2_anthro_labels[2:length(v2_anthro_labels)], v7_anthro_labels[2:length(v7_anthro_labels)])
# ensure labels are up to date
anthroprometric_data = sjlabelled::set_label(anthro_demov1v2v7_data, label = matrix(unlist(anthroprometric_labels, use.names = FALSE)))
# add to list
database_return <- c(database_return, list(anthro_data = anthroprometric_data, anthro_dict = anthroprometric_labels))
# write out
if (isTRUE(write_dat)){
#data dictionary
antho_dict <- labelled::generate_dictionary(anthroprometric_data, details = TRUE)
antho_dict$label <- matrix(unlist(anthroprometric_labels, use.names = FALSE))
names(antho_dict)[1] <- 'column'
#interprets the value_labels as list so need to make everything a character
antho_dict_write <- sapply(antho_dict[c(1:3, 6:8, 12:13)], FUN = as.character)
if (isTRUE(writepath_arg)){
haven::write_sav(anthroprometric_data, path = paste0(write_path, 'anthro_data.sav'))
write.csv(antho_dict_write, file = paste0(write_path, 'dict-anthro_data.csv'), row.names = FALSE)
} else {
haven::write_sav(anthroprometric_data, path = 'anthro_data.sav')
write.csv(antho_dict_write, file = 'dict_anthro_data.csv', row.names = FALSE)
}
}
}
## 3c) Intake ####
if (isFALSE(databases_arg) | 'intake' %in% databases){
#visit 1
v1_intake_data <- v1_data[['data']][c(1, 337:505)]
v1_intake_labels <- v1_data[['dict']][c(1, 337:505)]
#re-name variables with 'v1_' and add 'Visit 1 - ' to labels
for (v in 2:ncol(v1_intake_data)){
#names
var_name <- names(v1_intake_data)[v]
v1_name <- paste0('v1_', var_name)
names(v1_intake_data)[v] <- v1_name
#labels
v1_intake_labels[[var_name]] <- paste0('Visit 1 - ', v1_intake_labels[[var_name]])
}
names(v1_intake_labels) <- names(v1_intake_data)
#visit 2
v2_intake_data <- v2_data[['data']][c(1:2, 30:76)]
v2_intake_labels <- v2_data[['dict']][c(1:2, 30:76)]
names(v2_intake_data)[2] <- 'v2_date'
names(v2_intake_labels)[2] <- 'v2_date'
#re-name variables with 'v2_' and add 'Visit 2 - ' to labels
for (v in 3:ncol(v2_intake_data)){
#names
var_name <- names(v2_intake_data)[v]
v2_name <- paste0('v2_', var_name)
names(v2_intake_data)[v] <- v2_name
#labels
v2_intake_labels[[var_name]] <- paste0('Visit 2 - ', v2_intake_labels[[var_name]])
}
names(v2_intake_labels) <- names(v2_intake_data)
#visit 3
v3_intake_data <- v3_data[['data']][1:49]
v3_intake_labels <- v3_data[['dict']][1:49]
names(v3_intake_data)[2] <- 'v3_date'
names(v3_intake_labels)[2] <- 'v3_date'
#re-name variables with 'v3_' and add 'Visit 3 - ' to labels
for (v in 3:ncol(v3_intake_data)){
#names
var_name <- names(v3_intake_data)[v]
v3_name <- paste0('v3_', var_name)
names(v3_intake_data)[v] <- v3_name
#labels
v3_intake_labels[[var_name]] <- paste0('Visit 3 - ', v3_intake_labels[[var_name]])
}
names(v3_intake_labels) <- names(v3_intake_data)
#visit 4
v4_intake_data <- v4_data[['data']][c(1:2, 100:146)]
v4_intake_labels <- v4_data[['dict']][c(1:2, 100:146)]
names(v4_intake_data)[2] <- 'v4_date'
names(v4_intake_labels)[2] <- 'v4_date'
#re-name variables with 'v4_' and add 'Visit 4 - ' to labels
for (v in 3:ncol(v4_intake_data)){
#names
var_name <- names(v4_intake_data)[v]
v4_name <- paste0('v4_', var_name)
names(v4_intake_data)[v] <- v4_name
#labels
v4_intake_labels[[var_name]] <- paste0('Visit 4 - ', v4_intake_labels[[var_name]])
}
names(v4_intake_labels) <- names(v4_intake_data)
#visit 5
v5_intake_data <- v5_data[['data']][c(1:2, 35:81)]
v5_intake_labels <- v5_data[['dict']][c(1:2, 35:81)]
names(v5_intake_data)[2] <- 'v5_date'
names(v5_intake_labels)[2] <- 'v5_date'
#re-name variables with 'v5_' and add 'Visit 5 - ' to labels
for (v in 3:ncol(v5_intake_data)){
#names
var_name <- names(v5_intake_data)[v]
v5_name <- paste0('v5_', var_name)
names(v5_intake_data)[v] <- v5_name
#labels
v5_intake_labels[[var_name]] <- paste0('Visit 5 - ', v5_intake_labels[[var_name]])
}
names(v5_intake_labels) <- names(v5_intake_data)
#visit 7
v7_intake_data <- v7_data[['data']][c(1:2, 399:571)]
v7_intake_labels <- v7_data[['dict']][c(1:2, 399:571)]
names(v7_intake_data)[2] <- 'v7_date'
names(v7_intake_labels)[2] <- 'v7_date'
#re-name variables with 'v7_' and add 'Visit 7 - ' to labels
for (v in 3:ncol(v7_intake_data)){
#names
var_name <- names(v7_intake_data)[v]
v7_name <- paste0('v7_', var_name)
names(v7_intake_data)[v] <- v7_name
#labels
v7_intake_labels[[var_name]] <- paste0('Visit 7 - ', v7_intake_labels[[var_name]])
}
#make names match
names(v7_intake_labels) <- names(v7_intake_data)
## merge databases from v1
intake_demov1_data <- merge(common_demo_data, v1_intake_data, by = 'id', all = TRUE)
## merge databases - set all.x = FALSE so only get the participants with data at later visits/have not been screened out
intake_demov1v2_data <- merge(intake_demov1_data, v2_intake_data, by = 'id', all.x = FALSE, all.y = TRUE)
## other merges - set all = TRUE so get all participants in visits 2-7
intake_demov1v2v3_data <- merge(intake_demov1v2_data, v3_intake_data, by = 'id', all = TRUE)
intake_demov1v2v3v4_data <- merge(intake_demov1v2v3_data, v4_intake_data, by = 'id', all = TRUE)
intake_demov1v2v3v4v5_data <- merge(intake_demov1v2v3v4_data, v5_intake_data, by = 'id', all = TRUE)
intake_demov1v23v4v5v7_data <- merge(intake_demov1v2v3v4v5_data, v7_intake_data, by = 'id', all = TRUE)
#get labels
intake_labels <- c(common_demo_labels, v1_intake_labels[2:length(v1_intake_labels)], v2_intake_labels[2:length(v2_intake_labels)], v3_intake_labels[2:length(v3_intake_labels)], v4_intake_labels[2:length(v4_intake_labels)], v5_intake_labels[2:length(v5_intake_labels)], v7_intake_labels[2:length(v7_intake_labels)])
# ensure labels are up to date
intake_data = sjlabelled::set_label(intake_demov1v23v4v5v7_data, label = matrix(unlist(intake_labels, use.names = FALSE)))
## add sort by portion size to database
ps_vars <- c('id', 'date', 'freddy_pre_meal', 'freddy_post_meal', 'vas_mac_cheese', 'vas_chkn_nug', 'vas_broccoli', 'vas_grape', 'vas_water', 'rank_mac_cheese', 'rank_chkn_nug', 'rank_broccoli', 'rank_grape', 'meal_start', 'meal_end', 'meal_dur', 'noplate_chkn_nug_g', 'plate_chkn_nug_g', 'post_chkn_nug_g', 'consumed_chkn_nug_g', 'consumed_chkn_nug_kcal', 'noplate_mac_cheese_g', 'plate_mac_cheese_g', 'post_mac_cheese_g', 'consumed_mac_cheese_g', 'consumed_mac_cheese_kcal', 'noplate_grapes_g', 'plate_grapes_g', 'post_grapes_g', 'consumed_grapes_g', 'consumed_grapes_kcal', 'noplate_broccoli_g', 'plate_broccoli_g', 'post_broccoli_g', 'consumed_broccoli_g', 'consumed_broccoli_kcal', 'noplate_ketchup_g', 'plate_ketchup_g', 'post_ketchup_g', 'consumed_ketchup_g', 'consumed_ketchup_kcal', 'noplate_water_g', 'plate_water_g', 'post_water_g', 'consumed_water_g', 'total_g', 'total_kcal')
ps_convert_fn <- function(intake_data, ps, ps_var) {
if (ps_var == 'id'){
ps_var_dat <- intake_data[['id']]
} else {
#visit vars
v2_var_name <- paste0('v2_', ps_var)
v3_var_name <- paste0('v3_', ps_var)
v4_var_name <- paste0('v4_', ps_var)
v5_var_name <- paste0('v5_', ps_var)
#get new variable added to dataset
ps_value <- ps -1
if (ps_var == 'date'){
ps_var_dat <- ifelse(intake_data[['v2_meal_ps']] == ps_value, as.character(intake_data[[v2_var_name]]), ifelse(intake_data[['v3_meal_ps']] == ps_value, as.character(intake_data[[v3_var_name]]), ifelse(intake_data[['v4_meal_ps']] == ps_value, as.character(intake_data[[v4_var_name]]), ifelse(intake_data[['v5_meal_ps']] == ps_value, as.character(intake_data[[v5_var_name]]), NA))))
} else {
ps_var_dat <- ifelse(intake_data[['v2_meal_ps']] == ps_value, intake_data[[v2_var_name]], ifelse(intake_data[['v3_meal_ps']] == ps_value, intake_data[[v3_var_name]], ifelse(intake_data[['v4_meal_ps']] == ps_value, intake_data[[v4_var_name]], ifelse(intake_data[['v5_meal_ps']] == ps_value, intake_data[[v5_var_name]], NA))))
}
}
return(ps_var_dat)
}
## portion size data
ps1_intake_data <- as.data.frame(sapply(ps_vars, FUN = ps_convert_fn, intake_data = intake_data, ps = 1, simplify = TRUE, USE.NAMES = TRUE))
names(ps1_intake_data)[2:ncol(ps1_intake_data)] <- paste0('ps1_', ps_vars[2:length(ps_vars)])
ps1_intake_data[c(1, 3:13, 16:47)] <- sapply(ps1_intake_data[c(1, 3:13, 16:47)], as.numeric)
intake_data <- merge(intake_data, ps1_intake_data, by = 'id', all = TRUE)
ps2_intake_data <- as.data.frame(sapply(ps_vars, FUN = ps_convert_fn, intake_data = intake_data, ps = 2, simplify = TRUE, USE.NAMES = TRUE))
names(ps2_intake_data)[2:ncol(ps2_intake_data)] <- paste0('ps2_', ps_vars[2:length(ps_vars)])
ps2_intake_data[c(1, 3:13, 16:47)] <- sapply(ps2_intake_data[c(1, 3:13, 16:47)], as.numeric)
intake_data <- merge(intake_data, ps2_intake_data, by = 'id', all = TRUE)
ps3_intake_data <- as.data.frame(sapply(ps_vars, FUN = ps_convert_fn, intake_data = intake_data, ps = 3, simplify = TRUE, USE.NAMES = TRUE))
names(ps3_intake_data)[2:ncol(ps3_intake_data)] <- paste0('ps3_', ps_vars[2:length(ps_vars)])
ps3_intake_data[c(1, 3:13, 16:47)] <- sapply(ps3_intake_data[c(1, 3:13, 16:47)], as.numeric)
intake_data <- merge(intake_data, ps3_intake_data, by = 'id', all = TRUE)
ps4_intake_data <- as.data.frame(sapply(ps_vars, FUN = ps_convert_fn, intake_data = intake_data, ps = 4, simplify = TRUE, USE.NAMES = TRUE))
names(ps4_intake_data)[2:ncol(ps4_intake_data)] <- paste0('ps4_', ps_vars[2:length(ps_vars)])
ps4_intake_data[c(1, 3:13, 16:47)] <- sapply(ps4_intake_data[c(1, 3:13, 16:47)], as.numeric)
intake_data <- merge(intake_data, ps4_intake_data, by = 'id', all = TRUE)
## update/add portion size labels
for (ps in 1:4) {
for (v in 2:length(ps_vars)){
#new portion size name
ps_var_name <- paste0('ps', ps, '_', ps_vars[v])
#visit vars
v2_var_name <- paste0('v2_', ps_vars[v])
#get label information with visit information removed
novisit_label <- gsub('Visit 2 - ', '', intake_labels[[v2_var_name]], fixed = TRUE)
#add new label with portion size information
intake_labels[[ps_var_name]] <- paste0('Portion Size ', ps, ': ', novisit_label)
}
}
#update and match labels
names(intake_labels) <- names(intake_data)
intake_data = sjlabelled::set_label(intake_data, label = matrix(unlist(intake_labels, use.names = FALSE)))
# add to list
database_return <- c(database_return, list(intake_data = intake_data, intake_dict = intake_labels))
# write out
if (isTRUE(write_dat)){
#data dictionary
intake_dict <- labelled::generate_dictionary(intake_data, details = TRUE)
intake_dict$label <- matrix(unlist(intake_labels, use.names = FALSE))
names(intake_dict)[1] <- 'column'
#interprets the value_labels as list so need to make everything a character
intake_dict_write <- sapply(intake_dict[c(1:3, 6:8, 12:13)], FUN = as.character)
if (isTRUE(writepath_arg)){
haven::write_sav(intake_data, path = paste0(write_path, 'intake_data.sav'))
write.csv(intake_dict_write, file = paste0(write_path, 'dict-intake_data.csv'), row.names = FALSE)
} else {
haven::write_sav(intake_data, path = 'intake_data.sav')
write.csv(intake_dict_write, file = 'dict-intake_data.csv', row.names = FALSE)
}
}
}
## 3d) Eating Behavior/Food Intake Questionnaires ####
if (isFALSE(databases_arg) | 'food_qs' %in% databases){
#visit 1
v1_foodqs_data <- v1_data[['data']][c(1, 506:821)]
v1_foodqs_labels <- v1_data[['dict']][c(1, 506:821)]
#visit 2
v2_foodqs_data <- v2_data[['data']][c(1:2, 77:299)]
v2_foodqs_labels <- v2_data[['dict']][c(1:2, 77:299)]
names(v2_foodqs_data)[2] <- 'v2_date'
names(v2_foodqs_labels)[2] <- 'v2_date'
#visit 3
v3_foodqs_data <- v3_data[['data']][c(1:2, 50:160)]
v3_foodqs_labels <- v3_data[['dict']][c(1:2, 50:160)]
names(v3_foodqs_data)[2] <- 'v3_date'
names(v3_foodqs_labels)[2] <- 'v3_date'
#visit 4
v4_foodqs_data <- v4_data[['data']][c(1:2, 147:158)]
v4_foodqs_labels <- v4_data[['dict']][c(1:2, 147:158)]
names(v4_foodqs_data)[2] <- 'v4_date'
names(v4_foodqs_labels)[2] <- 'v4_date'
#visit 5
v5_foodqs_data <- v5_data[['data']][c(1:2, 82:104)]
v5_foodqs_labels <- v5_data[['dict']][c(1:2, 82:104)]
names(v5_foodqs_data)[2] <- 'v5_date'
names(v5_foodqs_labels)[2] <- 'v5_date'
#visit 6
v6_foodqs_data <- v6_data[['data']][c(1:2, 330:336)]
v6_foodqs_labels <- v6_data[['dict']][c(1:2, 330:336)]
names(v6_foodqs_data)[2] <- 'v6_date'
names(v6_foodqs_labels)[2] <- 'v6_date'
#visit 7
v7_foodqs_data <- v7_data[['data']][c(1:2, 370:375, 572:887)]
v7_foodqs_labels <- v7_data[['dict']][c(1:2, 370:375, 572:887)]
names(v7_foodqs_data)[2] <- 'v7_date'
names(v7_foodqs_labels)[2] <- 'v7_date'
#re-name variables with 'v7_' and add 'Visit 7 - ' to labels
for (v in 3:ncol(v7_foodqs_data)){
#names
var_name <- names(v7_foodqs_data)[v]
v7_name <- paste0('v7_', var_name)
names(v7_foodqs_data)[v] <- v7_name
#labels
v7_foodqs_labels[[var_name]] <- paste0('Visit 7 - ', v7_foodqs_labels[[var_name]])
}
#make names match
names(v7_foodqs_labels) <- names(v7_foodqs_data)
## merge databases from v1
foodqs_demov1_data <- merge(common_demo_data, v1_foodqs_data, by = 'id', all = TRUE)
## merge databases - set all.x = FALSE so only get the participants with data at later visits/have not been screened out
foodqs_demov1v2_data <- merge(foodqs_demov1_data, v2_foodqs_data, by = 'id', all.x = FALSE, all.y = TRUE)
## other merges - set all = TRUE so get all participants in visits 2-7
foodqs_demov1v2v3_data <- merge(foodqs_demov1v2_data, v3_foodqs_data, by = 'id', all = TRUE)
foodqs_demov1v2v3v4_data <- merge(foodqs_demov1v2v3_data, v4_foodqs_data, by = 'id', all = TRUE)
foodqs_demov1v2v3v4v5_data <- merge(foodqs_demov1v2v3v4_data, v5_foodqs_data, by = 'id', all = TRUE)
foodqs_demov1v2v3v4v5v6_data <- merge(foodqs_demov1v2v3v4v5_data, v6_foodqs_data, by = 'id', all = TRUE)
foodqs_demov1v2v3v4v5v6v7_data <- merge(foodqs_demov1v2v3v4v5v6_data, v7_foodqs_data, by = 'id', all = TRUE)
#get labels
foodqs_labels <- c(common_demo_labels, v1_foodqs_labels[2:length(v1_foodqs_labels)], v2_foodqs_labels[2:length(v2_foodqs_labels)], v3_foodqs_labels[2:length(v3_foodqs_labels)], v4_foodqs_labels[2:length(v4_foodqs_labels)], v5_foodqs_labels[2:length(v5_foodqs_labels)], v6_foodqs_labels[2:length(v6_foodqs_labels)], v7_foodqs_labels[2:length(v7_foodqs_labels)])
# ensure labels are up to date
foodqs_data = sjlabelled::set_label(foodqs_demov1v2v3v4v5v6v7_data, label = matrix(unlist(foodqs_labels, use.names = FALSE)))
# add to list
database_return <- c(database_return, list(foodqs_data = foodqs_data, foodqs_dict = foodqs_labels))
#write out
if (isTRUE(write_dat)){
#data dictionary
foodqs_dict <- labelled::generate_dictionary(foodqs_data, details = TRUE)
foodqs_dict$label <- matrix(unlist(foodqs_labels, use.names = FALSE))
names(foodqs_dict)[1] <- 'column'
#interprets the value_labels as list so need to make everything a character
foodqs_dict_write <- sapply(foodqs_dict[c(1:3, 6:8, 12:13)], FUN = as.character)
if (isTRUE(writepath_arg)){
haven::write_sav(foodqs_data, path = paste0(write_path, 'qs_eatbeh_bodyimage.sav'))
write.csv(foodqs_dict_write, file = paste0(write_path, 'dict-qs_eatbeh_bodyimage.csv'), row.names = FALSE)
} else {
haven::write_sav(foodqs_data, path = 'qs_eatbeh_bodyimage.sav')
write.csv(foodqs_dict_write, file = 'dict-qs_eatbeh_bodyimage.csv', row.names = FALSE)
}
}
}
## 3e) Cognitive and Psychosocial Questionnaires/Measures ####
if (isFALSE(databases_arg) | 'psych_qs' %in% databases){
#visit 2
v2_psychqs_data <- v2_data[['data']][c(1:2, 300:458)]
v2_psychqs_labels <- v2_data[['dict']][c(1:2, 300:458)]
names(v2_psychqs_data)[2] <- 'v2_date'
names(v2_psychqs_labels)[2] <- 'v2_date'
#visit 3
v3_psychqs_data <- v3_data[['data']][c(1:2, 161:248)]
v3_psychqs_labels <- v3_data[['dict']][c(1:2, 161:248)]
names(v3_psychqs_data)[2] <- 'v3_date'
names(v3_psychqs_labels)[2] <- 'v3_date'
#visit 4
v4_psychqs_data <- v4_data[['data']][c(1:2, 159:280)]
v4_psychqs_labels <- v4_data[['dict']][c(1:2, 159:280)]
names(v4_psychqs_data)[2] <- 'v4_date'
names(v4_psychqs_labels)[2] <- 'v4_date'
#visit 7
v7_psychqs_data <- v7_data[['data']][c(1:2, 888:998)]
v7_psychqs_labels <- v7_data[['dict']][c(1:2, 888:998)]
names(v7_psychqs_data)[2] <- 'v7_date'
names(v7_psychqs_labels)[2] <- 'v7_date'
#re-name variables with 'v7_' and add 'Visit 7 - ' to labels
for (v in 3:ncol(v7_psychqs_data)){
#names
var_name <- names(v7_psychqs_data)[v]
v7_name <- paste0('v7_', var_name)
names(v7_psychqs_data)[v] <- v7_name
#labels
v7_psychqs_labels[[var_name]] <- paste0('Visit 7 - ', v7_psychqs_labels[[var_name]])
}
#make names match
names(v7_psychqs_labels) <- names(v7_psychqs_data)
## merge databases - set all.x = FALSE so only get the participants with data at later visits/have not been screened out
psychqs_demov2_data <- merge(common_demo_data, v2_psychqs_data, by = 'id', all.x = FALSE, all.y = TRUE)
## other merges - set all = TRUE so get all participants in visits 2-7
psychqs_demov2v3_data <- merge(psychqs_demov2_data, v3_psychqs_data, by = 'id', all = TRUE)
psychqs_demov2v3v4_data <- merge(psychqs_demov2v3_data, v4_psychqs_data, by = 'id', all = TRUE)
psychqs_demov2v3v4v7_data <- merge(psychqs_demov2v3v4_data, v7_psychqs_data, by = 'id', all = TRUE)
#get labels
psychqs_labels <- c(common_demo_labels, v2_psychqs_labels[2:length(v2_psychqs_labels)], v3_psychqs_labels[2:length(v3_psychqs_labels)], v4_psychqs_labels[2:length(v4_psychqs_labels)], v7_psychqs_labels[2:length(v7_psychqs_labels)])
# ensure labels are up to date
psychqs_data = sjlabelled::set_label(psychqs_demov2v3v4v7_data, label = matrix(unlist(psychqs_labels, use.names = FALSE)))
# add to list
database_return <- c(database_return, list(psychqs_data = psychqs_data, psychqs_dict = psychqs_labels))
#write out
if (isTRUE(write_dat)){
#data dictionary
psychqs_dict <- labelled::generate_dictionary(psychqs_data, details = TRUE)
psychqs_dict$label <- matrix(unlist(psychqs_labels, use.names = FALSE))
names(psychqs_dict)[1] <- 'column'
#interprets the value_labels as list so need to make everything a character
psychqs_dict_write <- sapply(psychqs_dict[c(1:3, 6:8, 12:13)], FUN = as.character)
if (isTRUE(writepath_arg)){
haven::write_sav(psychqs_data, path = paste0(write_path, 'qs_cog_psych_soc.sav'))
write.csv(psychqs_dict_write, file = paste0(write_path, 'dict-qs_cog_psych_soc.csv'), row.names = FALSE)
} else {
haven::write_sav(psychqs_data, path = 'qs_cog_psych_soc.sav')
write.csv(psychqs_dict_write, file = 'dict-qs_cog_psych_soc.csv', row.names = FALSE)
}
}
}
## 3f) Delay Discounting ####
if (isFALSE(databases_arg) | 'dd' %in% databases ){
#visit 3
v3_dd_data <- v3_data[['data']][c(1:2, 249:325)]
v3_dd_labels <- v3_data[['dict']][c(1:2, 249:325)]
names(v3_dd_data)[2] <- 'v3_date'
names(v3_dd_labels)[2] <- 'v3_date'
## merge databases - set all.x = FALSE so only get the participants with data at later visits/have not been screened out
dd_demov3_data <- merge(common_demo_data, v3_dd_data, by = 'id', all.x = FALSE, all.y = TRUE)
#get labels
dd_labels <- c(common_demo_labels, v3_dd_labels[2:length(v3_dd_labels)])
# ensure labels are up to date
dd_data = sjlabelled::set_label(dd_demov3_data, label = matrix(unlist(dd_labels, use.names = FALSE)))
# add to list
database_return <- c(database_return, list(dd_data = dd_data, dd_dict = dd_labels))
#write out
if (isTRUE(write_dat)){
#data dictionary
dd_dict <- labelled::generate_dictionary(dd_data, details = TRUE)
dd_dict$label <- matrix(unlist(dd_labels, use.names = FALSE))
names(dd_dict)[1] <- 'column'
#interprets the value_labels as list so need to make everything a character
dd_dict_write <- sapply(dd_dict[c(1:3, 6:8, 12:13)], FUN = as.character)
if (isTRUE(writepath_arg)){
haven::write_sav(dd_data, path = paste0(write_path, 'delay_discounting.sav'))
write.csv(dd_dict_write, file = paste0(write_path, 'dict-delay_discounting.csv'), row.names = FALSE)
} else {
haven::write_sav(dd_data, path = 'delay_discounting.sav')
write.csv(dd_dict_write, file = 'dict_delay_discounting.csv', row.names = FALSE)
}
}
}
## 3g) Interoception ####
if (isFALSE(databases_arg) | 'intero' %in% databases){
#visit 3
v5_intero_data <- v5_data[['data']][c(1:2, 105:191)]
v5_intero_labels <- v5_data[['dict']][c(1:2, 105:191)]
names(v5_intero_data)[2] <- 'v5_date'
names(v5_intero_labels)[2] <- 'v5_date'
## merge databases - set all.x = FALSE so only get the participants with data at later visits/have not been screened out
intero_demov5_data <- merge(common_demo_data, v5_intero_data, by = 'id', all.x = FALSE, all.y = TRUE)
#get labels
intero_labels <- c(common_demo_labels, v5_intero_labels[2:length(v5_intero_labels)])
# ensure labels are up to date
intero_data = sjlabelled::set_label(intero_demov5_data, label = matrix(unlist(intero_labels, use.names = FALSE)))
# add to list
database_return <- c(database_return, list(intero_data = intero_data, intero_dict = intero_labels))
#write out
if (isTRUE(write_dat)){
#data dictionary
intero_dict <- labelled::generate_dictionary(intero_data, details = TRUE)
intero_dict$label <- matrix(unlist(intero_labels, use.names = FALSE))
names(intero_dict)[1] <- 'column'
#interprets the value_labels as list so need to make everything a character
intero_dict_write <- sapply(intero_dict[c(1:3, 6:8, 12:13)], FUN = as.character)
if (isTRUE(writepath_arg)){
haven::write_sav(intero_data, path = paste0(write_path, 'intero_data.sav'))
write.csv(intero_dict_write, file = paste0(write_path, 'dict-intero_data.csv'), row.names = FALSE)
} else {
haven::write_sav(intero_data, path = 'intero_data.sav')
write.csv(intero_dict_write, file = 'dict-intero_data.csv', row.names = FALSE)
}
}
}
## 3h) Notes ####
if (isFALSE(databases_arg) | 'notes' %in% databases){
#visit 1
v1_notes_data <- v1_data[['data']][c(1, 822:825)]
v1_notes_labels <- v1_data[['dict']][c(1, 822:825)]
#re-name variables with 'v1_' and add 'Visit 1 - ' to labels
for (v in 2:ncol(v1_notes_data)){
#names
var_name <- names(v1_notes_data)[v]
v1_name <- paste0('v1_', var_name)
names(v1_notes_data)[v] <- v1_name
#labels
v1_notes_labels[[var_name]] <- paste0('Visit 1 - ', v1_notes_labels[[var_name]])
}
names(v1_notes_labels) <- names(v1_notes_data)
#visit 2
v2_notes_data <- v2_data[['data']][c(1:2, 459:474)]
v2_notes_labels <- v2_data[['dict']][c(1:2, 459:474)]
names(v2_notes_data)[2] <- 'v2_date'
names(v2_notes_labels)[2] <- 'v2_date'
#re-name variables with 'v2_' and add 'Visit 2 - ' to labels
for (v in 3:ncol(v2_notes_data)){
#names
var_name <- names(v2_notes_data)[v]
v2_name <- paste0('v2_', var_name)
names(v2_notes_data)[v] <- v2_name
#labels
v2_notes_labels[[var_name]] <- paste0('Visit 2 - ', v2_notes_labels[[var_name]])
}
names(v2_notes_labels) <- names(v2_notes_data)
#visit 3
if (isTRUE(model_DD)){
v3_notes_data <- v3_data[['data']][c(1:2, 326:341)]
v3_notes_labels <- v3_data[['dict']][c(1:2, 326:341)]
} else {
v3_notes_data <- v3_data[['data']][c(1:2, 318:333)]
v3_notes_labels <- v3_data[['dict']][c(1:2, 318:333)]
}
names(v3_notes_data)[2] <- 'v3_date'
names(v3_notes_labels)[2] <- 'v3_date'
#re-name variables with 'v3_' and add 'Visit 3 - ' to labels
for (v in 3:ncol(v3_notes_data)){
#names
var_name <- names(v3_notes_data)[v]
v3_name <- paste0('v3_', var_name)
names(v3_notes_data)[v] <- v3_name
#labels
v3_notes_labels[[var_name]] <- paste0('Visit 3 - ', v3_notes_labels[[var_name]])
}
names(v3_notes_labels) <- names(v3_notes_data)
#visit 4
v4_notes_data <- v4_data[['data']][c(1:2, 271:287)]
v4_notes_labels <- v4_data[['dict']][c(1:2, 271:287)]
names(v4_notes_data)[2] <- 'v4_date'
names(v4_notes_labels)[2] <- 'v4_date'
#re-name variables with 'v4_' and add 'Visit 4 - ' to labels
for (v in 3:ncol(v4_notes_data)){
#names
var_name <- names(v4_notes_data)[v]
v4_name <- paste0('v4_', var_name)
names(v4_notes_data)[v] <- v4_name
#labels
v4_notes_labels[[var_name]] <- paste0('Visit 4 - ', v4_notes_labels[[var_name]])
}
names(v4_notes_labels) <- names(v4_notes_data)
#visit 5
v5_notes_data <- v5_data[['data']][c(1:2, 192:209)]
v5_notes_labels <- v5_data[['dict']][c(1:2, 192:209)]
names(v5_notes_data)[2] <- 'v5_date'
names(v5_notes_labels)[2] <- 'v5_date'
#re-name variables with 'v5_' and add 'Visit 5 - ' to labels
for (v in 3:ncol(v5_notes_data)){
#names
var_name <- names(v5_notes_data)[v]
v5_name <- paste0('v5_', var_name)
names(v5_notes_data)[v] <- v5_name
#labels
v5_notes_labels[[var_name]] <- paste0('Visit 5 - ', v5_notes_labels[[var_name]])
}
names(v5_notes_labels) <- names(v5_notes_data)
#visit 6
v6_notes_data <- v6_data[['data']][c(1:2, 337:360)]
v6_notes_labels <- v6_data[['dict']][c(1:2, 337:360)]
names(v6_notes_data)[2] <- 'v6_date'
names(v6_notes_labels)[2] <- 'v6_date'
#re-name variables with 'v6_' and add 'Visit 7 - ' to labels
for (v in 3:ncol(v6_notes_data)){
#names
var_name <- names(v6_notes_data)[v]
v6_name <- paste0('v6_', var_name)
names(v6_notes_data)[v] <- v6_name
#labels
v6_notes_labels[[var_name]] <- paste0('Visit 6 - ', v6_notes_labels[[var_name]])
}
#make names match
names(v6_notes_labels) <- names(v6_notes_data)
#visit 7
v7_notes_data <- v7_data[['data']][c(1:2, 999:1014)]
v7_notes_labels <- v7_data[['dict']][c(1:2, 999:1014)]
names(v7_notes_data)[2] <- 'v7_date'
names(v7_notes_labels)[2] <- 'v7_date'
#re-name variables with 'v7_' and add 'Visit 7 - ' to labels
for (v in 3:ncol(v7_notes_data)){
#names
var_name <- names(v7_notes_data)[v]
v7_name <- paste0('v7_', var_name)
names(v7_notes_data)[v] <- v7_name
#labels
v7_notes_labels[[var_name]] <- paste0('Visit 7 - ', v7_notes_labels[[var_name]])
}
#make names match
names(v7_notes_labels) <- names(v7_notes_data)
## merge databases from v1
notes_demov1_data <- merge(common_demo_data, v1_notes_data, by = 'id', all = TRUE)
## merge databases - set all.x = FALSE so only get the participants with data at later visits/have not been screened out
notes_demov1v2_data <- merge(notes_demov1_data, v2_notes_data, by = 'id', all = TRUE)
## other merges - set all = TRUE so get all participants in visits 2-7
notes_demov1v2v3_data <- merge(notes_demov1v2_data, v3_notes_data, by = 'id', all = TRUE)
notes_demov1v2v3v4_data <- merge(notes_demov1v2v3_data, v4_notes_data, by = 'id', all = TRUE)
notes_demov1v2v3v4v5_data <- merge(notes_demov1v2v3v4_data, v5_notes_data, by = 'id', all = TRUE)
notes_demov1v2v3v4v5v6_data <- merge(notes_demov1v2v3v4v5_data, v6_notes_data, by = 'id', all = TRUE)
notes_demov1v23v4v5v6v7_data <- merge(notes_demov1v2v3v4v5v6_data, v7_notes_data, by = 'id', all = TRUE)
#get labels
notes_labels <- c(common_demo_labels, v1_notes_labels[2:length(v1_notes_labels)], v2_notes_labels[2:length(v2_notes_labels)], v3_notes_labels[2:length(v3_notes_labels)], v4_notes_labels[2:length(v4_notes_labels)], v5_notes_labels[2:length(v5_notes_labels)], v6_notes_labels[2:length(v6_notes_labels)], v7_notes_labels[2:length(v7_notes_labels)])
# ensure labels are up to date
notes_data = sjlabelled::set_label(notes_demov1v23v4v5v6v7_data, label = matrix(unlist(notes_labels, use.names = FALSE)))
# add to list
database_return <- c(database_return, list(notes_data = notes_data, notes_dict = notes_labels))
#write out
if (isTRUE(write_dat)){
#data dictionary
notes_dict <- labelled::generate_dictionary(notes_data, details = TRUE)
notes_dict$label <- matrix(unlist(notes_labels, use.names = FALSE))
names(notes_dict)[1] <- 'column'
#interprets the value_labels as list so need to make everything a character
notes_dict_write <- sapply(notes_dict[c(1:3, 6:8, 12:13)], FUN = as.character)
if (isTRUE(writepath_arg)){
haven::write_sav(notes_data, path = paste0(write_path, 'visit_notes.sav'))
write.csv(notes_dict_write, file = paste0(write_path, 'dict-visit_notes.csv'), row.names = FALSE)
} else {
haven::write_sav(notes_data, path = 'visit_notes.sav')
write.csv(notes_dict_write, file = 'dict-visit_notes.csv', row.names = FALSE)
}
}
}
## 3i) PNA data ####
if (isFALSE(databases_arg) | 'pna' %in% databases){
#visit 1
v1_pna_data <- v1_data[['pna_data']]
v1_pna_labels <- v1_data[['pna_dict']]
#re-name variables with 'v1_' and add 'Visit 1 - ' to labels
for (v in 2:ncol(v1_pna_data)){
#names
var_name <- names(v1_pna_data)[v]
v1_name <- paste0('v1_', var_name)
names(v1_pna_data)[v] <- v1_name
#labels
v1_pna_labels[[var_name]] <- paste0('Visit 1 - ', v1_pna_labels[[var_name]])
}
names(v1_pna_labels) <- names(v1_pna_data)
#visit 2
v2_pna_data <- v2_data[['pna_data']]
v2_pna_labels <- v2_data[['pna_dict']]
#re-name variables with 'v2_' and add 'Visit 2 - ' to labels
for (v in 2:ncol(v2_pna_data)){
#names
var_name <- names(v2_pna_data)[v]
v2_name <- paste0('v2_', var_name)
names(v2_pna_data)[v] <- v2_name
#labels
v2_pna_labels[[var_name]] <- paste0('Visit 2 - ', v2_pna_labels[[var_name]])
}
names(v2_pna_labels) <- names(v2_pna_data)
#visit 3
v3_pna_data <- v3_data[['pna_data']]
v3_pna_labels <- v3_data[['pna_dict']]
#re-name variables with 'v3_' and add 'Visit 3 - ' to labels
for (v in 2:ncol(v3_pna_data)){
#names
var_name <- names(v3_pna_data)[v]
v3_name <- paste0('v3_', var_name)
names(v3_pna_data)[v] <- v3_name
#labels
v3_pna_labels[[var_name]] <- paste0('Visit 3 - ', v3_pna_labels[[var_name]])
}
names(v3_pna_labels) <- names(v3_pna_data)
#visit 4
v4_pna_data <- v4_data[['pna_data']]
v4_pna_labels <- v4_data[['pna_dict']]
#re-name variables with 'v4_' and add 'Visit 4 - ' to labels
for (v in 2:ncol(v4_pna_data)){
#names
var_name <- names(v4_pna_data)[v]
v4_name <- paste0('v4_', var_name)
names(v4_pna_data)[v] <- v4_name
#labels
v4_pna_labels[[var_name]] <- paste0('Visit 4 - ', v4_pna_labels[[var_name]])
}
names(v4_pna_labels) <- names(v4_pna_data)
#visit 5
v5_pna_data <- v5_data[['pna_data']]
v5_pna_labels <- v5_data[['pna_dict']]
#re-name variables with 'v5_' and add 'Visit 5 - ' to labels
for (v in 2:ncol(v5_pna_data)){
#names
var_name <- names(v5_pna_data)[v]
v5_name <- paste0('v5_', var_name)
names(v5_pna_data)[v] <- v5_name
#labels
v5_pna_labels[[var_name]] <- paste0('Visit 5 - ', v5_pna_labels[[var_name]])
}
names(v5_pna_labels) <- names(v5_pna_data)
#visit 7
v7_pna_data <- v7_data[['pna_data']]
v7_pna_labels <- v7_data[['pna_dict']]
#re-name variables with 'v7_' and add 'Visit 7 - ' to labels
for (v in 2:ncol(v7_pna_data)){
#names
var_name <- names(v7_pna_data)[v]
v7_name <- paste0('v7_', var_name)
names(v7_pna_data)[v] <- v7_name
#labels
v7_pna_labels[[var_name]] <- paste0('Visit 7 - ', v7_pna_labels[[var_name]])
}
#make names match
names(v7_pna_labels) <- names(v7_pna_data)
## merge databases - set all = TRUE so get all participants in visits 1-7
pna_v1v2_data <- merge(v1_pna_data, v2_pna_data, by = 'id', all = TRUE)
pna_v1v2v3_data <- merge(pna_v1v2_data, v3_pna_data, by = 'id', all = TRUE)
pna_v1v2v3v4_data <- merge(pna_v1v2v3_data, v4_pna_data, by = 'id', all = TRUE)
pna_v1v2v3v4v5_data <- merge(pna_v1v2v3v4_data, v5_pna_data, by = 'id', all = TRUE)
pna_v1v2v3v4v5v7_data <- merge(pna_v1v2v3v4v5_data, v7_pna_data, by = 'id', all = TRUE)
#get labels
pna_labels <- c(v1_pna_labels, v2_pna_labels[2:length(v2_pna_labels)], v3_pna_labels[2:length(v3_pna_labels)], v4_pna_labels[2:length(v4_pna_labels)], v5_pna_labels[2:length(v5_pna_labels)], v7_pna_labels[2:length(v7_pna_labels)])
# ensure labels are up to date
pna_data = sjlabelled::set_label(pna_v1v2v3v4v5v7_data, label = matrix(unlist(pna_labels, use.names = FALSE)))
# add to list
database_return <- c(database_return, list(pna_data = pna_data, pna_dict = pna_labels))
#write out
if (isTRUE(write_dat)){
#data dictionary
pna_dict <- labelled::generate_dictionary(pna_data, details = TRUE)
pna_dict$label <- matrix(unlist(pna_labels, use.names = FALSE))
names(pna_dict)[1] <- 'column'
#interprets the value_labels as list so need to make everything a character
pna_dict_write <- sapply(pna_dict[c(1:3, 6:8, 12:13)], FUN = as.character)
if (isTRUE(writepath_arg)){
haven::write_sav(pna_data, path = paste0(write_path, 'pna_notes.sav'))
write.csv(pna_dict_write, file = paste0(write_path, 'dict-pna_notes.csv'), row.names = FALSE)
} else {
haven::write_sav(pna_data, path = 'pna_notes.sav')
write.csv(pna_dict_write, file = 'dict-pna_notes.csv', row.names = FALSE)
}
}
}
## 3i) Microstructure data ####
if (isFALSE(databases_arg) | 'micro' %in% databases){
micro_data[['beh_wide']][['data']][['id']] <- as.numeric(micro_data[['beh_wide']][['data']][['id']])
micro_data[['event']][['data']][['id']] <- as.numeric(micro_data[['event']][['data']][['id']])
## merge databases - set all = TRUE so get all participants in visits 1-7
beh_wide_demo_data <- merge(common_demo_data, micro_data[['beh_wide']][['data']], by = 'id', all = TRUE)
event_demo_data <- merge(common_demo_data, micro_data[['event']][['data']], by = 'id', all = TRUE)
#get labels
beh_wide_labels <- c(common_demo_labels, micro_data[['beh_wide']][['dict']][2:length(micro_data[['beh_wide']][['dict']])])
event_labels <- c(common_demo_labels, micro_data[['event']][['dict']][2:length(micro_data[['event']][['dict']])])
# ensure labels are up to date
beh_wide_data = sjlabelled::set_label(beh_wide_demo_data, label = matrix(unlist(beh_wide_labels, use.names = FALSE)))
event_data = sjlabelled::set_label(event_demo_data, label = matrix(unlist(event_labels, use.names = FALSE)))
# add to list
database_return <- c(database_return, list(micro_wide = beh_wide_data, micro_wide_dict = beh_wide_labels, micro_event = event_data, micro_event_dict = event_labels))
#write out
if (isTRUE(write_dat)){
#data dictionary
micro_wide_dict <- labelled::generate_dictionary(beh_wide_data, details = TRUE)
micro_wide_dict$label <- matrix(unlist(beh_wide_labels, use.names = FALSE))
names(micro_wide_dict)[1] <- 'column'
micro_event_dict <- labelled::generate_dictionary(event_data, details = TRUE)
micro_event_dict$label <- matrix(unlist(event_labels, use.names = FALSE))
names(micro_event_dict)[1] <- 'column'
#interprets the value_labels as list so need to make everything a character
micro_wide_dict_write <- sapply(micro_wide_dict[c(1:3, 6:8, 12:13)], FUN = as.character)
micro_event_dict_write <- sapply(micro_event_dict[c(1:3, 6:8, 12:13)], FUN = as.character)
if (isTRUE(writepath_arg)){
haven::write_sav(beh_wide_data, path = paste0(write_path, 'micro_beh_summary.sav'))
write.csv(micro_wide_dict_write, file = paste0(write_path, 'dict-micro_beh_summary.csv'), row.names = FALSE)
haven::write_sav(event_data, path = paste0(write_path, 'micro_events.sav'))
write.csv(micro_event_dict_write, file = paste0(write_path, 'dict-micro_events.csv'), row.names = FALSE)
} else {
haven::write_sav(beh_wide_data, 'micro_beh_summary.sav')
write.csv(micro_wide_dict, 'dict-micro_beh_summary.csv', row.names = FALSE)
haven::write_sav(event_data, path = 'micro_events.sav')
write.csv(micro_event_dict, file = 'dict-micro_events.csv', row.names = FALSE)
}
}
}
#### 10. Return List #####
if(isTRUE(return_data)){
return(database_return)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.