#' Create clean, long dataset from data received from Roqua
#'
#' @param data A dataframe in pre-specified format (265 variables)
#' @return A clean dataframe that can be used for visualization
#' @import dplyr
#' @importFrom lubridate as_datetime
#' @export
prepare_data <- function(data) {
# If dataset does not have right number of variables, then stop
if( !is.data.frame(data) ) {
return("Data is not a dataset")
}
# If dataset does not have right number of variables, then stop
if(length(names(data)) != 265 ) {
return("Dataset does not have right number of variables (265)")
}
should_include <- c(
"id", "respondent_id", "csp_dna_non_response", "csp_dna_open_from",
"csp_dna_fase", "csp_dna_1", "csp_dna_2", "csp_dna_3", "csp_dna_4",
"csp_dna_5", "csp_dna_6", "csp_dna_7", "csp_dna_8", "csp_dna_9",
"csp_dna_10", "csp_dna_11", "csp_dna_12", "csp_dna_13", "csp_dna_14",
"csp_dna_15", "csp_dna_16", "csp_dna_17", "csp_dna_18", "csp_dna_19",
"csp_dna_20", "csp_dna_21", "csp_dna_22", "csp_dna_23", "csp_dna_24",
"csp_dna_25", "csp_dna_26", "csp_dna_27", "csp_dna_28", "csp_dna_29",
"csp_dna_30", "csp_dna_31", "csp_dna_32", "csp_dna_33", "csp_dna_34",
"csp_dna_35", "csp_dna_36", "csp_dna_37", "csp_dna_38", "csp_dna_39",
"csp_dna_40", "csp_dna_41", "csp_dna_42", "csp_dna_43", "csp_dna_44",
"csp_dna_45", "csp_dna_46", "csp_dna_47", "csp_dna_48", "csp_dna_49",
"csp_dna_50", "csp_dna_51", "csp_dna_52", "csp_dna_53", "csp_dna_54",
"csp_dna_58", "csp_dna_59", "csp_dna_60", "csp_dna_61", "csp_dna_62",
"csp_dna_63", "csp_dna_64", "csp_dna_65", "csp_dna_66", "csp_dna_67",
"csp_dna_68", "csp_dna_69", "csp_dna_70", "csp_dna_71", "csp_dna_72",
"csp_dna_73", "csp_dna_74", "csp_dna_75", "csp_dna_76", "csp_dna_77",
"csp_dna_78", "csp_dna_58a_a1", "csp_dna_58a_a2", "csp_dna_58a_a3",
"csp_dna_58a_a4", "csp_dna_58a_a5", "csp_dna_58a_a6", "csp_dna_59a_a1",
"csp_dna_59a_a2", "csp_dna_59a_a3", "csp_dna_59a_a4", "csp_dna_59a_a5",
"csp_dna_59a_a6", "csp_dna_60a_a1", "csp_dna_60a_a2", "csp_dna_60a_a3",
"csp_dna_60a_a4", "csp_dna_60a_a5", "csp_dna_60a_a6", "csp_dna_61a_a1",
"csp_dna_61a_a2", "csp_dna_61a_a3", "csp_dna_61a_a4", "csp_dna_61a_a5",
"csp_dna_61a_a6", "csp_dna_62a_a1", "csp_dna_62a_a2", "csp_dna_62a_a3",
"csp_dna_62a_a4", "csp_dna_62a_a5", "csp_dna_62a_a6", "csp_dna_63a_a1",
"csp_dna_63a_a2", "csp_dna_63a_a3", "csp_dna_63a_a4", "csp_dna_63a_a5",
"csp_dna_63a_a6", "csp_dna_63a_a7", "csp_dna_63a_a8", "csp_dna_63a_a9",
"csp_dna_63a_a10", "csp_dna_63a_a11", "csp_dna_63a_a12", "csp_dna_63a_a13",
"csp_dna_63a_a14", "csp_dna_64a_a1", "csp_dna_64a_a2", "csp_dna_64a_a3",
"csp_dna_64a_a4", "csp_dna_64a_a5", "csp_dna_64a_a6", "csp_dna_64a_a7",
"csp_dna_64a_a8", "csp_dna_64a_a9", "csp_dna_64a_a10", "csp_dna_64a_a11",
"csp_dna_64a_a12", "csp_dna_64a_a13", "csp_dna_64a_a14", "csp_dna_65a_a1",
"csp_dna_65a_a2", "csp_dna_65a_a3", "csp_dna_65a_a4", "csp_dna_65a_a5",
"csp_dna_65a_a6", "csp_dna_65a_a7", "csp_dna_65a_a8", "csp_dna_65a_a9",
"csp_dna_65a_a10", "csp_dna_65a_a11", "csp_dna_65a_a12", "csp_dna_65a_a13",
"csp_dna_65a_a14", "csp_dna_66a_a1", "csp_dna_66a_a2", "csp_dna_66a_a3",
"csp_dna_66a_a4", "csp_dna_67a_a1", "csp_dna_67a_a2", "csp_dna_67a_a3",
"csp_dna_67a_a4", "csp_dna_67a_a5", "csp_dna_67a_a6", "csp_dna_67a_a7",
"csp_dna_67a_a8", "csp_dna_67a_a9", "csp_dna_67a_a10", "csp_dna_67a_a11",
"csp_dna_67a_a12", "csp_dna_67a_a13", "csp_dna_67a_a14", "csp_dna_68a_a1",
"csp_dna_68a_a2", "csp_dna_68a_a3", "csp_dna_68a_a4", "csp_dna_68a_a5",
"csp_dna_68a_a6", "csp_dna_68a_a7", "csp_dna_68a_a8", "csp_dna_68a_a9",
"csp_dna_68a_a10", "csp_dna_68a_a11", "csp_dna_68a_a12", "csp_dna_68a_a13",
"csp_dna_68a_a14", "csp_dna_69a_a1", "csp_dna_69a_a2", "csp_dna_69a_a3",
"csp_dna_69a_a4", "csp_dna_69a_a5", "csp_dna_69a_a6", "csp_dna_69a_a7",
"csp_dna_69a_a8", "csp_dna_69a_a9", "csp_dna_69a_a10", "csp_dna_69a_a11",
"csp_dna_69a_a12", "csp_dna_69a_a13", "csp_dna_69a_a14", "csp_dna_70a_a1",
"csp_dna_70a_a2", "csp_dna_70a_a3", "csp_dna_70a_a4", "csp_dna_70a_a5",
"csp_dna_70a_a6", "csp_dna_70a_a7", "csp_dna_70a_a8", "csp_dna_70a_a9",
"csp_dna_70a_a10", "csp_dna_70a_a11", "csp_dna_70a_a12", "csp_dna_71a_a1",
"csp_dna_71a_a2", "csp_dna_71a_a3", "csp_dna_71a_a4", "csp_dna_71a_a5",
"csp_dna_71a_a6", "csp_dna_72a_a1", "csp_dna_72a_a2", "csp_dna_72a_a3",
"csp_dna_72a_a4", "csp_dna_72a_a5", "csp_dna_72a_a6", "csp_dna_72a_a7",
"csp_dna_72a_a8", "csp_dna_73a_a1", "csp_dna_73a_a2", "csp_dna_73a_a3",
"csp_dna_73a_a4", "csp_dna_73a_a5", "csp_dna_73a_a6", "csp_dna_73a_a7",
"csp_dna_73a_a8", "csp_dna_74a_a1", "csp_dna_74a_a2", "csp_dna_74a_a3",
"csp_dna_74a_a4", "csp_dna_75a_a1", "csp_dna_75a_a2", "csp_dna_75a_a3",
"csp_dna_75a_a4", "csp_dna_76a_a1", "csp_dna_76a_a2", "csp_dna_76a_a3",
"csp_dna_76a_a4", "csp_dna_76a_a5", "csp_dna_76a_a6", "csp_dna_76a_a7",
"csp_dna_76a_a8", "csp_dna_76a_a9", "csp_dna_76a_a10", "csp_dna_76a_a11",
"csp_dna_76a_a12", "csp_dna_76a_a13", "csp_dna_76a_a14", "csp_dna_76a_a15",
"csp_dna_76a_a16", "csp_dna_55a", "csp_dna_55_a0", "csp_dna_56a",
"csp_dna_57a", "csp_dna_77a", "csp_dna_78a", "csp_dna_79", "csp_dna_79a",
"csp_dna_79b")
# If dataset does not have all pre-specified variables then stop
if(!all(names(data) %in% should_include)) {
return("Dataset does not have prespecified variable list")
}
# If dataset has many completely missing questionnaires (>90%), then stop
if( sum(is.na(data$csp_dna_non_response)) / nrow(data) < 0.10 ) {
return("Too many non-responses")
}
# If dataset has fewer than 20 rows, then stop
if( nrow(data) < 20 ) {
return("Dataset has fewer than 20 rows")
}
# Vector with variable names of variables in circle-figure
bedroefd = c("csp_dna_1", "csp_dna_2", "csp_dna_3", "csp_dna_4", "csp_dna_5",
"csp_dna_6", "csp_dna_7", "csp_dna_8")
blij = c("csp_dna_9", "csp_dna_10", "csp_dna_11", "csp_dna_12", "csp_dna_13")
boos = c("csp_dna_14", "csp_dna_15", "csp_dna_16", "csp_dna_17")
bang = c("csp_dna_18", "csp_dna_19", "csp_dna_20", "csp_dna_21",
"csp_dna_22", "csp_dna_23", "csp_dna_24")
energie = c("csp_dna_25", "csp_dna_26", "csp_dna_27")
spanning = c("csp_dna_28", "csp_dna_29", "csp_dna_30", "csp_dna_31",
"csp_dna_32", "csp_dna_33", "csp_dna_34", "csp_dna_35")
ondernemen = c("csp_dna_36", "csp_dna_37")
contact_beh = c("csp_dna_38", "csp_dna_39", "csp_dna_40")
zelfbeeld = c("csp_dna_41", "csp_dna_42", "csp_dna_43", "csp_dna_44",
"csp_dna_45", "csp_dna_46", "csp_dna_47", "csp_dna_48")
erv_cont = c("csp_dna_49", "csp_dna_50", "csp_dna_51", "csp_dna_52",
"csp_dna_53", "csp_dna_54")
terugtrekken = c("csp_dna_58", "csp_dna_59", "csp_dna_60")
zorg_zelf = c("csp_dna_61", "csp_dna_62")
destructief = c("csp_dna_63", "csp_dna_64", "csp_dna_65")
suicidaliteit = c("csp_dna_66")
activiteiten = c("csp_dna_67", "csp_dna_68", "csp_dna_69")
onrustig = c("csp_dna_70")
bijzondere_erv = c("csp_dna_71")
verplichtingen = c("csp_dna_72", "csp_dna_73")
negatief_contact = c("csp_dna_74", "csp_dna_75")
lichamelijke_kla = c("csp_dna_76")
plezierig = c("csp_dna_77")
onplezierig = c("csp_dna_78")
# Create network variables with new names
# Can have multiple mutally exclusive possibilities so collapse into one
# e.g., "blij" has five ways of measuring, but only 1 is used per client
data <- data %>%
mutate(
# combine all vectors of variable names that need to become numeric
across(all_of(c(bedroefd, blij, boos, bang, energie, spanning, ondernemen,
contact_beh, zelfbeeld, erv_cont, terugtrekken,
zorg_zelf, destructief, suicidaliteit, activiteiten,
onrustig, bijzondere_erv, verplichtingen, negatief_contact,
lichamelijke_kla, plezierig, onplezierig)), as.numeric)
) %>%
mutate(
Bedroefd = rowMeans(select(., all_of(bedroefd)), na.rm = TRUE),
Blij = rowMeans(select(., all_of(blij)), na.rm = TRUE),
Boos = rowMeans(select(., all_of(boos)), na.rm = TRUE),
Bang = rowMeans(select(., all_of(bang)), na.rm = TRUE),
Energie = rowMeans(select(., all_of(energie)), na.rm = TRUE),
Spanning = rowMeans(select(., all_of(spanning)), na.rm = TRUE),
Ondernemen = rowMeans(select(., all_of(ondernemen)), na.rm = TRUE),
Contact_behoefte = rowMeans(select(., all_of(contact_beh)), na.rm = TRUE),
Zelfbeeld = rowMeans(select(., all_of(zelfbeeld)), na.rm = TRUE),
Ervaren_controle = rowMeans(select(., all_of(erv_cont)), na.rm = TRUE),
Terugtrekken = rowMeans(select(., all_of(terugtrekken)), na.rm = TRUE),
Zorg_zelf = rowMeans(select(., all_of(zorg_zelf)), na.rm = TRUE),
Destructief = rowMeans(select(., all_of(destructief)), na.rm = TRUE),
Suicidaliteit = rowMeans(select(., all_of(suicidaliteit)), na.rm = TRUE),
Activiteiten = rowMeans(select(., all_of(activiteiten)), na.rm = TRUE),
Onrustig = rowMeans(select(., all_of(onrustig)), na.rm = TRUE),
Bijzondere_ervaringen = rowMeans(select(., all_of(bijzondere_erv)), na.rm = TRUE),
Verplichtingen = rowMeans(select(., all_of(verplichtingen)), na.rm = TRUE),
Negatief_contact = rowMeans(select(., all_of(negatief_contact)), na.rm = TRUE),
Lichamelijke_klachten = rowMeans(select(., all_of(lichamelijke_kla)), na.rm = TRUE),
Plezierig = rowMeans(select(., all_of(plezierig)), na.rm = TRUE),
Onplezierig = rowMeans(select(., all_of(onplezierig)), na.rm = TRUE)
)
# If dataset has many missings on particular items (>50%), then stop
if( (sum(is.nan(data$Bedroefd)) / nrow(data)) > 0.50 ) {
return("Too many non-responses on questions on bedroefd")
}
# New names of all variables in circle
circle_vars_nms <- c("Bedroefd", "Blij", "Boos" , "Bang", "Energie",
"Spanning", "Ondernemen", "Contact_behoefte",
"Zelfbeeld", "Ervaren_controle", "Terugtrekken",
"Zorg_zelf", "Destructief", "Suicidaliteit",
"Activiteiten", "Onrustig", "Bijzondere_ervaringen",
"Verplichtingen", "Negatief_contact",
"Lichamelijke_klachten", "Plezierig", "Onplezierig")
# Long code to determine which variables were used and should be included in graphics
# Names of variables Terugtrekken, "...a_na" not included
tt <- c("csp_dna_58","csp_dna_59","csp_dna_60")
# Name of variable that is not empty
tt_sel <- tt[colSums(!is.na(data[, tt])) > 0]
# Names of variables for grid
tt_vars <- paste0(tt_sel, paste0("a_a", 1:6))
# Names of variables Zelfzorg, "...a_na" not included
zz <- c("csp_dna_61","csp_dna_62")
# Name of variable that is not empty
zz_sel <- zz[colSums(!is.na(data[, zz])) > 0]
# Names of variables for grid
zz_vars <- paste0(zz_sel, paste0("a_a", 1:6))
# Names of variables Destructief, "...a_a14a" & "...a_na" not included
de <- c("csp_dna_63","csp_dna_64","csp_dna_65")
# Name of variable that is not empty
de_sel <- de[colSums(!is.na(data[, de])) > 0]
# Names of variables for grid
de_vars <- paste0(de_sel, paste0("a_a", 1:14))
# Names of variables Suicidaliteit, "...a_na" not included
su_vars <- c("csp_dna_66a_a1", "csp_dna_66a_a2", "csp_dna_66a_a3", "csp_dna_66a_a4")
# Names of variables Activiteiten, "...a_a14a" & "...a_na" not included
ac <- c("csp_dna_67","csp_dna_68","csp_dna_69")
# Name of variable that is not empty
ac_sel <- ac[colSums(!is.na(data[, ac])) > 0]
# Names of variables for grid
ac_vars <- paste0(ac_sel, paste0("a_a", 1:14))
# Names of variables Onrustig, "...a_a13", "...a_a13a" & "...a_na" not included
on_vars <- c("csp_dna_70a_a1", "csp_dna_70a_a2", "csp_dna_70a_a3",
"csp_dna_70a_a4", "csp_dna_70a_a5", "csp_dna_70a_a6",
"csp_dna_70a_a7", "csp_dna_70a_a8", "csp_dna_70a_a9",
"csp_dna_70a_a10", "csp_dna_70a_a11", "csp_dna_70a_a12")
# Names of variables Bijzondere / Psychiatrische ervaringen,
# "...a_na" not included
be_vars <- c("csp_dna_71a_a1", "csp_dna_71a_a2", "csp_dna_71a_a3",
"csp_dna_71a_a4", "csp_dna_71a_a5", "csp_dna_71a_a6")
# Names of variables Verplichtingen, "...a_na" not included
ve <- c("csp_dna_72","csp_dna_73")
# Name of variable that is not empty
ve_sel <- ve[colSums(!is.na(data[, ve])) > 0]
# Names of variables for grid
ve_vars <- paste0(ve_sel, paste0("a_a", 1:8))
# Names of variables Negatief contact, "...a_na" not included
nc <- c("csp_dna_74","csp_dna_75")
# Name of variable that is not empty
nc_sel <- nc[colSums(!is.na(data[, nc])) > 0]
# Names of variables for grid
nc_vars <- paste0(nc_sel, paste0("a_a", 1:4))
# Names of variables Lichamelijke klachten, "...a_na" not included
lc_vars <- c("csp_dna_76a_a1", "csp_dna_76a_a2", "csp_dna_76a_a3",
"csp_dna_76a_a4", "csp_dna_76a_a5", "csp_dna_76a_a6",
"csp_dna_76a_a7", "csp_dna_76a_a8", "csp_dna_76a_a9",
"csp_dna_76a_a10", "csp_dna_76a_a11", "csp_dna_76a_a12",
"csp_dna_76a_a13", "csp_dna_76a_a14", "csp_dna_76a_a15",
"csp_dna_76a_a16")
# lichamelijke_kla = c("csp_dna_76")
# plezierig = c("csp_dna_77")
# onplezierig = c("csp_dna_78")
# Names of all variables in grid
grid_nms <- c(tt_vars, # names of 6 variables on terugtrekken
zz_vars, # names of 6 variables on zelfzorg
de_vars, # names of 14 variables on Destructief
su_vars, # names of 4 variables on Suicidaliteit
ac_vars, # names of 14 variables on Activiteiten
on_vars, # names of 12 variables on Onrustig / Neurotisch
be_vars, # names of 6 variables on Bijzondere ervaringen
ve_vars, # names of 8 variables on Verplichtingen
nc_vars, # names of 4 variables on Negatief contact
lc_vars # names of 16 variables on Negatief contact
)
# New names of all variables in grid
grid_nms_mw <- c(
# New names for terugtrekken
"Whatsapp", "Bellen", "Deurbel", "Smsen", "Afgezegd", "Werk_school_sport",
# New names for zelfzorg
"Douchen", "Dag_nacht_ritme", "Eten", "Bewogen", "Recept_Medicatie",
"Medicatie_ingenomen",
# New names for destructief
"Snijden", "Bonken_hoofd", "Krabben",
"Krassen", "Slaan_vuist", "Anderen_schade", "Spullen_kapot",
"Uitrekken_haren", "Alcohol_drugs", "Uitgeven_geld", "Gokken",
"Seksueel_risicovol", "Eetbui", "Destructie_Anders",
# New names for suicidaliteit
"Passieve_gedachtes", "Actieve_gedachtes",
"Afscheidsbrief", "Concreet_plan",
# New names for activiteiten
"TV", "Muziek", "Yoga", "Wandelen", "Creatief", "Lezen", "Sporten",
"Huishouden", "Spelletje", "Slapen", "Contact_zoeken", "Afgesproken",
"Buiten", "Anders",
# New names for onrustig
"Nagelbijten", "Ijsberen", "Mouw_trekken", "Schoonmaken", "Roken",
"Praten_anders", "Uitpraten_niet", "Piekeren", "Dwanghandelingen",
"Contact_veel", "Alleen_niet", "Beslissingen_zelf",
# New names for Bijzondere ervaringen
"Stemmen", "Schimmen", "Dissociaties", "Achterdochtig",
"Opdracht_krijgen", "Herbeleving",
# New names for Verplichtingen
"Werken", "Opleiding", "Kind_zorg", "Wassen", "Koken",
"Schoonmaken_verplichting", "Boodschappen", "Afspraken_nakomen",
# New names for negatief ontact
"Telefoon_onaardig", "Face_to_face_onaardig",
"Whatsapp_onaardig", "Social_media_onaardig",
# New names for lichamelijke klachten
"Ademhalingsproblemen", "Pijn_borst", "Hartkloppingen", "Misselijkheid",
"Ontlasting", "Buikpijn", "Hoofdpijn", "Duizeligheid", "Vermoeidheid",
"Rugpijn", "Spierpijn", "Tintelingen", "Zweten", "Veel_slapen",
"Weinig_slapen", "Nachtmerries")
# Rename network variables into more sensible names
data <- data %>%
# Create date variables
mutate(Datum = as_datetime(csp_dna_open_from)) %>%
# Rename grid variables and make them numeric
rename_at(vars(all_of(grid_nms)), ~ grid_nms_mw) %>%
mutate_at(vars(all_of(grid_nms_mw)), as.numeric) %>%
# Select only relevant variables
select(all_of(c("Datum", "csp_dna_non_response",
circle_vars_nms, grid_nms_mw,
"csp_dna_55a", "csp_dna_55_a0", "csp_dna_56a",
"csp_dna_57a", "csp_dna_77a", "csp_dna_78a",
# Added sleep variables
"csp_dna_79", "csp_dna_79a", "csp_dna_79b",
"csp_dna_fase")))
# Create interval variables
data$dayno <- time_count(data[["Datum"]], unit = "day")
data$pertwee <- (data[["dayno"]] ) %/% 2
return(data)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.