# library(tidyr)
# library(dplyr)
# library(lubridate)
#' Process data before visualisation.
#'
#' @param data A dataframe.
#' @param var_date A string with the name of the date-variable.
#' @param format_date A string with the format of the date (e.g., "ymd", "ymd_HM").
#' @param vars_meas A vector of variable names that need to be visualised.
#' @param vars_groups A vector of specific variable names that are in vars_meas.
#' @param vars_event A vector of names of variables that describe events.
#' @param vars_descr A vector of names of variables that describe events.
#' @param interval XXXXXXXX Can be week or day
#' @param ID A list that must contain the elements ID_var and ID; ID_var must be a string with a variable name, and ID must be a string of the unique identifier.
#' @param type_vis The type of visualisation required; the options are "timeseries" (default), "zoom", "barchart", and "network".
#' @param time_frame A vector with the first and last measurement.
#' @param sel_period A vector with the first and last measurement for selecting a specific period.
#' @param sel_period_zoom A vector with the first and last measurement for selecting a specific period in the "zoom" graph.
#' @return A list with one or two dataframes for visualisation.
#' @importFrom tidyr gather
#' @importFrom dplyr filter mutate group_by ungroup dense_rank
#' @importFrom lubridate parse_date_time wday as_date
#' @importFrom magrittr %>%
#' @export
data_processing <- function(data = NULL,
var_date = NULL,
format_date = NULL,
vars_meas = NULL,
vars_groups = NULL,
vars_event = NULL,
vars_descr = NULL,
interval = "week",
ID = NULL,
type_vis = "timeseries",
time_frame = "all",
sel_period = NULL,
sel_period_zoom = NULL)
{
# Checking all arguments ---------------------------
if (is.null(data) || !is.data.frame(data)) {
stop("Please provide a dataframe with the argument 'data = ...'.
Perhaps use data.frame(...)")
}
# COULD ALSO ADD WEEK HERE
if ( is.null(var_date) ) {
warning("No date variable specified. Using rownumber as date.
Use argument 'var_date = ...' to specify")
data$date_esmvis <- 1:nrow(data)
} else if ( inherits(data[[var_date]],
c("Date", "POSIXlt", "POSIXct", "is.POSIXt")) ) {
data$date_esmvis <- data[[var_date]]
} else {
if ( is.null(format_date) ) {
warning("No dateformat specified. Using rownumber as date.
Use argument 'format_date = ...' to specify")
data$date_esmvis <- 1:nrow(data)
} else {
data$date_esmvis <- lubridate::parse_date_time(data[[var_date]],
format_date)
}
}
if ( is.null(vars_meas) ) {
stop("No variables specified for visualisation.
Use argument 'vars_meas = ...' to specify.")
} else if ( !all(vars_meas %in% colnames(data)) ) {
stop("Not all variables specified in 'vars_meas = ...' exist!")
} else if ( !any(apply(data[vars_meas], 2, is.numeric)) ) {
stop("Not all variables specified in 'vars_meas = ...' are numeric!")
}
### CHECKING INTERVAL MAKE FUNCTIONS
# PAS DEZE AAN
# if ( !is.null(vars_groups) ) {
# if ( !all(vars_groups %in% colnames(data)) ) {
# stop("Not all variables specified in 'vars_groups = ...' exist!")
# } else if ( !all(vars_groups %in% vars_meas) ) {
# stop("Not all grouping variables specified in 'vars_groups = ...'
# are specified in 'vars_meas = ...'")
# }
# }
# check_events(vars_events)
# check_descr(vars_descr)
# if ( length(vars_event) != 0 ) {
# if (length(vars_event) == 2) {
# if(is.null(vars_event[["score_event"]]) ||
# is.null(vars_event[["text_event"]]) ) {
# stop("One of the variables in 'vars_event = ...' is NULL")
# } else if ( !all(c(vars_event[["score_event"]], vars_event[["text_event"]]
# %in% colnames(data)) ) ) {
# stop("Not all variables in 'vars_event = ...' exist!")
# } else if ( !is.numeric(data[[vars_event[["score_event"]]]]) ) {
# stop("Variable in 'score_event = ...' not numerical!")
# }
# } else {
# if( !is.null(vars_event[["score_event"]]) ) {
# if ( !(vars_event[["score_event"]] %in% colnames(data)) ) {
# stop("Variable in 'score_event = ...' does not exist!")
# } else if ( !is.numeric(data[[vars_event[["score_event"]]]]) ) {
# stop("Variable in 'score_event = ...' not numerical!")
# }
# } else if( !is.null(vars_event[["text_event"]]) ) {
# if ( !(vars_event[["text_event"]] %in% colnames(data)) ) {
# stop("Variable in 'text_event = ...' does not exist!")
# }
# } else {
# stop("Variable in 'vars_event = ...' is NULL")
# }
# }
# }
if ( length(ID) != 0 ) {
if ( length(ID) == 2 ) {
if ( !is.null(ID[["var_ID"]]) && !is.null(ID[["ID"]]) ) {
if ( !all(ID[["var_ID"]] %in% colnames(data)) ) {
stop("Not all variables in 'ID = ...' exist!")
} else if ( !all(ID[["ID"]] %in% data[[ID[["var_ID"]]]]) ) {
stop("ID specified not found in data.")
}
} else if ( !is.null(ID[["var_ID"]]) && is.null(ID[["ID"]]) ) {
stop("No ID-number/person specified.
Use 'list(var_ID = ..., ID = ...)' for specification.")
} else if ( is.null(ID[["var_ID"]]) && !is.null(ID[["ID"]]) ) {
stop("ID/person specified, but not the name of the ID-variable.
Use 'list(var_ID = ..., ID = ...)' for specification.")
} else if ( is.null(ID[["var_ID"]]) && is.null(ID[["ID"]]) ) {
stop("Both ID and var_ID are NULL.
Use 'list(var_ID = ..., ID = ...)' for specification.")
}
} else {
stop("Please specify both ID-variable and ID.
Use 'list(var_ID = ..., ID = ...)' for specification.")
}
}
if ( length(ID) == 2 ) {
data <- dplyr::filter(data,
data[ID[["var_ID"]]] == ID[["ID"]])
}
# ADD WARNINGS
# SOMETHING GOES WRONG HERE
library(tidyverse)
if ( !is.null(interval) ) {
if (inherits(data[["date_esmvis"]],
c("Date", "POSIXlt", "POSIXct", "is.POSIXt"))) {
data$weekno_esmvis <- time_count(data[["date_esmvis"]], unit = "week")
data$dayno_esmvis <- time_count(data[["date_esmvis"]], unit = "day")
data$wday_esmvis <- lubridate::wday(data[["date_esmvis"]],
label = TRUE, abbr = TRUE,
week_start = 1)
} else if( is.numeric(data[["date_esmvis"]]) ) {
data$weekno_esmvis <- (( data[["date_esmvis"]] - 1 ) %/% 7) + 1
data$dayno_esmvis <- data[["date_esmvis"]]
}
data <- data %>%
dplyr::group_by(dayno_esmvis) %>%
dplyr::mutate(ind_int_esmvis = dplyr::dense_rank(date_esmvis)) %>%
dplyr::ungroup()
}
if ( !(type_vis %in% c("timeseries", "zoom", "network",
"barchart", "combined", "animation")) ) {
# Error message weird format. FIX
stop("You haven't selected a correct type of visualisation in the
argument 'type_vis = ...'. Please choose from: 'timeseries',
'zoom', 'network', or 'barchart'")
}
# Creates new dataset for selected period, but only
# if selected period is sensible
if ( !is.null(sel_period) ) {
if ( sel_period[1] < min(data[["date_esmvis"]], na.rm = TRUE) ) {
stop("First date specified in 'sel_period = ...'
not within (selected) data")
} else if ( sel_period[2] > max(data[["date_esmvis"]], na.rm = TRUE) ) {
stop("Last date specified in 'sel_period = ...'
not within (selected) data")
} else if (sel_period[1] > sel_period[2] ) {
stop("Last date before first date in 'sel_period = ...'!")
} else {
data <- dplyr::filter(data,
date_esmvis >= sel_period[1] &
date_esmvis <= sel_period[2])
}
}
# if ( !is.null(sel_period) ) {
# data <- dplyr::filter(data,
# data[var_date] >= sel_period[1] &
# data[var_date] <= sel_period[2])
# }
# CHECK WHAT GOES ON HERE
# See if dates for zoom fall inside either all dates
# or those dates selected previously with sel_period
if ( !is.null(sel_period_zoom) ) {
if ( !(type_vis %in% c("zoom", "combined")) ) {
warning("Did you forget 'type_vis = 'zoom'")
}
if ( is.null(sel_period) ) {
if ( sel_period_zoom[1] < min(data["date_esmvis"], na.rm = TRUE) ) {
stop("Specified first date not within (selected) data
selected with 'sel_period_zoom = ...'")
} else if ( sel_period_zoom[2] > max(data["date_esmvis"], na.rm = TRUE) ) {
stop("Specified last date not within (selected) data
selected with 'sel_period_zoom = ...'")
} else if (sel_period_zoom[1] > sel_period_zoom[2] ) {
stop("Last date before first date
selected with 'sel_period_zoom = ...'!")
}
} else if ( !is.null(sel_period) ) {
if ( sel_period_zoom[1] < sel_period[1] ) {
stop("Specified last date in 'sel_period_zoom = ...'not within period
selected with 'sel_period = ...' ")
} else if ( sel_period_zoom[2] > sel_period[2] ) {
stop("Specified first date in 'sel_period_zoom = ...'not within period
selected with 'sel_period = ...' ")
} else if (sel_period_zoom[1] > sel_period_zoom[2] ) {
stop("Last date before first date
selected with 'sel_period_zoom = ...'!")
}
}
}
# From wide to long
data_l <- tidyr::gather(data, vars_meas,
key = "Name", value = "Score")
if ( type_vis %in% c("zoom", "combined") && !is.null(sel_period_zoom) ) {
data_zoom <- dplyr::filter(data_l,
data_l["date_esmvis"] >= sel_period_zoom[1] &
data_l["date_esmvis"] <= sel_period_zoom[2])
return(list(data_l = data_l,
data_zoom = data_zoom))
} else {
return(list(data_l = data_l))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.