#' Create long format of a dataframe, keeping the id column.
#'
#' @param df Dataframe with a key variable (\code{id}).
#' @param id name of the key variable in the dataframe.
#' @return Dataframe consisting of 3 columns \code{id}, \code{var} & \code{val},
#' containing the dataframe in long format (based on dplyr::gather).
#' @importFrom tidyr gather spread
#' @importFrom dplyr mutate mutate_all arrange
#' @importFrom purrr imap_dfr
#' @importFrom assertthat assert_that not_empty is.string
#' @importFrom rlang := ensym .data
#' @export
#' @examples
#' # load spss data
#' path <- system.file("examples", "iris.sav", package = "haven")
#' df <- haven::read_sav(path) %>%
#' # add id column
#' tibble::rownames_to_column("id")
#' df %>% longen()
longen <- function(df, id = "id") {
res <-
df %>%
unattr() %>%
gather("var", "val", -{{ id }}) %>%
full_join(df %>% select(-{{ id }}) %>% tab_types(), by = "var") %>%
spread(.data$type, .data$val, convert = T) %>%
factor_arrange(levels = names(df))
# mutate(var = factor(.data$var, levels = names(df))) %>%
# arrange(.data$var) %>%
# mutate(var = as.character(.data$var))
if (!"cv" %in% names(res)) {
res["cv"] <- NA_character_
}
if (!"nv" %in% names(res)) {
res["nv"] <- NA_real_
}
res
}
#' Remove attributes from a vector
#'
#' @param x vector
#'
#' @return x with attributes removed
#' @export
#'
#' @examples
#' x <- haven::labelled(1:3, label = "variable_label")
#' strip_attributes(x)
strip_attributes <- function(x) {
attributes(x) <- NULL
x
}
#' Remove attributes from all variables of a dataframe
#'
#' @param df A dataframe
#'
#' @return A copy of \code{df} with all attributes removed.
#' @importFrom dplyr mutate_if mutate_all
#' @export
#'
#' @examples
#' path <- system.file("examples", "iris.sav", package = "haven")
#' df <- haven::read_sav(path)
#' df %>% unattr()
unattr <- function(df) {
df <- df %>%
mutate(across(.fns = strip_attributes)) %>%
# mutate_if(is.factor, as_labelled) %>%
mutate_if(is.integer, as.numeric)
# mutate_all(as.vector)
# # print(df %>% map_chr(typeof))
stopifnot(unique(map_chr(df, class)) %in% c("character", "numeric"))
df
}
#' Convert a factor to a haven::labelled variable
#'
#' @param x Factor.
#'
#' @return Returns a labelled variable with values and labels according
#' to the levels in the factor. This should roundtrip with forcats::as_factor.
#' Additionally, a variable label can be set.
#' @importFrom forcats as_factor
#' @export
#'
#' @examples
#' x <- factor(c("b", "b", "a"))
#' attr(x, "label") <- "variable label"
#' x
#' as_labelled(x)
#' all.equal(x,
#' x %>%
#' as_labelled() %>%
#' forcats::as_factor())
as_labelled <- function(x) {
labs <- set_names(sort(unique(as.numeric(x))),
levels(x))
haven::labelled(as.numeric(x), labels = labs, label = attr(x, "label"))
# map(c(vall=levels, val=unclass), ~.x(x)) %>% as_tibble() %>% distinct() %>% deframe()
}
factor_arrange <- function(df, var = var, levels) {
df %>% mutate(var = factor(.data$var, levels = levels)) %>%
arrange(.data$var) %>%
mutate(var = as.character(.data$var))
}
cols_differ <- function(df_cts, col_spec) {
match_str <- paste0("^", col_spec, "\\d+$")
col_name <- paste0(col_spec, "_diff")
diff_lgl <- df_cts %>% select(matches(match_str)) %>% is_diff_in_cols()
df_cts %>% mutate(!!col_name := diff_lgl)
}
is_diff_in_cols <- function(df) {
df %>% t() %>% as.data.frame() %>% map_int(n_distinct) > 1
}
add_list_suffix <- function(l, cols) {
l %>%
imap(~rename_at(.x, vars(cols), ~paste0(., !!.y)))
}
list_join <- function(l, join=full_join, by) {
col_names <- names(l[[1]])
by <- match.arg(by, col_names, several.ok = TRUE)
l %>% add_list_suffix(setdiff(col_names, by)) %>% reduce(join, by = by)
}
# Taken from:
# http://www.cookbook-r.com/Manipulating_data/Comparing_vectors_or_factors_with_NA/
compareNA <- function(v1, v2) {
same <- (v1 == v2) | (is.na(v1) & is.na(v2))
same[is.na(same)] <- FALSE
same
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.