#' Create A Metadata Table
#'
#' Create a metadata file from your surveys.
#'
#' @details The structure of the returned tibble:
#' \describe{
#' \item{filename}{The original file name}
#' \item{qb}{Identifier of a question block for further tasks.}
#' \item{var_name_orig}{The original variable name in SPSS.}
#' \item{class_orig}{The original variable class after importing with\code{\link[haven]{read_spss}}.}
#' \item{var_label_orig}{The original variable label in SPSS.}
#' \item{var_label_norm}{A normalized version of the variable labels.}
#' \item{var_name_suggested}{A partly canonized variable name.}
#' \item{factor_levels}{A list of factor levels, i.e. value labels in SPSS}
#' \item{na_levels}{Values marked as missing by GESIS.}
#' \item{valid_range}{Not missing factor(category) levels.}
#' \item{class_suggested}{A suggested class conversion.}
#' \item{length_cat_range}{Number of categories in the non-missing range.}
#' \item{length_na_range}{Number of categories marked as missing by GESIS.}
#' \item{length_total_range}{Number of categories or unique levels, which may be different from the sum of missing and category labels.}
#' \item{n_categories}{Number of categories of the variable, should be the sum of the former two.}
#' }
#' @param survey_list A list of data frames containing surveys, or a
#' single survey in a single data frame. The filename should be added
#' in the column \code{filename}.
#' @importFrom labelled val_labels var_label
#' @importFrom dplyr full_join mutate
#' @importFrom tibble tibble
#' @importFrom tidyselect all_of
#' @return A data frame with the original variable attributes and
#' suggested conversions and changes.
#' @examples
#' import_file_names <- c(
#' 'ZA7576_sample','ZA5913_sample'
#' )
#'
#' my_surveys <- read_surveys (
#' import_file_names,
#' .f = 'read_example_file' )
#'
#' metadata <- gesis_metadata_create(my_surveys)
#' @export
gesis_metadata_create <- function ( survey_list ) {
## if input is a data.frame, place it in a list -----------
if ( ! "list" %in% class(survey_list) ) {
survey_list <- to_survey_list( x = survey_list )
}
## start of internal function metadata_create -------------
metadata_create <- function (dat) {
class_orig <- vapply (
dat, function(x) class(x)[1], character(1)
)
attr (dat$filename, "label" ) <- "filename"
get_labels <- function( dat ) {
vapply ( dat, function(x) attr(x, "label"), character(1))
}
var_label_orig <- vapply ( dat, labelled::var_label, character(1) )
var_label_norm <- label_normalize(x = var_label_orig )
var_label_suggested <- label_suggest( var_label_norm,
names(dat) )
## Creating the basic metadata ----
metadata <- tibble::tibble (
filename = unique(dat$filename)[1],
var_name_orig = names ( dat ),
class_orig = class_orig,
var_label_orig = var_label_orig,
var_label_norm = var_label_norm,
var_name_suggested = var_label_suggested
)
## Creating a catalogue of possible categories / factor levels ----
all_val_labels <- sapply ( dat, labelled::val_labels )
value_labels_df <- data.frame (
var_name_orig = names ( all_val_labels )
)
value_labels_df$factor_levels <- all_val_labels
value_labels_df$na_levels <- sapply ( dat, labelled::na_values)
## Establish the valid range of categories without missings -----
fn_valid_range <- function ( x ) {
element_name <- names ( value_labels_df$factor_levels[x] )
f <- unlist(value_labels_df$factor_levels[x])
n <- unlist(value_labels_df$na_levels[x])
valid_range <- f[ ! f %in% n]
if ( ! is.null(valid_range) ) {
names( valid_range) <- gsub(paste0(element_name, "."), "", names(valid_range))
}
normalized_labels <- label_normalize(names(valid_range))
valid_range [which ( ! grepl( "inap|refus|dk|decline", normalized_labels))]
}
value_labels_df$valid_range <- sapply (
1:nrow(value_labels_df), fn_valid_range )
value_labels_df$length_cat_range <- sapply (
value_labels_df$valid_range, length )
value_labels_df$length_na_range <- sapply (
value_labels_df$na_levels, length )
value_labels_df$length_total_range <- vapply (
dat, function(x) length(unique(x)), numeric(1))
##Merging the basic metadata with the categories
metadata <- dplyr::full_join(
metadata,
value_labels_df, by = 'var_name_orig' )
metadata$n_categories <- vapply (
sapply ( metadata$factor_levels, unlist ),
length, numeric(1) ) #number of categories in categorical variables
metadata <- question_block_identify(metadata)
metadata <- metadata %>%
select ( all_of(c("filename", "qb", "var_name_orig",
"var_label_orig",
"var_label_norm", "var_name_suggested",
"length_cat_range", "length_na_range",
"length_total_range",
"n_categories",
"factor_levels", "valid_range", "na_levels",
"class_orig")
)
)
## class_suggest is not exported, it is in utils.R
## Can be directly called as eurobarometer:::class_suggest(metadata)
class_suggest(metadata)
}
tmp <- metadata_create( dat = survey_list[[1]])
metadata_list <- lapply ( survey_list, metadata_create )
do.call(rbind,metadata_list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.