# R/format_claassen.R
#
# Defines functions format_claassen_cls format_claassen
#
# Documented in format_claassen

#' Format DCPO Data for Estimation Using Claassen's (2019) Stan Files
#'
#' \code{format_claassen} formats DCPO data output by \code{dcpo_setup} for use with Claassen's
#' Stan files for "Estimating Smooth Country-Year Panels for Public Opinion"
#' (\emph{Political Analysis}, 2019).
#'
#' @param dcpo_data a data frame of survey responses generated by \code{dcpo_setup}, or a list
#'   of such data frames. Each data frame must contain the columns \code{country}, \code{year},
#'   \code{item}, \code{r} (response value), \code{n} (count of respondents giving that
#'   response), and \code{survey}.
#'
#' @details Following Claassen, variables are dichotomized by coding responses above the midpoint
#' of the response scale as 1. The midpoint is the median of the distinct response values
#' observed for a country-year-item, ignoring the missing-data codes -1 and 999. Rows carrying a
#' missing-data code never count as responses above the midpoint, although they still contribute
#' to the sample size \code{samp}. Country-year-items with no responses above the midpoint are
#' dropped.
#'
#' @return If \code{dcpo_data} is a data frame, a list of Stan data with elements \code{N}
#'   (number of country-year-item observations), \code{K} (number of items), \code{T} (number of
#'   years spanned), \code{J} (number of countries), \code{P} (number of country-item pairs),
#'   the index vectors \code{jj} (country), \code{kk} (item), \code{tt} (year), and \code{pp}
#'   (country-item pair), the rounded counts \code{x} and \code{samp}, and \code{data}, the
#'   aggregated data frame the other elements were built from. If \code{dcpo_data} is a list of
#'   data frames, a list with one such element per data frame.
#'
#' @import dplyr
#' @importFrom stats median
#' @importFrom purrr map
#'
#' @export

format_claassen <- function(dcpo_data) {
    # satisfy R CMD check
    country <- year <- item <- r <- n <- survey <- x <- median_r <- NULL

    # -1 & 999 are missing, not response values
    missing_codes <- c(-1, 999)

    # a bare data frame is wrapped so both input shapes share one code path
    single_df <- inherits(dcpo_data, "data.frame")
    if (single_df) {
        dcpo_data <- list(dcpo_data)
    }

    claassen_stan <- map(dcpo_data, function(df) {
        # dichotomize (values above midpoint of response scale coded 1)
        dat <- df %>%
            group_by(country, year, item) %>%
            mutate(median_r = median(setdiff(r, missing_codes))) %>%
            # 999 is numerically above any midpoint, so missing codes must be
            # excluded explicitly or they would be counted as positive responses
            summarize(x = sum(ifelse(r > median_r & !(r %in% missing_codes), n, 0)),
                      samp = sum(n),
                      survey = first(survey)) %>%
            ungroup() %>%
            filter(x > 0)

        # integer codes for each index, computed once and reused below
        country_index <- as.numeric(as.factor(dat$country))
        item_index <- as.numeric(as.factor(dat$item))
        pair_index <- as.numeric(as.factor(paste(dat$country, dat$item)))
        first_year <- min(dat$year)

        list(N    = nrow(dat),
             K    = dplyr::n_distinct(dat$item),
             T    = max(dat$year) - first_year + 1,
             J    = dplyr::n_distinct(dat$country),
             P    = max(pair_index),
             jj   = country_index,
             kk   = item_index,
             tt   = dat$year - first_year + 1,
             pp   = pair_index,
             x    = round(dat$x),
             samp = round(dat$samp),
             data = dat)
    })

    # unwrap again so a data frame in yields a single Stan data list out
    if (single_df) {
        claassen_stan <- claassen_stan[[1]]
    }

    claassen_stan
}

# Build the Stan data list for Claassen's models from data that already holds
# one row per country-year-item, with columns `country`, `year`, `item`, `x`,
# and `samp`. Rows whose `x` is not greater than zero are dropped before the
# indices are built. Returns a list with the scalar sizes N, K, T, J, P, the
# index vectors jj, kk, tt, pp, the rounded counts x and samp, and the
# filtered data itself as `data`.
format_claassen_cls <- function(claassen_data) {
    # satisfy R CMD check
    x <- NULL

    # keep only observations with a positive count
    kept <- dplyr::filter(claassen_data, x > 0)

    # integer codes for countries, items, and country-item pairs
    country_codes <- as.numeric(as.factor(kept$country))
    item_codes <- as.numeric(as.factor(kept$item))
    pair_codes <- as.numeric(as.factor(paste(kept$country, kept$item)))

    # years are re-based so the earliest observed year becomes 1
    earliest_year <- min(kept$year)
    latest_year <- max(kept$year)

    list(
        N    = nrow(kept),
        K    = dplyr::n_distinct(kept$item),
        T    = latest_year - earliest_year + 1,
        J    = dplyr::n_distinct(kept$country),
        P    = max(pair_codes),
        jj   = country_codes,
        kk   = item_codes,
        tt   = kept$year - earliest_year + 1,
        pp   = pair_codes,
        x    = round(kept$x),
        samp = round(kept$samp),
        data = kept
    )
}
# fsolt/DCPOtools documentation built on June 9, 2025, 4:10 p.m.