#' Format DCPO Data for Estimation Using DCPO Stan Files
#'
#' \code{format_dcpo} formats DCPO data output by \code{dcpo_setup} for use with the \code{DCPO}
#' package's \code{dcpo} Stan files.
#'
#' @param dcpo_data a data frame of survey responses generated by \code{dcpo_setup}
#' @param scale_q a string indicating a survey question in dcpo_data to assign difficulty .5 at its cutpoint scale_cp
#' @param scale_cp an integer indicating which cutpoint for scale_q should be assigned difficulty .5
#' @param delta logical: should item difficulty be estimated to vary by question-country to account for potential lack of equivalence?
#'
#' @return a list of Stan data
#'
#' @import dplyr
#' @importFrom reshape2 dcast
#' @importFrom tibble rownames_to_column
#' @importFrom janitor clean_names
#'
#' @export
format_dcpo <- function(dcpo_data, scale_q, scale_cp, delta = TRUE) {
# satisfy R CMD check
country <- year <- item <- r <- n <- NULL
dcpo_data_original <- dcpo_data
if ("data.frame" %in% class(dcpo_data)) {
dcpo_data <- list(dcpo_data)
}
# generate cumulative number of respondents with answers above each cutpoint
dcpo_stan <- map(dcpo_data, function(df) {
dat <- df %>%
group_by(country, year, item) %>%
mutate(r = case_when(r == -1 ~ 1,
r == 999 ~ max(setdiff(r, 999)),
TRUE ~ r)) %>%
ungroup() %>%
group_by(country, year, item, r, survey, cc_rank) %>%
summarize(n = sum(n), .groups = "keep") %>%
ungroup() %>%
arrange(desc(cc_rank), country, year) %>%
mutate(kk = as_factor(country),
tt = year - min(year) + 1,
qq = as_factor(item),
rr = r - 1,
question = item,
item = paste(question, r, "or higher")) %>%
group_by(country, year, question) %>%
arrange(desc(r), .by_group = TRUE) %>%
mutate(y_r = round(cumsum(n)),
n_r = round(sum(n))) %>%
arrange(r, .by_group = TRUE) %>%
ungroup() %>%
arrange(kk, tt) %>%
filter(y_r > 0 & rr > 0)
use_delta <- dat %>%
group_by(qq, kk) %>%
summarize(years = n_distinct(year)) %>%
ungroup() %>%
spread(key = kk, value = years, fill = 0) %>%
mutate(countries = rowSums(.[, -1] > 1)) %>%
mutate_at(vars(-qq, -countries),
~ if_else(. > 1 & countries > 2 & qq != scale_q, 1, 0)) %>%
select(-qq, -countries) %>%
{if (!delta) mutate_all(., ~ 0) else .}
scale_item_matrix <- dat %>%
group_by(qq, rr) %>%
summarize(n = sum(n_r)) %>%
ungroup() %>%
spread(key = rr, value = n, fill = 0) %>%
janitor::clean_names() %>%
mutate_at(vars(matches(paste0("x\\d+$"))), ~if_else(. > 0, 10, 0)) %>%
mutate_at(vars(matches(paste0("x", scale_cp, "$"))), ~if_else(qq == scale_q, . + 1, 0)) %>%
mutate_at(vars(matches(paste0("x\\d+$"))), ~if_else(. > 0, . - 10, 0)) %>%
select(-qq) %>%
as.matrix()
stopifnot(sum(scale_item_matrix) == 1)
one_dcpo_stan <- list( K = max(as.numeric(dat$kk)),
T = max(dat$tt),
Q = max(as.numeric(dat$qq)),
R = max(dat$rr),
N = nrow(dat),
kk = as.numeric(dat$kk),
tt = as.numeric(dat$tt),
qq = as.numeric(dat$qq),
rr = dat$rr,
y_r = dat$y_r,
n_r = dat$n_r,
fixed_cutp = scale_item_matrix,
use_delta = use_delta,
data = dat,
data_args = list(scale_q = scale_q, scale_cp = scale_cp, delta = delta))
})
if ("data.frame" %in% class(dcpo_data_original)) {
dcpo_stan <- dcpo_stan[[1]]
}
return(dcpo_stan)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.