"%!in%" <- function(x, y) !("%in%"(x, y))
binom_test_adapted <- function(x, n, p, alternative) {
if (sum(is.na(c(x, n, p))) == 0) {
ret <- binom.test(x, n, p, alternative=alternative)$p.value
} else {
ret <- NA_real_
}
return(ret)
}
p_adjust_adapted <- function(p,n, method) {
if (sum(is.na(c(p, n))) == 0 ) {
ret <- p.adjust(p, n, method)
} else {
ret <- NA_real_
}
return(ret)
}
#
# survey_data = survey_list$survey_data
#answer_options_to_exclude = c(-1,-2,-3)
# alpha = 0.05
# max_answer_options_of_question = 15
# prepro_time_data <- preprocess_time_data(survey_list$time_data)
# translate_table <- attr(prepro_time_data, "translate_table")
# questions_to_keep <- "JVS_OffeneStellen_F01_Anzahl"
#' Test answer distribution
#'
#' This function investigates the answer shares by interviewer and detects deviances in shares.
#'
#' @param survey_data data table of class **be_iaclass**
#' @param answer_options_to_exclude a vector of answer options that shall not be included in the analysis
#' @param alpha singnificance level on that shall be tested
#' @param max_answer_options_of_question threshold of number of answer options that the question shall be analysed
#' @return data table with unadjusted and adjusted p-values for each question-answer-interviewer-combination
#' @details Each answer option is treated as a binary variable.
#' To test whether the share of a specific answer option for a specific interviewer deviates from the expected probability, estimated from all interviews, a binomial test is carried out. p-values are p-adjusted. Significances are calculated on #'level alpha.
#' @export
inv_answer_shares <- function(
survey_data,
answer_options_to_exclude,
alpha = 0.05,
max_answer_options_of_question =15,
translate_table = NULL,
output = "conspi"
) {
survey_data <- copy(survey_data)
# test if object of class be_iaclass
stopifnot(inherits(survey_data, "iaclass_survey"))
# Stefan fragen ob das eher schon in die S3 Definition
# eliminate unwanted answer_options
for(col in names(survey_data)) set(survey_data, i=which(survey_data[[col]] %in% answer_options_to_exclude), j=col, value=NA)
survey_data[, ID := NULL] # brauchen wir das überhaupt je? sonst kann man sich das gleich sparen
#colnames(survey_data) %>% # ist das sehr dumm?
# lapply(function(x) survey_data[, c(x) := as.factor(as.character(c(x)))]) %>%
# invisible
# das kann sehr gefährlich sein!! bepsrechen!!
h <- function(w) if( any( grepl( "are not all of the same type", w) ) ) invokeRestart( "muffleWarning" )
# create long table
shares_long_dt <- withCallingHandlers(melt(survey_data,
id.vars = "INTERVIEWER"), warning = h)[,
.N, by = .(INTERVIEWER, variable, value)]
shares_long_dt <- shares_long_dt[!is.na(value), ] # na überdenken
# remove variables with too many answer options (not optimal for binomal analyis)
shares_long_dt[, answer_options_of_question := length(table(value)), by = variable]
# if (is.null(questions_to_keep)) {
shares_long_dt <- shares_long_dt[answer_options_of_question <= max_answer_options_of_question, ]
#} else {
#shares_long_dt <- shares_long_dt[(answer_options_of_question <= max_answer_options_of_question) |
# (variable %in% questions_to_keep), ]
# }
# create variables necessary for binomial test
shares_long_dt[,
question_asked := sum(N),
by = c("INTERVIEWER", "variable")]
shares_long_dt[, ratio := N/question_asked]
# create estimate without specific interviewer
shares_long_dt[,
estimate := (sum(N) - N)/(sum(question_asked) - question_asked),
by = c("variable", "value")]
# carry out binomial test
shares_long_dt[, p_value_binom_unadjusted := binom_test_adapted(
x = N,
n = question_asked,
p = estimate,
alternative = "two.sided"
),
by = .(INTERVIEWER, variable, value)
]
# adjusten for werte für die kein p-value gebildete werden konnte - russisch
shares_long_dt[is.na(p_value_binom_unadjusted), p_value_binom_unadjusted := 1]
#anzahl_interviewer <- length(unique(shares_long_dt$INTERVIEWER))
#shares_long_dt[,p_value_binom_adjusted := p.adjust.adapted(p = p_value_binom_unadjusted,
# n = anzahl_interviewer), by=.(INTERVIEWER, variable, value)]
# adjust p values
shares_long_dt[,
p_value_binom_adjusted := p.adjust(p_value_binom_unadjusted, method="fdr"),
by = .(variable, value)
]
# test for significany on alpha level for each answer in each question
shares_long_dt[,
sign := as.numeric(p_value_binom_adjusted < alpha),
by=.(INTERVIEWER, variable, value)
]
# check whether any answer option in whole question was signficant
shares_long_dt[,
shares_conspi := max(sign, na.rm = T),
by=.(INTERVIEWER, variable)
]
class(shares_long_dt) <- c("iaclass_shares", class(shares_long_dt))
setattr(shares_long_dt, "key_variables", attr(data, "key_variables"))
shares_long_dt[, marker := c(1L, rep(0L, .N-1L)), by=.(INTERVIEWER, variable)]
#shares_short_dt <- copy(shares_long_dt)
#shares_short_dt <- shares_short_dt[marker == 1,]
shares_short_dt <- shares_long_dt[marker == 1, ]
#shares_long_dt <- shares_short_dt[, ID := NULL]
shares_short_dt <- shares_short_dt[, c("INTERVIEWER", "variable", "shares_conspi")]
# ist an der stelle auch unlogisch und zu früh...gehört eins später aber ich machs jetzt mal
# der einfachkeit halber so
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# das ist super vorläufig programmiert
if (("key_variables" %in% names(attributes(survey_data)) )) {
shares_short_dt[,shares_filter_conspi :=
as.numeric(
shares_conspi == 1 &
variable %in% attr(survey_data, "key_variables")
)
]
}
if (!is.null(translate_table)) {
shares_short_dt <- merge(shares_short_dt ,
translate_table,
by.x = "variable",
by.y = "orignial_name",
all.x = T)
#shares_short_dt[, mc_variable := ifelse(is.na(mc_name), 1, 0)]
shares_short_dt [is.na(mc_name), mc_name := variable]
shares_short_dt [, variable := mc_name]
shares_short_dt [, mc_name := NULL]
shares_long_dt[,
shares_conspi := max(sign, na.rm = T),
by=.(INTERVIEWER, variable)
]
shares_short_dt [, marker := c(1L, rep(0L, .N-1L)), by=.(INTERVIEWER, variable)]
shares_short_dt <- shares_short_dt [marker == 1, ]
variables_short <- intersect(
c("INTERVIEWER", "variable", "shares_conspi", "shares_filter_conspi"),
colnames(shares_short_dt)
)
shares_short_dt <- shares_short_dt[, variables_short, with = F]
}
setattr(shares_short_dt, "key_variables", attr(data, "key_variables"))
if (output == "conspi") {
return(shares_short_dt) }
if (output == "detailed") {
return(shares_long_dt) }
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.