#' Format a Numeric as a String Percentage
#' This function formats a decimal number into a string percentage with
#' a specific number of decimal places and with a "%" character.
#' The function uses formatC from the R base package and
#' defaults to formatting the number with 1 digit and with the "f" format
#' in the formatC argument.
#' This function is originally from the StackOverflow post:
#' http://stackoverflow.com/questions/12688717/round-up-from-5-in-r
percent0 <- function(x,
digits = 1,
format = "f",
...) {
# always rounding 5 up
# taken from the following stackoverflow post
round2 = function(x, n) {
posneg = sign(x)
z = abs(x) * 10 ^ n
z = z + 0.5
z = trunc(z)
z = z / 10 ^ n
z * posneg
}
paste0(formatC(round2(100 * x, 1), format = format, digits = digits, ...), "%")
}
#' Find out Which Choices Have Text Entry Components
#'
#' @name which_choices_have_text_entry
#' @description Given a single question, this function
#' determines which of the question[['Payload']][['Choices']]
#' elements have a TextEntry which is enabled for free response
#' input. The returned value is a list of integers representing
#' the indices of which choices have text entry components.
#' @param question This is a list object representing an individual question
#' from a Qualtrics Survey File. A question must have the 'Choices' list
#' inside its 'Payload' in order to be accepted by this function.
#' @return A list of the indices for the choices which have a text entry component.
which_choices_have_text_entry <- function(question) {
# Error if question[['Payload']][['Choices']] doesn't exist.
if (! 'Payload' %in% names(question) ||
! 'Choices' %in% names(question[['Payload']])) {
stop("The passed question must contain a Payload with Choices.")
}
# For those choices with a "TextEntry" field,
# return which of those have TextEntry as "true"
text_entry_choices <-
which(as.logical(lapply(question[['Payload']][['Choices']],
function(x) {
'TextEntry' %in% names(x) &&
x[['TextEntry']] == "true"
})))
return(text_entry_choices)
}
#' Convert the Variable Response into its Corresponding Text
#'
#' @name question_variable_to_choice_text
#' @description This function looks up a choice's corresponding text
#' by using the question[['Payload']][['Choice']] list, and if applicable,
#' the question[['Payload']][['RecodeValues']] list as an intermediary lookup.
#' Moreover, if a choice has a corresponding text entry component, this function
#' attempts to identify which column corresponds to the text entry responses
#' and modifies the returned choice text to include "See Appendix [Column DataExportTag]"
#' where the [Column DataExportTag] is replaced by the column name of the corresponding
#' text entry response column.
#' @param question This is a list object representing an individual question
#' from a Qualtrics Survey File. A question must have the 'Choices' list
#' inside its 'Payload' in order to be accepted by this function. Furthermore,
#' the choice provided must appear in the Choices list (or if use_recode_values is
#' TRUE then the choice argument provided must appear among the RecodeValues list).
#' @param choice A choice response to a question, given as a string or number.
#' @param use_recode_values A logical boolean value indicating whether or not the
#' RecodeValues list needs to be used as an intermediary step in looking up the
#' text corresponding to the passed choice argument.
#' @return The text corresponding to the variable choice indicated for the given question.
question_variable_to_choice_text <- function(question, choice, use_recode_values) {
# Error if question[['Payload']][['Choices']] doesn't exist.
if (! 'Payload' %in% names(question) ||
! 'Choices' %in% names(question[['Payload']])) {
stop("The passed question must contain a Payload with Choices.")
}
if (missing(use_recode_values)) {
if ("RecodeValues" %in% names(question[['Payload']]) &&
length(question[['Payload']][['RecodeValues']]) > 0) {
use_recode_values <- TRUE
} else use_recode_values <- FALSE
}
# Error if use_recode_values is true, but there are no recode values in the question.
if (use_recode_values == TRUE &&
! "RecodeValues" %in% names(question[['Payload']])) {
stop(
"The passed question does not have a RecodeValues ",
"field in its payload, but the question_variable_to_choice_text function ",
"was called with the parameter use_recode_values = TRUE.")
}
# Use recode values to reindex the variable if necessary. If a question
# does not use recode values, then the choice as a variable response already
# corresponds with the name of the corresponding choice in the QSF data.
if (use_recode_values) {
choice_index <- which(question[['Payload']][['RecodeValues']] == choice)
if (length(choice_index) == 0) {
stop("The choice ", choice, " provided does not match any RecodeValue",
"in the question", question[['Payload']][['DataExportTag']])
}
choice_index <- names(question[['Payload']][['RecodeValues']])[[choice_index]]
choice_index <- which(names(question[['Payload']][['Choices']]) == choice_index)
} else if (choice %in% names(question[['Payload']][['Choices']])) {
choice_index <- which(names(question[['Payload']][['Choices']]) == choice)
} else {
stop("The choice ", choice, " does not match any choice in the question ",
question[['Payload']][['DataExportTag']])
}
# Determine which elements of the question[['Payload']][['Choices']] list
# have text entry components. Later this is used to insert "See Appendix ..."
# for those choices.
te_components <- which_choices_have_text_entry(question)
# After reindexing if necessary, get the choice text corresponding to the
# corresponding choice's index.
choice_text <- question[['Payload']][['Choices']][[choice_index]][[1]]
# If the choice is one with text entry components, insert
# "See Appendix [Column DataExportTag]"
# so that the corresponding text appendix may be easily found.
if (choice_index %in% te_components) {
# Get the response column names which have "TEXT" in them.
text_entry_column_names <-
which(grepl("TEXT", colnames(question[['Responses']])))
text_entry_column_names <-
colnames(question[['Responses']])[text_entry_column_names]
# If there is only one response column name which has "TEXT"
# in it, then use that as the corresponding export tag for any
# question choice which has an associated text entry component.
# If there are multiple, check if "*_[choice]_TEXT" matches any
# of the response column names exactly. Finally, if neither of
# the previous steps succeed in finding the column data export tag
# which corresponds to the given question choice, just use the question's
# data export tag.
if (length(text_entry_column_names) == 1) {
corresponding_export_tag <- text_entry_column_names[[1]]
} else {
choice_and_TEXT <- paste0("_", choice, "_", "TEXT")
matching_text_entry_colnames <-
which(grepl(choice_and_TEXT, text_entry_column_names))
if (length(matching_text_entry_colnames) == 1) {
corresponding_export_tag <-
text_entry_column_names[[matching_text_entry_colnames[[1]]]]
} else {
corresponding_export_tag <- question[['Payload']][['DataExportTag']]
}
}
# Clean the choice text of HTML entities.
choice_text <- clean_html(choice_text)
# Insert the corresponding export tag into the question choice.
choice_text <- paste0(choice_text,
" See Appendix ",
corresponding_export_tag)
}
return(choice_text)
}
#' Create the Results Table for a Multiple Choice Single Answer Question
#'
#' The mc_single_answer_results function uses the definition of the choices in the QSF file
#' and their potentially recoded values to determine how to table the results paired to that question.
#'
#' @param question This is a list object representing an individual question
#' from a Qualtrics Survey File. The question must have a paired
#' response column placed into the question
#' under [['Responses']]. The insertion of the responses into questions is
#' handled by link_responses_to_questions.
#' @param original_first_rows A dataframe contianing the header information
#' for each column of response data. This dataframe includes a row for the DataExportTag based
#' response column names, another for the Question Text stem and choice text (although
#' truncated), and a row with QID based column names.
#' @return A table with an N, Percent, and an unlabeled column
#' containing the question's chocies. In the N and Percent columns
#' the frequency statistics are computed for the question
#' choice.
mc_single_answer_results <-
function(question, original_first_rows) {
# If a question's response options were given recoded values
# during the survey's creation, the variables for the recoded values
# are used as the factors a response could match. If there was no
# recoding of values, then the choices are directly use as the factors
# a response can match.
if ("RecodeValues" %in% names(question[['Payload']]) &&
length(question[['Payload']][['RecodeValues']]) > 0) {
factors <- unlist(question[['Payload']][['RecodeValues']])
} else {
factors <- names(question[['Payload']][['Choices']])
}
# get the data export tag and questionID
exporttag <- question[['Payload']][['DataExportTag']]
questionID <- question[['Payload']][['QuestionID']]
# if the original_first_rows are available, we use them to pick the right
# response column, and if not, we select the column by using the DataExportTag
if (!missing(original_first_rows)) {
selected_column <- which(original_first_rows[2,] == questionID)
selected_column <-
colnames(original_first_rows)[selected_column]
responses <- question[['Responses']][[selected_column]]
} else {
responses <- question[['Responses']][[exporttag]]
}
# first, take the responses and sort them by the factors to get responses_tabled.
# the second column of the responses_tabled are the numbers of responses for each factor, our Ns.
# total number of responses to a question is counted by all the answers that aren't -99
# use Ns and respondent_count to calculate the percents for each factor.
responses_tabled <-
as.data.frame(table(factor(responses, levels = factors)))
N <- responses_tabled[, 2]
respondent_count <-
length(which((question[['Responses']][[exporttag]] != -99) &
(question[['Responses']][[exporttag]] != "")))
Percent <- percent0(N / respondent_count)
# if the choice variables have been recoded, first the factors are retrieved from the responses_tabled,
# then they are turned into the list of corresponding indexes in the RecodeValues list,
# which are then used to recover the original choice text from the Choices list,
# and then the choices are flattened to a cleaner list.
# if the choice variables are not recoded, then they can be retrieved directly from the responses_table
choices <- sapply(responses_tabled[, 1], function(choice) question_variable_to_choice_text(question, choice))
# construct the results table with a column for N, Percent, and choices,
# but make sure that the choices column doesn't have a header when it prints.
results_table <-
data.frame(N, Percent, choices, row.names = NULL)
colnames(results_table)[3] <- ""
# append the results table to the question
question[['Table']] <- results_table
# add a denominator note to the question
if (!'qtNotes' %in% names(question))
question[['qtNotes']] <- list()
question[['qtNotes']] <-
c(question[['qtNotes']], paste0('Denominator Used: ', toString(respondent_count)))
return(question)
}
#' Create the Results Table for a Multiple Choice Single Answer Question
#'
#' The mc_multiple_answer_results function uses the definition of the choices in the QSF file
#' and their (if present) recoded values to determine how to table the results paired to that question.
#'
#' @inheritParams mc_single_answer_results
mc_multiple_answer_results <-
function(question, original_first_rows) {
# save the original responses
orig_responses <- question[['Responses']]
# determine if we should use the original_first_rows
if (!missing(original_first_rows) &&
all(colnames(orig_responses) %in% colnames(original_first_rows)) &&
dim(original_first_rows)[[1]] >= 2) {
should_use_ofr <- TRUE
} else
should_use_ofr <- FALSE
# search either the import IDs or response column names
# for the string "TEXT", and include all but those including
# "TEXT" from the relevant_responses
if (should_use_ofr) {
relevant_responses <-
orig_responses[which(unlist(lapply(colnames(orig_responses), function(x)
! grepl("TEXT", original_first_rows[2, x]))))]
} else {
relevant_responses <-
orig_responses[which(unlist(lapply(colnames(orig_responses), function(x)
! grepl("TEXT", x))))]
}
# create a list of possible variations of the data export tag
data_export_tag <- question[['Payload']][['DataExportTag']]
data_export_tags <-
c(data_export_tag,
gsub("#", "_", data_export_tag),
gsub("-", "_", data_export_tag))
data_export_tags <-
unique(c(paste0(data_export_tags, "_"), data_export_tags))
data_export_tags <- paste0(data_export_tags, collapse = "|")
# rename the columns to be the choice indices they represent
colnames(relevant_responses) <-
lapply(colnames(relevant_responses), function(x) {
if (should_use_ofr) {
choice_index <- original_first_rows[2, x]
choice_index <- gsub("QID[0-9]*-", "", choice_index)
} else {
x <- gsub(data_export_tags, "", x)
if ("RecodeValues" %in% names(question[['Payload']]) &&
x %in% question[['Payload']][['RecodeValues']]) {
x <-
names(question[['Payload']][['RecodeValues']])[
which(question[['Payload']][['RecodeValues']] == x)]
}
return(x)
}
})
# get the number of respondents for each choice
N <-
lapply(relevant_responses, function(x)
sum(x != 0 & x != -99 & x != ""))
# determine if the question has any NA-type choices
if ('RecodeValues' %in% names(question[['Payload']])) {
has_na <- any(question[['Payload']][['RecodeValues']] < 0)
} else
has_na <- FALSE
# if the question has NA choices, calculate a valid_denominator
if (has_na) {
non_negative_columns <-
which(unlist(lapply(colnames(relevant_responses), function(x) {
question[['Payload']][['RecodeValues']][[x]] >= 0
})))
non_negative_responses <-
relevant_responses[non_negative_columns]
valid_denominator <-
length(which(apply(non_negative_responses, 1, function(x) {
!(all(x == -99) | all(x == "") | all(x == 0))
})))
}
# calculate the total denominator
total_denominator <-
length(which(apply(relevant_responses, 1, function(x) {
!(all(x %in% c(-99, "", 0)))
})))
# calculate the percent for each column:
# if it's an NA-column use the total denominator,
# if it's not an NA-column, but the question has NA options, use the valid denominator
# if the question has no NA choices, use the total_denominator
Percent <- lapply(1:length(N), function(x) {
if (has_na &&
!names(N)[[x]] %in% colnames(non_negative_responses)) {
percent0(N[[x]] / total_denominator)
} else if (has_na &&
names(N)[[x]] %in% colnames(non_negative_responses)) {
percent0(N[[x]] / valid_denominator)
} else {
percent0(N[[x]] / total_denominator)
}
})
# Since we've already translated converted the choices from recode values to choice
# variables, in the following call we set use_recode_values = FALSE.
choices <-
lapply(names(N), function(choice)
question_variable_to_choice_text(question, choice, use_recode_values = FALSE))
# make sure that these are flat lists
choices <- unlist(choices, use.names = FALSE)
N <- unlist(N, use.names = FALSE)
Percent <- unlist(Percent, use.names = FALSE)
# construct and return the output data frame
results_table <-
data.frame(N, Percent, choices, row.names = NULL)
colnames(results_table)[3] <- ""
# append the results table
question[['Table']] <- results_table
# add a note for the denominators used in the question
if ('qtNotes' %in% names(question))
question[['qtNotes']] <- list()
if (exists('valid_denominator')) {
question[['qtNotes']] <-
c(question[['qtNotes']], paste0('Valid Denominator Used: ',
toString(valid_denominator)))
question[['qtNotes']] <-
c(question[['qtNotes']], paste0('Total Denominator Used: ',
toString(total_denominator)))
} else {
question[['qtNotes']] <-
c(question[['qtNotes']], paste0('Denominator Used: ',
toString(total_denominator)))
}
return(question)
}
#' Create the Results Table for a Matrix Single Answer Question
#'
#' The matrix_single_answer_results function uses the
#' definition of the choices and answers in the
#' QSF file and their potentially recoded values to
#' determine how to table the results paired
#' to that question. If you look at the source code,
#' keep in mind that a matrix question's sub-questions
#' are called "Choices" and that the choices for each
#' sub-question are called "Answers"
#'
#' @inheritParams mc_single_answer_results
#' @return a table with the matrix-sub-questions listed
#' in the first column, the percentages for each
#' choice for each sub-question listed in a table, and
#' then another column with the total respondents
#' for each subquestion.
matrix_single_answer_results <-
function(question, original_first_rows) {
# save the original responses
orig_responses <- question[['Responses']]
# determine if we should use the original_first_rows
if (!missing(original_first_rows) &&
all(names(orig_responses) %in% names(original_first_rows)) &&
nrow(original_first_rows) >= 2) {
should_use_ofr <- TRUE
} else
should_use_ofr <- FALSE
# search either the import IDs or response column names
# for the string "TEXT", and include all but those including
# "TEXT" from the relevant_responses
if (should_use_ofr) {
relevant_responses <-
orig_responses[which(unlist(lapply(colnames(orig_responses), function(x)
! grepl("TEXT", original_first_rows[2, x]))))]
} else {
relevant_responses <-
orig_responses[which(unlist(lapply(colnames(orig_responses), function(x)
! grepl("TEXT", x))))]
}
# replace the column names with the answer indices
if (should_use_ofr) {
# if we should use the original_first_rows,
# then replace the column names with the import tags,
# then remove the question-ID from those import tags,
# leaving only the choice indices
question_id <- question[['Payload']][['QuestionID']]
} else {
# create a list of possible variations of the data export tag
data_export_tag <-
paste0("^", question[['Payload']][['DataExportTag']])
data_export_tags <-
c(
data_export_tag,
gsub("#", "_", data_export_tag),
gsub("-", "_", data_export_tag)
)
data_export_tags <-
unique(c(paste0(data_export_tags, "_"), data_export_tags))
data_export_tags <- paste0(data_export_tags, collapse = "|")
# remove the data export tag from the response column names
colnames(relevant_responses) <-
lapply(colnames(relevant_responses), function(x)
gsub(data_export_tags, "", x))
# if the question is taken from a side-by-side question,
# then it has an AnswerDataExportTag that
# needs removed from the end of the column names
if ("AnswerDataExportTag" %in% names(question[['Payload']])) {
colnames(relevant_responses) <-
lapply(colnames(relevant_responses), function(x)
gsub(
paste0("_",
question[['Payload']][['AnswerDataExportTag']],
"$"),
"", x))
}
# if there's ChoiceDataExportTags being used,
# translate the column names from recode values
# to answer indices
if ("ChoiceDataExportTags" %in% names(question[['Payload']]) &&
typeof(question[['Payload']][['ChoiceDataExportTags']]) != "logical" &&
all(colnames(relevant_responses) %in%
question[['Payload']][['ChoiceDataExportTags']])) {
colnames(relevant_responses) <-
lapply(colnames(relevant_responses), function(x) {
names(question[['Payload']][['ChoiceDataExportTags']])[
which(question[['Payload']][['ChoiceDataExportTags']] == x)]
})
}
}
# determine if the question has any NA-type choices
if ('RecodeValues' %in% names(question[['Payload']])) {
has_na <- any(question[['Payload']][['RecodeValues']] < 0)
} else
has_na <- FALSE
# calculate the valid denominator for each answer
valid_denominator <-
apply(relevant_responses, 2, function(x)
sum(x >= 0))
# calculate the total denominator for each answer
total_denominator <-
apply(relevant_responses, 2, function(x)
sum(x != -99 & x != ""))
# get the na responses for the question, if it has NA responses
if (has_na) {
na_factors <-
question[['Payload']][['RecodeValues']][
which(question[['Payload']][['RecodeValues']] < 0)]
}
# get the valid responses for the question
if ("RecodeValues" %in% names(question[['Payload']]) &&
length(question[['Payload']][['RecodeValues']]) > 0) {
valid_factors <-
question[['Payload']][['RecodeValues']][
which(question[['Payload']][['RecodeValues']] >= 0)]
} else {
valid_factors <- names(question[['Payload']][['Answers']])
}
# table the responses
valid_responses <-
sapply(relevant_responses, function(x)
table(factor(x, valid_factors)))
if (!is.data.frame(valid_responses)) {
valid_responses <- as.data.frame(valid_responses)
}
if (!(length(colnames(relevant_responses)) == length(rownames(valid_responses))
&&
all(colnames(relevant_responses) == rownames(valid_responses)))) {
valid_responses <- t(valid_responses)
#Think these should already be true?
colnames(valid_responses) <- valid_factors
rownames(valid_responses) <- colnames(relevant_responses)
}
# } else valid_responses <- t(valid_responses)
if (has_na) {
na_responses <-
sapply(relevant_responses, function(x)
table(factor(x, na_factors)))
if (!is.data.frame(na_responses)) {
na_responses <- as.data.frame(na_responses)
}
if (!(
nrow(na_responses) == ncol(relevant_responses) &&
ncol(na_responses) == length(na_factors)
)) {
na_responses <- t(na_responses)
}
colnames(na_responses) <- na_factors
rownames(na_responses) <- colnames(relevant_responses)
}
valid_responses <- as.data.frame(valid_responses)
# convert the number of respondents for each answer
# (row) by choice (column) combination
# to a percentage
for (i in 1:nrow(valid_responses)) {
for (j in 1:ncol(valid_responses)) {
if (valid_responses[i, j] == 0 | valid_denominator[[i]] == 0) {
valid_responses[i, j] <- percent0(0)
} else {
valid_responses[i, j] <-
percent0(as.integer(valid_responses[i, j]) / valid_denominator[[i]])
}
}
}
# if there's a set of na_responses
# convert the number of respondents for each
# answer (row) by choice (column) combination
# to a percentage
if (has_na) {
for (i in 1:nrow(na_responses)) {
for (j in 1:ncol(na_responses)) {
if (na_responses[i, j] == 0 | total_denominator[[i]] == 0) {
na_responses[i, j] <- percent0(0)
} else {
na_responses[i, j] <-
percent0(as.integer(na_responses[i, j]) / total_denominator[[i]])
}
}
}
}
# translate the recode values to choice indices
if ("RecodeValues" %in% names(question[['Payload']]) &&
colnames(valid_responses) %in% question[['Payload']][['RecodeValues']]) {
colnames(valid_responses) <-
lapply(colnames(valid_responses), function(x) {
names(question[['Payload']][['RecodeValues']])[
which(question[['Payload']][['RecodeValues']] == x)]
})
if (has_na) {
colnames(na_responses) <-
lapply(colnames(na_responses), function(x) {
names(question[['Payload']][['RecodeValues']])[
which(question[['Payload']][['RecodeValues']] == x)]
})
}
}
# Reorder using the AnswerOrder
if ("AnswerOrder" %in% names(question[['Payload']]) &&
should_use_ofr) {
if (has_na) {
answers <-
sapply(unlist(question[['Payload']][['AnswerOrder']]), function(x)
question[['Payload']][['RecodeValues']][[toString(x)]])
valid_answers <- which(answers >= 0)
valid_responses <- valid_responses[, valid_answers]
} else {
valid_responses <-
valid_responses[, unlist(question$Payload$AnswerOrder)]
}
}
# translate the choice indices to choice text
colnames(valid_responses) <-
lapply(colnames(valid_responses), function(x)
question[['Payload']][['Answers']][[x]][[1]])
if (has_na)
colnames(na_responses) <-
lapply(colnames(na_responses), function(x)
question[['Payload']][['Answers']][[x]][[1]])
colnames(valid_responses) <-
lapply(colnames(valid_responses), clean_html)
if (has_na)
colnames(na_responses) <-
lapply(colnames(na_responses), clean_html)
#Lines added from above to rename row values
#EM insertion
rownames(valid_responses) <-
lapply(rownames(valid_responses), function(x)
original_first_rows[2, x])
rownames(valid_responses) <-
lapply(rownames(valid_responses), function(x)
gsub(paste0(question_id, "-"), "", x))
# get the answer text as a list
choices <- rownames(valid_responses)
if ('ChoiceDataExportTags' %in% names(question[['Payload']]) &&
typeof(question[['Payload']][['ChoiceDataExportTags']]) != 'logical' &&
rownames(valid_responses) %in%
question[['Payload']][['ChoiceDataExportTags']]) {
choices <-
lapply(choices, function(x)
names(question[['Payload']][['ChoiceDataExportTags']])[
which(question[['Payload']][['ChoiceDataExportTags']] == x)])
}
choices <-
lapply(choices, function(x)
question[['Payload']][['Choices']][[x]][[1]])
choices <- lapply(choices, clean_html)
choices <- unlist(choices, use.names = FALSE)
# construct the data frame
if (has_na) {
results_table <-
data.frame(
choices,
N = valid_denominator,
valid_responses,
total_N = total_denominator,
na_responses,
check.names = FALSE,
row.names = NULL
)
} else {
results_table <-
data.frame(
choices,
N = valid_denominator,
valid_responses,
check.names = FALSE,
row.names = NULL
)
}
# clean up the colnames and rownames
colnames(results_table)[1] <- ""
rownames(results_table) <- NULL
# append the results table
question[['Table']] <- results_table
return(question)
}
#' Create the Results Table for a Matrix Multiple Answer Question
#'
#' @inheritParams mc_single_answer_results
#' @return a table with the matrix-sub-questions listed in the first column,
#' a column with the total number of respondents for each subquestion, and
#' the percentages for each choice for each sub-question.
matrix_multiple_answer_results <-
function(question, original_first_rows) {
# save the original responses
orig_responses <- question[['Responses']]
# determine if we should use the original_first_rows
if (!missing(original_first_rows) &&
all(colnames(orig_responses) %in% colnames(original_first_rows)) &&
dim(original_first_rows)[[1]] >= 2) {
should_use_ofr <- TRUE
} else
should_use_ofr <- FALSE
# search either the import IDs or response column names
# for the string "TEXT", and include all but those including
# "TEXT" from the relevant_responses
if (should_use_ofr) {
relevant_responses <-
orig_responses[which(unlist(lapply(colnames(orig_responses), function(x)
! grepl("TEXT", original_first_rows[2, x]))))]
} else {
relevant_responses <-
orig_responses[which(unlist(lapply(colnames(orig_responses), function(x)
! grepl("TEXT", x))))]
}
# create a list of possible variations of the data export tag
data_export_tag <- question[['Payload']][['DataExportTag']]
data_export_tags <-
c(data_export_tag,
gsub("#", "_", data_export_tag),
gsub("-", "_", data_export_tag))
data_export_tags <-
unique(c(paste0(data_export_tags, "_"), data_export_tags))
data_export_tags <- paste0(data_export_tags, collapse = "|")
# remove the QID or Question Data Export Tags from the columns
# now columns look like "1_4" and "avocados_10"
colnames(relevant_responses) <-
lapply(colnames(relevant_responses), function(x) {
if (should_use_ofr) {
choice_index <- original_first_rows[2, x]
choice_index <- gsub("^QID[# 0-9]*-", "", choice_index)
} else {
x <- gsub(data_export_tags, "", x)
}
})
# if the question is reduced from a side-by-side question,
# then we need to remove the AnswerDataExportTag from the
# end of the column names
if ("AnswerDataExportTag" %in% names(question[['Payload']])) {
names(relevant_responses) <-
gsub(paste0("_", question[['Payload']][['AnswerDataExportTag']]),
"", names(relevant_responses))
}
# separate the response columns into a list for each
# answer row
responses_by_answers <- list()
answer_codes <-
sapply(names(relevant_responses), function(x)
gsub("[_ -][-0-9]+$", "", x, perl = TRUE))
for (ans in unique(answer_codes)) {
# for each answer, save response columns that start with that answer.
responses_by_answers[[ans]] <- list()
responses_by_answers[[ans]][['responses']] <- list()
responses_by_answers[[ans]][['responses']] <-
relevant_responses[
which(grepl(paste0("^", ans, "[_ -]"), names(relevant_responses)))]
# remove the answer from the column name
colnames(responses_by_answers[[ans]][['responses']]) <-
sapply(colnames(responses_by_answers[[ans]][['responses']]),
function(x)
gsub(paste0(ans, "[_ -]"), "", x))
# calculate the total number of valid respondents, N
responses_by_answers[[ans]][['N']] <-
length(which(apply(responses_by_answers[[ans]][['responses']],
1,
function(x)
any(x != 0 &
x != "" & x != -99))))
# if there are RecodeValues indicating NA like options,
# separate them from the valid respondents, recalculate N,
# and calculate total_N
if ("RecodeValues" %in% names(question[['Payload']]) &&
suppressWarnings(any(question[['Payload']][['RecodeValues']] < 0))) {
# if the response columns have use recode values
# to represent the choices
if (all(names(responses_by_answers[[ans]][['responses']]) %in%
question[['Payload']][['RecodeValues']])) {
# separate na_columns from the valid responses
responses_by_answers[[ans]][['na_columns']] <- list()
responses_by_answers[[ans]][['na_columns']] <-
responses_by_answers[[ans]][['responses']][
which(names(responses_by_answers[[ans]][['responses']]) < 0)]
responses_by_answers[[ans]][['responses']] <-
responses_by_answers[[ans]][['responses']][
which(names(responses_by_answers[[ans]][['responses']]) >= 0)]
# otherwise, if the response columns use the
# Choices directly to represent the choices
} else if (all(names(responses_by_answers[[ans]][['responses']]) %in%
names(question[['Payload']][['RecodeValues']]))) {
# separate na_columns from the valid responses
na_cols <-
which(sapply(
names(responses_by_answers[[ans]][['responses']]),
function(x)
suppressWarnings(question[['Payload']][['RecodeValues']][[x]] < 0)))
responses_by_answers[[ans]][['na_columns']] <- list()
responses_by_answers[[ans]][['na_columns']] <-
responses_by_answers[[ans]][['responses']][na_cols]
responses_by_answers[[ans]][['responses']] <-
responses_by_answers[[ans]][['responses']][
which(!names(responses_by_answers[[ans]][['responses']]) %in% na_cols)]
}
# calculate the total_N and recalculate N to be the number of
# valid respondents only
responses_by_answers[[ans]][['total_N']] <-
responses_by_answers[[ans]][['N']]
responses_by_answers[[ans]][['N']] <-
length(which(apply(responses_by_answers[[ans]][['responses']],
1,
function(x)
any(x == 1))))
}
# turn the responses for this answer into a list of percentages
# for each choice
responses_by_answers[[ans]][['responses']] <-
apply(responses_by_answers[[ans]][['responses']],
2,
function(x) {
if (sum(x != -99 &
x != 0 &
x != "") != 0 &&
responses_by_answers[[ans]][['N']] != 0) {
percent0(sum(x != -99 &
x != 0 &
x != "") / responses_by_answers[[ans]][['N']])
} else
percent0(0)
})
if (all(c('na_columns', 'total_N') %in% names(responses_by_answers[[ans]]))) {
# turn the not-applicable columns for this answer into a
# list of percentages for each non-applicable choice
responses_by_answers[[ans]][['na_columns']] <-
apply(responses_by_answers[[ans]][['na_columns']],
2,
function(x) {
if (sum(x != -99 &
x != 0 &
x != "") != 0 &&
responses_by_answers[[ans]][['total_N']] != 0) {
percent0(sum(x != -99 &
x != 0 &
x != "") /
responses_by_answers[[ans]][['total_N']])
} else
percent0(0)
})
}
}
# if there's only one column to the question, then we will use the number of
# respondents who answered any part of the question as the denominator, instead
# of the number of respondents who answered that specific question part.
if (all(lapply(responses_by_answers, function(x)
length(x[['responses']])) == 1)) {
all_question_respondents <- length(which(
apply(relevant_responses,
1,
function(x)
any(x != 0 &
x != -99 &
x != ""))))
for (ans in answer_codes) {
if (responses_by_answers[[ans]][['N']] != 0 &
all_question_respondents != 0) {
responses_by_answers[[ans]][['responses']] <-
percent0(responses_by_answers[[ans]][['N']] /
all_question_respondents)
} else
percent0(0)
}
}
# get the list of Ns for every answer, get the percents for each choices
# for each answer, and table them.
N <- lapply(responses_by_answers, '[[', 'N')
choices <-
t(as.data.frame(lapply(
responses_by_answers, '[[', 'responses'
),
optional = TRUE))
# if there's only one option, then the choice will need manual naming because
# R has trouble differentiating between single column data frames and lists
only_one_option <-
ncol(choices) == 1 &&
length(unique(gsub(
"[0-9 _ -]*-", "", colnames(relevant_responses)
))) == 1
if (only_one_option)
colnames(choices) <-
gsub("[0-9 _ -]*-", "", colnames(relevant_responses)[[1]])
# this is a helper function for renaming the choices based on the
# recodevalues, if appropriate. Insights uses the recodevalues reliably,
# but legacy didn't. So the test is whether or not all the choices are
# in the recodevalues.
rename_choices <- function(choice_names, question) {
if ("RecodeValues" %in% names(question[['Payload']]) &&
all(choice_names %in% question[['Payload']][['RecodeValues']])) {
sapply(choice_names, function(x) {
code <-
names(question[['Payload']][['RecodeValues']])[
which(question[['Payload']][['RecodeValues']] == x)]
question[['Payload']][['Answers']][[code]][[1]]
})
} else {
sapply(choice_names, function(x) {
question[['Payload']][['Answers']][[x]][[1]]
})
}
}
# relabel the names of the valid choices
colnames(choices) <- rename_choices(colnames(choices), question)
# if there were non-applicable options, table them, re-label the choices,
# then table the N, choices, total_N, and na_choices together.
# if there weren't non-applicable options, just table the
# valid N and the choices
if (all(c('total_N', 'na_columns') %in% sapply(responses_by_answers, names))) {
na_choices <-
t(as.data.frame(
lapply(responses_by_answers, '[[', 'na_columns'),
optional = TRUE
))
total_N <- lapply(responses_by_answers, '[[', 'total_N')
colnames(na_choices) <-
rename_choices(colnames(na_choices), question)
responses_tabled <-
cbind(N = as.integer(N),
choices,
total_N = as.integer(total_N),
na_choices)
} else {
responses_tabled <- cbind(N = as.integer(N), choices)
}
# if the rownames are labeled by the Choice Data Export Tags, use them to convert
# their labels to the question parts' names.
# if not, just try to use the labels directly to retrieve the part's labeling.
if ('ChoiceDataExportTags' %in% names(question[['Payload']]) &&
all(rownames(responses_tabled) %in%
question[['Payload']][['ChoiceDataExportTags']])) {
rownames(responses_tabled) <-
sapply(rownames(responses_tabled), function(x) {
code <-
names(question[['Payload']][['ChoiceDataExportTags']])[
which(question[['Payload']][['ChoiceDataExportTags']] == x)]
question[['Payload']][['Choices']][[code]][[1]]
})
} else {
rownames(responses_tabled) <-
sapply(rownames(responses_tabled),
function(x)
question[['Payload']][['Choices']][[x]][[1]])
}
# clean html out of the colnames and rownames
rownames(responses_tabled) <-
sapply(rownames(responses_tabled), clean_html)
colnames(responses_tabled) <-
sapply(colnames(responses_tabled), clean_html)
# include the rownames as the first row
responses_tabled <-
cbind(rownames(responses_tabled), responses_tabled)
colnames(responses_tabled)[1] <- " "
question[['Table']] <- responses_tabled
return(question)
}
#' Append the Response Frequency Table to a Question
#'
#' This function uses the contents of a question to
#' determine which kind of question the given question is.
#' Then it passes the question and the original_first_rows
#' to the function which processes questions of that specific
#' question type. After adding the results `Table` to the question,
#' the question is returned including this table as an additional new
#' list element.
#'
#' @inheritParams mc_single_answer_results
process_question_results <-
function(question, original_first_rows) {
# get original_first_rows from global scope if not passed directly
if (missing(original_first_rows))
original_first_rows <-
get("original_first_rows", envir = globalenv())
# we should only use original_first_rows if they're greater than 2 rows
if (!missing(original_first_rows) &&
nrow(original_first_rows) >= 2) {
should_use_ofr <- TRUE
} else
should_use_ofr <- FALSE
# Only process questions which have results
if (is.null(question[['Responses']])) {
has_responses <- FALSE
} else {
has_responses <- ncol(question[['Responses']]) != 0
}
if (has_responses) {
question[['Table']] <- NULL
try({
# multiple choice multiple answer
if (is_mc_multiple_answer(question)) {
if (should_use_ofr) {
question <-
mc_multiple_answer_results(question, original_first_rows)
} else {
question <- mc_multiple_answer_results(question)
}
# multiple choice single answer
} else if (is_mc_single_answer(question)) {
if (should_use_ofr) {
question <- mc_single_answer_results(question, original_first_rows)
} else {
question <- mc_single_answer_results(question)
}
# matrix multiple answer
} else if (is_matrix_multiple_answer(question)) {
if (should_use_ofr) {
question <-
matrix_multiple_answer_results(question, original_first_rows)
} else {
question <- matrix_multiple_answer_results(question)
}
# matrix single answer
} else if (is_matrix_single_answer(question)) {
if (should_use_ofr) {
question <-
matrix_single_answer_results(question, original_first_rows)
} else {
question <- matrix_single_answer_results(question)
}
}
}, silent = TRUE)
}
return(question)
}
#' Create Results Tables and Pair Them to Questions
#'
#' The generate_results function takes a list of questions which have
#' their responses paired to them, determines their question type,
#' uses the results generation functions to create their results table,
#' and saves the table to the question's [['Table']] element. The function
#' returns the list of questions with their paired results tables.
#'
#' @param questions A list of questions from a Qualtrics Survey File containing
#' response columns (inserted by link_responses_to_questions).
#' @inheritParams mc_single_answer_results
#'
#' @return A list of questions with their results tables paired to them
#' under the questions[[i]][['Table']]
generate_results <- function(questions, original_first_rows) {
# loop through all the questions that have responses,
# and for each question that has responses, determine
# it's question type (among the ones which have question
# results generating functions), then generate the results for
# that question and save them to that question.
for (i in 1:length(questions)) {
questions[[i]] <-
process_question_results(questions[[i]], original_first_rows)
}
return(questions)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.