# setup ------------------------------------------------------------------------
# dplyr provides the pipe (%>%) and rename() used throughout this demo
library(dplyr)

# Load the surveyr package from source, attaching only its exported functions
devtools::load_all(export_all = FALSE)

# Read the raw survey export
data <- readxl::read_xlsx("../demo_data.xlsx")

# Print the original column headers (the survey questions)
names(data)

# Give the first seven columns short, analysis-friendly names (by position)
new_data <- rename(
  data,
  user                 = 1,
  q1_position_in_trust = 2,
  q2_rsc_office_met    = 3,
  q12_main_challenges  = 4,
  q15_not_expectations = 5,
  q22_changes          = 6,
  q24_reflections      = 7
)

# The demo focuses on Q24:
#   "Do you have any other reflections about your meetings with your RSC office?"
# Pick a fixed set of rows to print after each step so results can be compared
sample <- 27:37
new_data[sample, "q24_reflections"]
# anonymise() ------------------------------------------------------------------
# The rows printed above contain names ('Joe Bloggs', 'John Doe'), so the
# column has to be anonymised before going any further.

# Default behaviour
anon_data <- new_data %>%
  anonymise(q24_reflections)
anon_data[sample, "q24_reflections"]

# identify = TRUE: keep different people distinguishable (e.g. to see in the
# analysis whether one person is mentioned a lot)
anon_data <- new_data %>%
  anonymise(
    q24_reflections,
    identify = TRUE
  )
anon_data[sample, "q24_reflections"] # Random numbers assigned

# gender = TRUE: additionally make the column gender neutral
anon_data <- new_data %>%
  anonymise(
    q24_reflections,
    identify = TRUE,
    gender = TRUE
  )
anon_data[sample, "q24_reflections"]

# Anonymise only a known list of names
names_to_anonymise <- c("Joe Bloggs", "the RSC")
anon_data <- new_data %>%
  anonymise(
    q24_reflections,
    auto = FALSE,                   # don't automatically overwrite every name found
    add_names = names_to_anonymise, # manually defined names to anonymise
    identify = TRUE,
    gender = TRUE
  )
anon_data[sample, "q24_reflections"]

# The same can be applied to several columns (anonymise_at) or to every
# column (anonymise_all)
anon_df <- new_data %>%
  anonymise_at(
    q24_reflections,
    q22_changes,
    add_names = "RSC",
    identify = TRUE
  )
anon_df <- new_data %>%
  anonymise_all(quietly = TRUE)

# Reset to the default behaviour; the rest of the demo uses this anon_data
anon_data <- new_data %>%
  anonymise(q24_reflections)
anon_data[sample, "q24_reflections"]
# clean() ----------------------------------------------------------------------
# Once anonymised, the column is standardised (lower case, punctuation etc.)
# so that it can be analysed.
clean_data <- anon_data %>%
  clean(q24_reflections)
clean_data[sample, "q24_reflections"]

# Keep 'null responses' instead
clean_data <- anon_data %>%
  clean(
    q24_reflections,
    null_response = FALSE
  )
clean_data[sample, "q24_reflections"]

# As with anonymise(), there are variants for several columns (clean_at) and
# for every column (clean_all)
clean_data <- anon_data %>%
  clean_at(
    q24_reflections,
    q22_changes,
    q12_main_challenges
  )
clean_data <- anon_data %>%
  clean_all()
# common_words() ---------------------------------------------------------------
# Simple word-frequency analysis. Words appearing fewer than 5 times are
# filtered out, which also drops groups with only a couple of respondents.
clean_data %>%
  common_words(q24_reflections)

# Ask for more words
clean_data %>%
  common_words(
    q24_reflections,
    n = 10
  )

# Exclude specific words from the count
clean_data %>%
  common_words(
    q24_reflections,
    remove = c("meeting", "rsc", "trust"),
    n = 10
  )

# Break the counts down by a demographic column
clean_data %>%
  common_words(
    q24_reflections,
    q2_rsc_office_met,
    remove = c("meeting", "rsc", "trust"),
    n = 1
  )

# To include all groups: asking for the top 5 words is not enough on its own,
# because some are still excluded for occurring fewer than 5 times ...
clean_data %>%
  common_words(
    q24_reflections,
    q2_rsc_office_met,
    remove = c("meeting", "rsc", "trust"),
    n = 5
  )

# ... so also set min = 0 to keep the top five regardless of how often they occur
clean_data %>%
  common_words(
    q24_reflections,
    q2_rsc_office_met,
    remove = c("meeting", "rsc", "trust"),
    n = 5,
    min = 0
  )

# Stopwords are removed by default; remove_stopwords = FALSE keeps them in
clean_data %>%
  common_words(
    q24_reflections,
    q2_rsc_office_met,
    remove_stopwords = FALSE
  )

# proportion = TRUE: also calculate the proportion of responses in each group
# that mentioned the word
clean_data %>%
  common_words(
    q24_reflections,
    q2_rsc_office_met,
    remove = c("meeting", "rsc", "trust"),
    n = 1,
    proportion = TRUE
  )

# For export to an external document, the `pretty` argument makes the output
# more visually appealing.
# NOTE(review): the earlier comment here referred to pretty = 'plot', but this
# call passes "return" - confirm which value is intended.
clean_data %>%
  common_words(
    q24_reflections,
    q2_rsc_office_met,
    proportion = TRUE,
    remove = c("meeting", "rsc", "trust"),
    n = 2,
    pretty = "return"
  )
# n_grams() --------------------------------------------------------------------
# Tabulates the most common n-grams, dropping those that are mostly stopwords.
# With no `n` supplied the result is bigrams.
clean_data %>%
  n_grams(
    q24_reflections,
    q1_position_in_trust,
    remove = c("rsc", "trust", "meeting"),
    pretty = "plot"
  )

# Lower the threshold to be stricter on stopwords:
#   0 = exclude n-grams that contain one or more stopwords
#   1 = include all n-grams
clean_data %>%
  n_grams(
    q24_reflections,
    stop_thresh = 0
  )

# Other options
clean_data %>%
  n_grams(
    q24_reflections,
    n = 3,            # trigrams
    word = "meeting", # only n-grams containing the word 'meeting'
    stop_thresh = 0.4 # allow only one stopword per trigram
  )

# Same query, passed on to prettify() for plotting
clean_data %>%
  n_grams(
    q24_reflections,
    n = 3,            # trigrams
    word = "meeting", # only n-grams containing the word 'meeting'
    stop_thresh = 0.4
  ) %>%
  prettify(plot = TRUE)
# sentiment_score() ------------------------------------------------------------
# Run sentiment_score() on the cleaned Q24 column and keep the result
sentiment_data <- clean_data %>%
  sentiment_score(q24_reflections)
# themes -----------------------------------------------------------------------
# Summarise Q12 (main challenges) into two topics, excluding the word "rsc"
new_data %>%
  summarise_topics(
    q12_main_challenges,
    exclude = "rsc",
    num_topics = 2
  )

# Clean Q12, then classify each response; topics "1" and "2" are given the
# aliases "finances" and "growth", with `output` set to "main_challenges_topic"
topics_data <- new_data %>%
  clean(q12_main_challenges) %>%
  classify_topics(
    q12_main_challenges,
    topic_aliases = c(
      "1" = "finances",
      "2" = "growth"
    ),
    output = "main_challenges_topic",
    confidence = TRUE
  )
# Example workflow -------------------------------------------------------------
# (After running setup)
# End-to-end pipeline on Q24: anonymise -> clean -> count the most common word
# per RSC office -> prettify the result.
new_data %>%
  anonymise(
    q24_reflections,
    identify = TRUE
  ) %>%
  # Uses clean(), the same cleaning function as every other step in this
  # script (this step previously called clean_column())
  clean(q24_reflections) %>%
  common_words(
    q24_reflections,
    q2_rsc_office_met,
    remove = c("meeting", "rsc", "trust"),
    n = 1
  ) %>%
  prettify(
    # Label paired with the grouping column q2_rsc_office_met
    alias = c("Which RSC did the respondent meet?" = "q2_rsc_office_met"),
    count_bar = TRUE
  )
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.