Nothing
#' handcoder_app: App to classify text snippets
#'
#' `handcoder_app` is an internal function to `handcode`. It provides a Shiny-App that allows users to click through different snippets of text and classify them manually according to predefined categories.
#'
#' @param a A list containing all relevant information for the app to function. The list will be automatically generated by `data_for_app`.
#'
# Importing dependencies with roxygen2
#' @import shiny
#' @importFrom shinyWidgets progressBar
#' @importFrom shinyWidgets updateProgressBar
#' @return The Shiny-App returns a data frame with the coded variables.
handcoder_app <- function(a) {
shinyApp(
ui = shiny::fluidPage(
# Generate html-class that hides output
shiny::tags$head(
shiny::tags$style(
shiny::HTML(".hide-checkbox {display: none;}"),
)
),
shiny::tags$head(
shiny::tags$style(
shiny::HTML("
/* Increase spacing after htmlOutput('statement') */
.statement-output {
margin-bottom: 20px;
}
/* Add box around htmlOutput('statement') */
.statement-output {
border: 1px solid #ccc;
padding: 10px;
}
")
)
),
# Title
shiny::titlePanel("handcodeR Text Annotation App"),
# Main Panel
shiny::mainPanel(
# Text Statement
shiny::h3("Statement:"),
shiny::div(
shiny::htmlOutput("statement"),
class = "statement-output"),
shiny::HTML("<br>"),
# Coding Categories
shiny::fluidRow(
shiny::column(width = floor(12/length(a$classifications)),
shiny::radioButtons(
"code1",
names(a$classifications)[1],
choiceNames = button_output(a$classifications, 1),
choiceValues = button_output(a$classifications, 1, names = TRUE),
selected = ""
)
),
# Invisible checkbox for optional categories
shiny::div(shiny::checkboxInput(
"add2",
"Add second classification",
value = length(a$classifications)>1),
class = "hide-checkbox"
),
shiny::conditionalPanel(
condition = "input.add2",
shiny::column(width = floor(12/length(a$classifications)),
shiny::radioButtons(
"code2",
try(names(a$classifications)[2], silent = TRUE),
selected = "",
choiceNames = button_output(a$classifications, 2),
choiceValues = button_output(a$classifications, 2, names = TRUE),
)
)),
shiny::div(shiny::checkboxInput(
"add3",
"Add third classification",
value = length(a$classifications)>2),
class = "hide-checkbox"
),
shiny::conditionalPanel(
condition = "input.add3",
shiny::column(width = floor(12/length(a$classifications)),
shiny::radioButtons(
"code3",
try(names(a$classifications)[3], silent = TRUE),
selected = "",
choiceNames = button_output(a$classifications, 3),
choiceValues = button_output(a$classifications, 3, names = TRUE),
)
)),
shiny::div(shiny::checkboxInput(
"add4",
"Add fourth classification",
value = length(a$classifications)>3),
class = "hide-checkbox"
),
shiny::conditionalPanel(
condition = "input.add4",
shiny::column(width = floor(12/length(a$classifications)),
shiny::radioButtons(
"code4",
try(names(a$classifications)[4], silent = TRUE),
selected = "",
choiceNames = button_output(a$classifications, 4),
choiceValues = button_output(a$classifications, 4, names = TRUE),
)
)),
shiny::div(shiny::checkboxInput(
"add5",
"Add fifth classification",
value = length(a$classifications)>4),
class = "hide-checkbox"
),
shiny::conditionalPanel(
condition = "input.add5",
shiny::column(width = floor(12/length(a$classifications)),
shiny::radioButtons(
"code5",
try(names(a$classifications)[5], silent = TRUE),
selected = "",
choiceNames = button_output(a$classifications, 5),
choiceValues = button_output(a$classifications, 5, names = TRUE),
)
)),
shiny::div(shiny::checkboxInput(
"add6",
"Add sixth classification",
value = length(a$classifications)>5),
class = "hide-checkbox"
),
shiny::conditionalPanel(
condition = "input.add6",
shiny::column(width = floor(12/length(a$classifications)),
shiny::radioButtons(
"code6",
try(names(a$classifications)[6], silent = TRUE),
selected = "",
choiceNames = button_output(a$classifications, 6),
choiceValues = button_output(a$classifications, 6, names = TRUE),
)
))
),
# Space and Enter to press next and previous
shiny::tags$head(
shiny::tags$script(
'$(document).on("keyup", function (e) {
if ( e.which == 32 ) document.getElementById("previouspage").click();
if ( e.which == 13 ) document.getElementById("nextpage").click();
});'
)
),
shiny::HTML("<br><br>"),
# Buttons for next and previous
shiny::actionButton("previouspage","Previous", class = "btn btn-primary",
style = "position: absolute; bottom: 60px; right: 90px;",
icon = shiny::icon("backward")),
shiny::actionButton("nextpage", "Next", class = "btn btn-primary",
style = "position: absolute; bottom: 60px; right: 10px;",
icon = shiny::icon("forward")),
# Button for save and exit
shiny::actionButton("save", "Save and exit", class = "btn btn-primary",
style = "position: absolute; bottom: 60px; left: 10px;",
icon = shiny::icon("floppy-disk")),
# Help Text
shiny::helpText(
sprintf(
'Use Space and Enter keys to go to "%s" or "%s" page, respectively.',
"Previous",
"Next"
)
),
# Progress bar
shinyWidgets::progressBar(
id = "progress",
value = a$start_app,
total = nrow(a$data_app),
size = "xxs"
),
tags$head(tags$style(HTML('.progress-number {position: absolute; bottom: -10px; right: -10px; color: black;}'))),
tags$head(tags$style(HTML('.progress {height: 6px; margin-top: -21px;}')))
)
),
server = function(input, output, session) {
# Initialize reactiveValues
values <- shiny::reactiveValues(
# Set counter to start
counter = a$start_app,
# Initialize coding as existing classification
code1 = a$container[,1],
code2 = a$container[,2],
code3 = a$container[,3],
code4 = a$container[,4],
code5 = a$container[,5],
code6 = a$container[,6]
)
# Initialize radioButtons
observe({
shiny::updateRadioButtons(session, "code1", selected = values$code1[values$counter])
shiny::updateRadioButtons(session, "code2", selected = values$code2[values$counter])
shiny::updateRadioButtons(session, "code3", selected = values$code3[values$counter])
shiny::updateRadioButtons(session, "code4", selected = values$code4[values$counter])
shiny::updateRadioButtons(session, "code5", selected = values$code5[values$counter])
shiny::updateRadioButtons(session, "code6", selected = values$code6[values$counter])
})
# Behaviour when previous is clicked
shiny::observeEvent(input$previouspage, {
# Save current coding
values$code1[values$counter] <- input$code1
try(values$code2[values$counter] <- input$code2, silent = TRUE)
try(values$code3[values$counter] <- input$code3, silent = TRUE)
try(values$code4[values$counter] <- input$code4, silent = TRUE)
try(values$code5[values$counter] <- input$code5, silent = TRUE)
try(values$code6[values$counter] <- input$code6, silent = TRUE)
# Update the counter value by substracting 1 but only if page is bigger 1
if (values$counter > 1) {
values$counter <- values$counter - 1
}
# Set the selected value of radioButtons to "" or already coded category
shiny::updateRadioButtons(session, "code1", selected = ifelse(is.na(values$code1[values$counter]), "", values$code1[values$counter]))
shiny::updateRadioButtons(session, "code2", selected = ifelse(is.na(values$code2[values$counter]), "", values$code2[values$counter]))
shiny::updateRadioButtons(session, "code3", selected = ifelse(is.na(values$code3[values$counter]), "", values$code3[values$counter]))
shiny::updateRadioButtons(session, "code4", selected = ifelse(is.na(values$code4[values$counter]), "", values$code4[values$counter]))
shiny::updateRadioButtons(session, "code5", selected = ifelse(is.na(values$code5[values$counter]), "", values$code5[values$counter]))
shiny::updateRadioButtons(session, "code6", selected = ifelse(is.na(values$code6[values$counter]), "", values$code6[values$counter]))
# Update progress bar
shinyWidgets::updateProgressBar(id = "progress", value = values$counter, total = nrow(a$data_app))
})
# Behaviour when next is clicked
shiny::observeEvent(input$nextpage, {
# Save current coding
values$code1[values$counter] <- input$code1
try(values$code2[values$counter] <- input$code2, silent = TRUE)
try(values$code3[values$counter] <- input$code3, silent = TRUE)
try(values$code4[values$counter] <- input$code4, silent = TRUE)
try(values$code5[values$counter] <- input$code5, silent = TRUE)
try(values$code6[values$counter] <- input$code6, silent = TRUE)
# If max pages reached, save and exit
if (values$counter == length(a$data_app$texts)) {
# Stop the Shiny-App and output data frame
shiny::stopApp(invisible(gen_output(a$data_app, values)))
}
# Update the counter value by adding 1
values$counter <- values$counter + 1
# Set the selected value of radioButtons to "" or already coded category
shiny::updateRadioButtons(session, "code1", selected = ifelse(is.na(values$code1[values$counter]), "", values$code1[values$counter]))
shiny::updateRadioButtons(session, "code2", selected = ifelse(is.na(values$code2[values$counter]), "", values$code2[values$counter]))
shiny::updateRadioButtons(session, "code3", selected = ifelse(is.na(values$code3[values$counter]), "", values$code3[values$counter]))
shiny::updateRadioButtons(session, "code4", selected = ifelse(is.na(values$code4[values$counter]), "", values$code4[values$counter]))
shiny::updateRadioButtons(session, "code5", selected = ifelse(is.na(values$code5[values$counter]), "", values$code5[values$counter]))
shiny::updateRadioButtons(session, "code6", selected = ifelse(is.na(values$code6[values$counter]), "", values$code6[values$counter]))
shinyWidgets::updateProgressBar(id = "progress", value = values$counter, total = nrow(a$data_app))
})
# Update text displayed
if(a$context_app){
current_text <- reactive({
paste0("<font color =\"#C0C0C0\">", a$data_app$before[values$counter], "</font> <b>", a$data_app$texts[values$counter], "</b> <font color =\"#C0C0C0\">", a$data_app$after[values$counter], "</font>")
})} else {
current_text <- reactive({
a$data_app$texts[values$counter]
})
}
output$statement <- renderText({
current_text()
})
# Behaviour when save is clicked
shiny::observeEvent(input$save, {
# Save current coding
values$code1[values$counter] <- input$code1
try(values$code2[values$counter] <- input$code2, silent = TRUE)
try(values$code3[values$counter] <- input$code3, silent = TRUE)
try(values$code4[values$counter] <- input$code4, silent = TRUE)
try(values$code5[values$counter] <- input$code5, silent = TRUE)
try(values$code6[values$counter] <- input$code6, silent = TRUE)
# Stop the Shiny-App and output data frame
shiny::stopApp(invisible(gen_output(a$data_app, values)))
})
}
)
}
#' button_output: Formatting of output for shiny::radioButtons
#'
#' `button_output` is an internal function to `handcode`. It works within the Shiny-App `handcoder_app` to format the look of the shiny::radioButtons.
#' @param classification Classification list obtained as list item from `data_for_app`
#' @param button Numerical value indicating for which button names and labels should be returned
#' @param names Logical value indicating whether the function is supposed to return a formatted list of labels or a vector of names for use in radioButtons.
#' @return Depending on the logical names input, the function returns a list of HTML formatted labels or the list of category names for the display of the radioButtons in the Shiny-App.
button_output <- function(classification, button, names = FALSE) {
# If
if(button <= length(classification)){
class <- classification[[button]]
if(!names) {
class <- as.list(class)
# Pick all Missings
log <- grepl("\\_(.)*\\_", class)
# Last Missing
marg <- max(which(log), warning = FALSE)
# Missings without _
no_ <- gsub("\\_", "", class)
# Apply HTML to all missings
class[log] <- lapply(no_[log], function(x) shiny::HTML(paste0("<p style = 'color:#C0C0C0'>", x, "</p>")))
class[marg] <- lapply(no_[marg], function(x) shiny::HTML(paste0("<p style = 'color:#C0C0C0; margin-bottom:15pt'>", x, "</p>")))
}
} else {
class <- c("")
}
return(class)
}
#' gen_output: Combine data and values of Shiny-App to output
#'
#' `gen_output` is an internal function to `handcode`. It works within the Shiny-App `handcoder_app` to prepare the output data frame.
#' @param data The data frame initially passed to `handcoder_app` within a.
#' @param values The values generated by the server of the `handcoder_app`.
#' @return The function returns a data frame with the coded variables.
gen_output <- function(data, values){
# Generate final data frame to be displayed as output
final <- data.frame(id = data$id, texts = data$texts, kat1 = values$code1, kat2 = values$code2, kat3 = values$code3, kat4 = values$code4, kat5 = values$code5, kat6 = values$code6)
# Reduce to size of original data frame
final <- final[,seq_len(ncol(data)-2)]
# Take names from original data frame
names(final) <- names(data)[-c(2,3)]
# Make sure all NA is saved as ""
final[is.na(final)] <- ""
# Reorder output
final <- final[order(final$id),]
# Delete id
final <- final[,-1]
rownames(final) <- NULL
# Return
return(final)
}
#' character_to_data: Transform text vector and arg_list to data frame.
#'
#' `character_to_data` is an internal function to `handcode`. It takes a character vector of texts as well as an arg_list of named character vectors as inputs and returns a data frame which can be used as input to `data_for_app`.
#' @param data A character vector of texts which has been given as data input to `handcode`.
#' @param arg_list A list of additional arguments which have been given as input to `handcode`. These must be named character vectors of categories that will be used to annotate the given texts.
#' @param missing A character vector of values that are displayed as missing values in the App. In the return data, these missing will be saved with an additional "_" as prefix and suffix.
#' @return The function returns a data frame in the format of the output data frame that can be processed by the handcode() function.
character_to_data <- function(data, arg_list, missing) {
output <- data.frame(matrix(factor(""), nrow = length(data), ncol = length(arg_list)))
# Name variables according to arg_list
names(output) <- names(arg_list)
# Add _ to missing
missing <- paste0("_", missing, "_")
# For every categorisation, set levels of factor
for (i in seq_along(arg_list)){
output[,i] <- factor("", levels = c("", missing, arg_list[[i]]))
}
# Paste texts object to data frame
data <- data.frame(texts = data, output)
return(data)
}
#' data_for_app: Prepare inputs for Shiny-App
#'
#' `data_for_app` is an internal function to `handcode`. It takes the inputs data, start, randomize and context to generate a list of inputs which can then be passed to `handcoder_app`.
#' @param data A data frame that has been prepared by `character_to_data` or that has previously already been returned from `handcode`.
#' @param start The value that has been given as start value to `handcode`.
#' @param randomize The logical value that has been given as randomize to `handcode`.
#' @param context The logical value that has been given as context to `handcode`.
#' @param pre Optional vector of texts that come before each respective text to be coded.
#' @param post Optional vector of texts that come after each respective text to be coded.
#' @return The function returns a list of inputs needed within the Shiny-App.
data_for_app <- function(data, start, randomize, context, pre = NULL, post = NULL) {
a <- list()
if(is.null(pre) & is.null(post)){
# Add context to data frame
data <- data.frame(before = c("", data$texts[seq_len(nrow(data)-1)]), after = c(data$texts[seq(2, nrow(data))], ""), data)
} else {
data <- data.frame(before = pre, after = post, data)
}
# Add id variable to data
data <- cbind(id = seq_len(nrow(data)), data)
# if start == "all_empty", reorder data
if(start == "all_empty"){
data <- data[order(do.call(paste0,data.frame(data[,-c(1:4)], helper=""))==""), ]
}
# Set start to first empty row of data
if(start %in% c("first_empty", "all_empty")){
start <- min(seq_len(nrow(data))[do.call(paste0,data.frame(data[,-c(1:4)], helper=""))==""])
}
# If randomize is TRUE, randomize order after start value
# c(1:start-1, ...) is intentionally without parantheses due to different behaviour of c(1:1) and c(1:0)
if(randomize & start < nrow(data)){
data <- data[c(1:start-1, sample(start:nrow(data), nrow(data)-(start-1))),]
}
# List to store classifications and their categories
classifications <- vector("list", length = ncol(data)-4)
# Name list
names(classifications) <- names(data)[-c(1:4)]
# Fill with categories
for (i in seq_along(classifications)) {
classifications[[i]] <- levels(data[,i+4])
}
# Initialize container for classification
container <- data.frame(kat1 = factor(rep("", nrow(data))), kat2 = factor(""), kat3 = factor(""), kat4 = factor(""), kat5 = factor(""), kat6 = factor(""))
for (i in seq_along(classifications)){
container[,i] <- data[,i+4]
names(container)[i] <- names(classifications)[[i]]
levels(container[,i]) <- c(classifications[[i]])
}
# Pass to app
a$container <- container
a$data_app <- data
a$start_app <- start
a$classifications <- classifications
a$context_app <- context
return(a)
}
#' handcode: Classifying text into pre-defined categories.
#'
#' `handcode` opens a Shiny-App which allows for hand coding strings of text into pre-defined categories. You can code between one and three variables at a time. It returns an updated data frame with your annotated classifications.
#'
#' @param data A character vector of texts you want to annotate or a data frame returned from the handcode() function.
#' @param ... Between one and six named character vectors indicating different variables and categories you want to use for your annotation. Only needed if data a new character vector of texts.
#' @param start A numeric value indicating the line in which you want to start hand coding. Alternatively, you can set start to "first_empty" to automatically start hand coding in the first line that has not been coded yet, or to "all_empty" to display all lines that have not been coded yet.
#' @param randomize A logical value indicating whether you want to randomize the order in which texts are shown to the coder.
#' @param context A logical value indicating whether you want the coder to see the previous and next text alongside the text that is currently coded. If TRUE, the function will show the previous and next text in light gray. This option is especially useful if we annotate individual sentences within a larger document.
#' @param missing A character vector of values that are displayed as missing values in the App. In the return data, these missing will be saved with an additional "_" as prefix and suffix.
#' @param pre Optional vector of custom texts that come as previous text before each respective text to be coded. Will be displayed if context = TRUE. This option can be used if the vector of texts specified in data do not form a continuous text.
#' @param post Optional vector of custom texts that come as next text after each respective text to be coded. Will be displayed if context = TRUE. This option can be used if the vector of texts specified in data do not form a continuous text.
#'
#' @return The function returns a data frame containing all annotations that have been made in the Shiny-App.
#' @examplesIf interactive()
#' reviews <- c("Good Quality Dog Food",
#' "Not as Advertised",
#' "Delight says it all",
#' "Great! Just as good as the expensive brands")
#' annotated <- handcode(reviews, evaluation = c("positive", "negative"))
#'
#'
# Importing dependencies with roxygen2
#' @import shiny
#' @importFrom shinyWidgets progressBar
#' @importFrom shinyWidgets updateProgressBar
#'
# Export function
#' @export
handcode <- function(data, ... , start = "first_empty", randomize = FALSE, context = FALSE, missing = c("Not applicable"), pre = NULL, post = NULL) {
# Initialize ...
arg_list <- list(...)
# Checks and datahandling for data input ----------------------------------
# Check if data is either data frame or character vector of texts
if(!is.data.frame(data) & !is.character(data)) stop("data must be a character vector of texts you want to annotate or a data frame that has been returned in an earlier run of this function.")
# Checks and data handling if data is character vector
if(is.character(data)){
# Check if items in arg_list are named vectors
if(!all(vapply(arg_list, is.character, logical(1))) | !all(vapply(arg_list, is.vector, logical(1)))) {
stop("All arguments in ... must be named character vectors.")
}
# Check that there are between 1 and 6 named character vectors given
if(length(arg_list) < 1 | length(arg_list) > 6) {
stop("If data is a character vector of texts to annotate, you must provide between 1 and 6 named character vectors of annotation categories.")
}
# Check if "" is in list of categories
if(any(vapply(arg_list, function(x) "" %in% x, logical(1)))) stop("The default missing value \"\" cannot be part of the categories of any variable you want to code with handcode()." )
# Missing is character vector
if(!is.character(missing)) stop("missing must be a character vector of values you want to be displayed as missing values for your coding categories.")
# Check if duplicates between missing and categories
if(any(vapply(arg_list, function(x) missing %in% x, logical(length(missing))))) stop("Values given as categories in variables you want to code with handcode() cannot be similar to values you give in missing.")
# Check for duplicate categories
for (i in seq_along(arg_list)) {
if(length(unique(arg_list[[i]]))<length(arg_list[[i]])) stop("You cannot set duplicate categories for a variable. Please provide unique categories for classification.")
}
# Data handling
data <- character_to_data(data, arg_list, missing)
}
# Checks -----------------------------------------------------------------------
# Check if first column of data is texts and character
if(names(data)[1] != "texts" | !is.character(data[,1]) ) stop("data must be a character vector of texts you want to annotate or a data frame that has been returned in an earlier run of this function.")
# Check if pre and post exist in data frame. If yes, save them as individual vectors and remove them from data
if(is.data.frame(data) & "pre" %in% names(data) & "post" %in% names(data)) {
pre <- data$pre
post <- data$post
data <- data[,1:(ncol(data)-2)]
}
# Check if all columns except the first one are factors
if(!all(vapply(data[, -1], is.factor, FUN.VALUE = logical(1)))) stop("data must be a character vector of texts you want to annotate or a data frame that has been returned in an earlier run of this function.")
# Check if there are min 1 and max 6 classification variables
if (ncol(data) < 2 | ncol(data) > 7) stop("handcode() is currently only able to handle between one and six classification variables. Please retry using between 1 and six classification variables.")
# check if start is a single value
if(length(start) > 1) stop("start must be a single value.")
# Check if start is numeric or "first_empty"
if(!is.numeric(start) & !start%in%c("first_empty", "all_empty")) stop("start must be numeric, 'first_empty', or 'all_empty'.")
# Check if there is uncoded data when start = "first_empty"
if(all(!do.call(paste0,data.frame(data[,-1], helper=""))=="")) stop("All your data is already classified. Please provide unclassified data if you want to proceed.")
# Check if randomize is single value
if(length(randomize)>1) stop("randomize must be a single value.")
# Check if randomize is logical
if(!is.logical(randomize)) stop("randomize must be either TRUE or FALSE.")
# Check if context is single value
if(length(context)>1) stop("context must be a single value.")
# Check if context is logical
if(!is.logical(context)) stop("context must be either TRUE or FALSE.")
# Check if pre is null or character
if(!is.null(pre) & !is.character(pre)) stop("pre and post must be character vectors.")
# Check if post is null or character
if(!is.null(post) & !is.character(post)) stop("pre and post must be character vectors.")
# If pre is given, check if correct length
if(is.character(pre) & length(pre) != nrow(data)) stop("pre and post must be of the same length as data.")
# If post is given, check if correct length
if(is.character(post) & length(post) != nrow(data)) stop("pre and post must be of the same length as data.")
# If only one of pre and post is given, set up the other one
if(!is.null(pre) & is.null(post)) post <- rep("", nrow(data))
if(!is.null(post) & is.null(pre)) pre <- rep("", nrow(data))
# Check if interactive
if(!interactive()) stop("handcode() can only be used in an interactive R session.")
# Initialize -------------------------------------------------------------------
a <- data_for_app(data, start, randomize, context, pre, post)
# Run App ----------------------------------------------------------------------
ret <- runApp(handcoder_app(a))
# If pre and post given, attach to ret
if(!is.null(pre) & !is.null(post)) {
ret$pre <- pre
ret$post <- post
}
# Output results ----------------------------------------------------------
message("Please cite: Isermann, Lukas. 2023. handcodeR: Text annotation app. R package version 0.1.1. https://github.com/liserman/handcodeR")
return(ret)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.