process_kindle_highlights <- function(fpath, contains_quotes = FALSE, find_vocab = TRUE) {
  # Convert an exported .txt file created by Bookcision to a clean plain text file.
  # Instructions for Bookcision found here: https://readwise.io/bookcision
  #
  # Arguments:
  #   fpath {char} -- path to exported .txt Kindle highlights file
  #   contains_quotes {logical} -- do the highlights contain quotations (indented text
  #     blocks on Kindle)?
  #   find_vocab {logical} -- if TRUE, look for highlights that are made up of only a
  #     single word; these are removed from the highlights and written to a separate
  #     vocab list
  #
  # Returns:
  #   NULL (invisibly) -- output is written to "<title> Kindle Highlights.txt" and, when
  #   at least one single-word highlight was found, "<title> Vocab.txt", both placed in
  #   the same directory as fpath.

  # Extract highlights: blank lines separate entries, and each entry's trailing
  # "LOCATION: <n>" line is folded onto the highlight text as " [LOCATION: <n>]"
  notes <- readLines(fpath)
  notes <- paste0(notes, collapse = "\n")
  notes <- strsplit(notes, "\n\n")[[1]]
  notes <- gsub("(.*)\\n(LOCATION: \\d+)", "\\1 [\\2]", notes)

  # Clean notes: replace typographic quotes and ellipses with ASCII equivalents
  notes <- gsub("’|‘", "'", notes)
  notes <- gsub("“|”", '"', notes)
  notes <- gsub("…", "...", notes)

  # If highlights contain quotes, the highlight will then be in the format:
  # <TEXT>\.\d+ <NAME_OF_QUOTE_AUTHOR>$
  if (contains_quotes) {
    notes <- gsub(
      "(.*)(\\.|\\?|\\!)(\\d+ )(.*)( \\[LOCATION: \\d+\\])$",
      "\\1\\2 [QUOTE: \\4]\\5",
      notes
    )
  }

  # If highlights contain vocab (single-word highlights), separate them into a new char
  # vector without LOCATION included. `vocab` always exists, so the write step below
  # never refers to an undefined object when no vocab is found.
  vocab <- character(0)
  if (find_vocab) {
    bare_text <- gsub("(.*)( \\[LOCATION: \\d+\\])$", "\\1", notes)
    # lengths() is type-stable, so an empty `notes` yields an empty logical mask
    is_vocab <- lengths(strsplit(bare_text, " ")) == 1L
    if (any(is_vocab)) {
      vocab <- tools::toTitleCase(bare_text[is_vocab])
      # Drop a single trailing punctuation character (e.g. a highlighted full stop)
      vocab <- gsub("(.*)([[:punct:]])$", "\\1", vocab)
      notes <- notes[!is_vocab]
    }
  }

  # Extract title of book from exported filename
  title <- gsub("(Kindle\\.Highlights_)(.*)(_\\d+)(\\.txt)", "\\2", basename(fpath))
  title <- gsub("\\.", " ", title)

  out_dir <- dirname(fpath)
  writeLines(notes, file.path(out_dir, paste0(title, " Kindle Highlights.txt")))

  # Only write a vocab file when single-word highlights were actually found
  if (find_vocab && length(vocab) > 0) {
    writeLines(vocab, file.path(out_dir, paste0(title, " Vocab.txt")))
  }

  invisible(NULL)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.