R/process_kindle_highlights.R

Defines functions process_kindle_highlights

process_kindle_highlights <- function(fpath, contains_quotes = FALSE, find_vocab = TRUE) {
  # Convert exported .txt file created by Bookcision to a clean plain text file. Instructions
  # for Bookcision found here: https://readwise.io/bookcision
  #
  # Arguments:
  #   fpath {char} -- path to exported .txt Kindle highlights file. The book title is taken
  #                   from the file name, expected to look like
  #                   'Kindle.Highlights_<Title.With.Dots>_<digits>.txt'
  #   contains_quotes {logical} -- do the highlights contain quotations (indented text blocks
  #                                on Kindle)?
  #   find_vocab {logical} -- if TRUE, look for highlights that are made up of only a single word,
  #                           and these will be written to a separate vocab list
  #
  # Returns:
  #   nothing (invisible NULL) -- writes '<title> Kindle Highlights.txt' next to the input file
  #   and, when find_vocab is TRUE and at least one single-word highlight exists,
  #   '<title> Vocab.txt' as well

  # Extract highlights: entries are separated by a blank line, and each entry's trailing
  # 'LOCATION: <n>' line is folded onto the highlight text as ' [LOCATION: <n>]'
  notes <- readLines(fpath)
  notes <- paste0(notes, collapse = "\n")
  notes <- strsplit(notes, "\n\n")[[1]]
  notes <- gsub("(.*)\\n(LOCATION: \\d+)", "\\1 [\\2]", notes)

  # Clean notes: replace typographic quotes and ellipses with plain ASCII equivalents
  notes <- gsub("’|‘", "'", notes)
  notes <- gsub("“|”", '"', notes)
  notes <- gsub("…", "...", notes)

  # If highlights contain quotes, the highlight will then be in the format:
  # <TEXT>\.\d+ <NAME_OF_QUOTE_AUTHOR>$
  # The number after the sentence terminator is dropped and the author is tagged as [QUOTE: ...]
  if (contains_quotes) {
    notes <- gsub(
      "(.*)(\\.|\\?|\\!)(\\d+ )(.*)( \\[LOCATION: \\d+\\])$",
      "\\1\\2 [QUOTE: \\4]\\5",
      notes
    )
  }

  # If highlights contain vocab (single-word highlights), separate to new char vector without
  # LOCATION included. `vocab` is always defined so the write step below can test it safely
  # even when no single-word highlight was found.
  vocab <- character(0)
  if (find_vocab) {
    highlight_text <- gsub("(.*)( \\[LOCATION: \\d+\\])$", "\\1", notes)
    # A highlight counts as vocab when splitting on spaces yields exactly one piece
    is_vocab <- lengths(strsplit(highlight_text, " ")) == 1L
    if (any(is_vocab)) {
      vocab <- tools::toTitleCase(highlight_text[is_vocab])
      # Drop a single trailing punctuation character (e.g. 'Word.' -> 'Word')
      vocab <- gsub("(.*)([[:punct:]])$", "\\1", vocab)
      notes <- notes[!is_vocab]
    }
  }

  # Extract title of book from exported filename
  title <- gsub("(Kindle\\.Highlights_)(.*)(_\\d+)(\\.txt)", "\\2", basename(fpath))
  title <- gsub("\\.", " ", title)

  out_dir <- dirname(fpath)
  writeLines(notes, file.path(out_dir, paste0(title, " Kindle Highlights.txt")))

  # Only write a vocab file when there is something to put in it
  if (length(vocab) > 0) {
    writeLines(vocab, file.path(out_dir, paste0(title, " Vocab.txt")))
  }

  invisible(NULL)
}
tsouchlarakis/rdoni documentation built on Sept. 16, 2019, 8:53 p.m.