R/get_tags.R

Defines functions get_tags

Documented in get_tags

#' Get all tags and their frequency for given artists
#'
#' Builds one \code{artist.gettoptags} request URL per artist, fetches the
#' URLs in batches of 100 and collects the \code{tag} entries of every
#' response into a single table.
#'
#' @param artist_vector \code{character} vector with specified artists.
#'   Duplicated names are allowed; an empty vector yields an empty table.
#'
#' @return \code{data.table} object with columns: artist, tag, tag_freq.
#'   A warning is raised for every artist whose response contains no tags
#'   and for artists for which no response was processed.
#'
#' @examples
#' tags <- get_tags(artist_vector = c('Saxon', 'Iron Maiden'))
#'
#' @export
get_tags <- function(artist_vector) {

  total <- length(artist_vector)

  # zero-row template; guarantees the documented columns in every result
  empty_tags <- data.table(
    artist = character(0),
    tag = character(0),
    tag_freq = integer(0)
  )

  # Nothing to request (a progress bar with max = 0 cannot be created).
  if (total == 0L) {
    return(empty_tags)
  }

  # The number of tags per artist is not known in advance, so each artist
  # gets its own data.table and they are row-bound at the end. Slots start
  # as NULL, which rbindlist() skips if a response is never processed.
  dt_list <- vector("list", total)

  # build the request URLs (responses are XML)
  artists_encoded <- vapply(
    artist_vector,
    function(x) URLencode(x, reserved = TRUE),
    character(1),
    USE.NAMES = FALSE
  )
  lastfm_urls <- paste0(
    api_root,
    "artist.gettoptags&",
    "artist=",
    artists_encoded,
    "&autocorrect=0",
    "&api_key=",
    api_key
  )

  pb <- txtProgressBar(min = 0, max = total, style = 3)
  # close the progress bar even if a request or the parsing fails
  on.exit(close(pb), add = TRUE)

  # Callback handed to run_batch(): parses one response and stores the
  # resulting table at the position(s) of the artist it belongs to.
  add_data <- function(response) {
    # duplicated artists share a URL, so several positions may match
    dt_indices <- which(lastfm_urls == response$url)
    if (length(dt_indices) > 0L) {
      current_artist <- artist_vector[dt_indices[1L]]
      parsed_xml <- read_xml(parse_content(response))
      entries <- xml_find_all(parsed_xml, ".//tag")
      tags <- xml_text(xml_find_all(entries, './/name'))
      counts <- as.integer(xml_text(xml_find_all(entries, './/count')))
      tags_dt <- data.table(
        artist = rep(current_artist, length(tags)),
        tag = tags,
        tag_freq = counts
      )
      if (nrow(tags_dt) == 0L) {
        warning(sprintf("Artist %s not found", current_artist))
      }
      for (dt_index in dt_indices) {
        dt_list[[dt_index]] <<- tags_dt
      }
    } else {
      # the URL is deliberately not printed: it contains the API key
      warning("Received a response that does not match any requested artist")
    }
    setTxtProgressBar(pb, getTxtProgressBar(pb) + 1L)
  }

  # process data in 100-url batches
  batch_size <- 100L
  all_indices <- seq_len(total)
  batches <- split(all_indices, ceiling(all_indices / batch_size))
  for (current_batch in batches) {
    run_batch(url_list = lastfm_urls, indices = current_batch, update_data = add_data)
  }

  # report artists whose slot was never filled by the callback
  missing_idx <- which(vapply(dt_list, is.null, logical(1)))
  if (length(missing_idx) > 0L) {
    warning(sprintf(
      "No data received for: %s",
      paste(artist_vector[missing_idx], collapse = ", ")
    ))
  }

  # prepend the template so the columns exist even if every slot is NULL
  rbindlist(c(list(empty_tags), dt_list))
}
ppatrzyk/lastfmR documentation built on May 28, 2019, 7:35 a.m.