#' Scrape a Google Play Store app page
#'
#' Reads the page at `app_url`, builds one XPath query per entry of
#' `tag_list`, and returns the text of the matching nodes as a named list.
#'
#' @param app_url A url to an app on the Google Play Store.
#' @param tag_list A list of tag descriptions, one per field to extract. Each
#'   element must provide the entries `name` (the name of the field in the
#'   result), `html_tag`, `html_attribute` and `html_attribute_value`. A node
#'   matches when it is an `html_tag` element whose `html_attribute` contains
#'   `html_attribute_value`.
#' @import rvest
#' @export
#' @return A list of the data from the Google Play Store page for the app,
#'   named after the `name` entries of `tag_list`. Fields with no matching
#'   node are `NA_character_`. A two-element `developer_and_genre` field is
#'   split into `developer` and `genre`; an `updated_etc` field that does not
#'   have exactly one element is split into `updated`, `size`, `installs`,
#'   `current_version`, `requires_Android` and `developer_email`.
#' @examples
#' \dontrun{
#' # Illustrative selector only; the right values depend on the page markup.
#' tag_list <- list(
#'   list(
#'     name = "title",
#'     html_tag = "h1",
#'     html_attribute = "itemprop",
#'     html_attribute_value = "name"
#'   )
#' )
#' getData_SingleApp(
#'   "https://play.google.com/store/apps/details?id=com.dinaga.photosecret",
#'   tag_list
#' )
#' getData_SingleApp(
#'   "https://play.google.com/store/apps/details?id=com.jamezuki.hidden",
#'   tag_list
#' )
#' }
getData_SingleApp <- function(app_url, tag_list) {
  html <- xml2::read_html(app_url)
  # The body node set is the same for every query, so fetch it only once.
  body <- rvest::html_nodes(html, "body")

  # Generate the XPath expression to search for, based on the tag information.
  xpaths <- lapply(tag_list, function(tag) {
    paste0(
      "//", tag[["html_tag"]],
      "[contains(@", tag[["html_attribute"]], ",'",
      tag[["html_attribute_value"]], "')]"
    )
  })

  # Get the text of every node matching each expression.
  values <- lapply(xpaths, function(xpath) {
    rvest::html_text(xml2::xml_find_all(body, xpath[[1]]))
  })

  # Name the results after the tags that produced them.
  names(values) <- as.character(lapply(tag_list, function(tag) tag[["name"]]))

  # NOTE: When this function was written, Google Play Store used the same
  # html_tag, html_attribute, and html_attribute_value for multiple fields. If
  # this is the case, then the values for all such fields will show up in R
  # as a vector under a single tag name. The following code splits such
  # vectors into individual entries with individual names.
  developer_and_genre <- values[["developer_and_genre"]]
  if (length(developer_and_genre) == 2) {
    # Add separate developer and genre items
    values[["developer"]] <- developer_and_genre[1]
    values[["genre"]] <- developer_and_genre[2]
    # Delete developer_and_genre item
    values[["developer_and_genre"]] <- NULL
  }

  # Only split when the combined field was actually requested. Without this
  # guard a missing field yields NULL, and assigning NULL to a list element
  # deletes it, which would drop e.g. an `updated` field scraped on its own.
  if ("updated_etc" %in% names(values)) {
    updated_etc <- values[["updated_etc"]]
    if (length(updated_etc) != 1) {
      # Position of each value inside the combined vector.
      # NOTE(review): presumably the skipped positions hold the field labels;
      # confirm against the page markup.
      positions <- c(
        updated = 1,
        size = 3,
        installs = 5,
        current_version = 7,
        requires_Android = 9,
        developer_email = 19
      )
      # Out-of-range positions give NA, so short vectors do not fail.
      for (field in names(positions)) {
        values[[field]] <- updated_etc[positions[[field]]]
      }
      # Delete updated_etc item
      values[["updated_etc"]] <- NULL
    }
  }

  # NOTE: Not all apps have entries for all tags. For example, if the app
  # doesn't have enough ratings, the ratings field doesn't show up on the
  # Google Store App Page. Replace empty values with NA, so that an error
  # doesn't occur when the getData function tries to put the data in a data
  # frame.
  lapply(values, function(value) {
    if (length(value) == 0) NA_character_ else value
  })
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.