#' IMDb Scraper Utility
#'
#' Provides a simple scraping utility for access to the IMDb movie database
#'
#' @param url the url to scrape
#' @export
#' @examples
#'
#' scrape.IMDb('http://www.imdb.com/title/tt0903747/epdate') # Scrapes Breaking Bad Ratings
#'
scrape.IMDb <- function(url)
{
require(XML)
doc <- htmlParse(url)
tables <- readHTMLTable(doc)
nrows <- unlist(lapply(tables, function(t) dim(t)[1]))
df <- tables[[which.max(nrows)]]
return(df)
}
#' Klout Analysis for Twitter Utility
#'
#' Interfaces with klout to determine the influential rating of any twitter user
#'
#' @param handles a character vector of twitter handles
#' @param api.key a twitter API key
#' @param na_omit a boolean regarding whether to omit 'NA' values
#' @param .progress boolean value indicating whether a progress bar should be displayed
#' @export
#'
scrape.retrieveklout <- function(handles, api.key, na_omit=TRUE, .progress='none') {
twitter_handles <- gsub("@", "", handles)
getID <- function(twitter_handle) {
base <- 'http://api.klout.com/v2/identity.json/twitter?screenName='
url <- paste0(base, twitter_handle, "&key=", api.key)
out <- try(fromJSON(getURL(url))$id, TRUE)
if(class(out)=='try-error'){
id <- NA
} else {
id <- str_trim(out)
}
return(id)
}
ids <- laply(twitter_handles, getID, .progress=.progress)
exists <- !is.na(ids)
twitter_handles_TRUE <- twitter_handles[exists]
if(length(twitter_handles_TRUE) < length(twitter_handles)) {
warning(paste("No Klout Scores for:",
paste(twitter_handles[is.na(ids)], collapse=" "))
)
# partition output by error status
exists <- !is.na(ids)
ids <- ids[exists]
twitter_handles_TRUE <- twitter_handles[exists]
twitter_handles_FALSE <- twitter_handles[!exists]
}
getScore <- function(id) {
url <- paste0('http://api.klout.com/v2/user.json/', id,'/score', '?key=', api.key)
out <- try(fromJSON(getURL(url))$score, TRUE)
if(class(out)=='try-error'){
score <- NA
} else {
score <- str_trim(out)
}
return(score)
}
score <- laply(ids, getScore, .progress=.progress)
# prepare output, leave NAs
output_TRUE <- data.frame(handle = twitter_handles_TRUE, id=ids, score=score, stringsAsFactors=F)
if(na_omit){
output <- output_TRUE
} else {
output_FALSE <- data.frame(handle = twitter_handles_FALSE, stringsAsFactors=F)
output_FALSE$id <- NA
output_FALSE$score <- NA
output <- data.frame(rbind(output_TRUE, output_FALSE), stringsAsFactors=F)
}
output$score <- as.numeric(str_trim(output$score))
return(output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.