
# Web Scraping part 4: APIs
# author: Rolf Fredheim and Aiora Zabala
# University of Cambridge
# 11/03/2014

# Catch up
# Slides from week 1: http://quantifyingmemory.blogspot.com/2014/02/web-scraping-basics.html
# Slides from week 2: http://quantifyingmemory.blogspot.com/2014/02/web-scraping-part2-digging-deeper.html
# Slides from week 3: http://quantifyingmemory.blogspot.com/2014/03/web-scraping-scaling-up-digital-data.html

# Get the docs:
# http://fredheir.github.io/WebScraping/Lecture4/p4.html
# http://fredheir.github.io/WebScraping/Lecture4/p4.Rpres
# http://fredheir.github.io/WebScraping/Lecture4/p4.r

#Facebook API 
# fqlQuery='select share_count,like_count,comment_count from link_stat where url="'
# url="http://www.theguardian.com/world/2014/mar/03/ukraine-navy-officers-defect-russian-crimea-berezovsky"
# queryUrl = paste0('http://graph.facebook.com/fql?q=',fqlQuery,url,'"')  #ignoring the callback part
# lookUp <- URLencode(queryUrl) #What do you think this does?
# lookUp

#Read it in:
# require(jsonlite)
# rd <- readLines(lookUp, warn="F") 
# dat <- fromJSON(rd)
# dat

# #Geocoding
# #write a function
# getUrl <- function(address,sensor = "false") {
#  root <- "http://maps.google.com/maps/api/geocode/json?"
#  u <- paste0(root,"address=", address, "&sensor=false")
#  return(URLencode(u))
# }
# getUrl("Kremlin, Moscow")
# #In use
# require(RJSONIO)
# target <- getUrl("Kremlin, Moscow")
# dat <- fromJSON(target)
# latitude <- dat$results[[1]]$geometry$location["lat"]
# longitude <- dat$results[[1]]$geometry$location["lng"]
# place <- dat$results[[1]]$formatted_address
# latitude
# longitude
# place
# #Getting a static map
# #Construct that URL in R using paste?
# base="http://maps.googleapis.com/maps/api/staticmap?center="
# latitude=55.75
# longitude=37.62
# zoom=13
# maptype="hybrid"
# suffix ="&size=800x800&sensor=false&format=png"
# #Possible solution
# base="http://maps.googleapis.com/maps/api/staticmap?center="
# latitude=55.75
# longitude=37.62
# zoom=13
# maptype="hybrid"
# suffix ="&size=800x800&sensor=false&format=png"
# target <- paste0(base,latitude,",",longitude,
#                  "&zoom=",zoom,"&maptype=",maptype,suffix)
# #What to do next...?
# download.file(target,"test.png", mode = "wb")
# #Leftovers
# #non-latin strings in scraper output:
# bbcScraper <- function(url){
#   SOURCE <-  getURL(url,encoding="UTF-8")
#   PARSED <- htmlParse(SOURCE,encoding="UTF-8")
#   title=xpathSApply(PARSED, "//h1[@class='story-header']",xmlValue)
#   date=as.character(xpathSApply(PARSED, "//meta[@name='OriginalPublicationDate']/@content"))
#   if (is.null(date))    date <- NA
#   if (is.null(title))    title <- NA
#   return(c(title,date))
# }
# #Social APIs
# url="http://www.theguardian.com/uk-news/2014/mar/10/rise-zero-hours-contracts"
# target=paste0("http://urls.api.twitter.com/1/urls/count.json?url=",url)
#   rd <- readLines(target, warn="F") 
#   dat <- fromJSON(rd)
#   dat
#   shares <- dat$count
# #Social APIs, my solutions
# #LinkedIn
# url="http://www.theguardian.com/uk-news/2014/mar/10/rise-zero-hours-contracts"
# target=paste0("http://www.linkedin.com/countserv/count/share?url=$",url,"&format=json")
#   rd <- readLines(target, warn="F") 
#   dat <- fromJSON(rd)
# #StumbleUpon
# url="http://www.theguardian.com/uk-news/2014/mar/10/rise-zero-hours-contracts"
# target=paste0("http://www.stumbleupon.com/services/1.01/badge.getinfo?url=",url)
#   rd <- readLines(target, warn="F") 
#   dat <- fromJSON(rd)
# #Map making 2: my approach
# query="cambridge university"
# target=paste0("http://geocode-maps.yandex.ru/1.x/?format=json&lang=en-BR&geocode=",query)
#   rd <- readLines(target, warn="F") 
#   dat <- fromJSON(rd)
# #Extract address and location data
# address <- dat$response$GeoObjectCollection$featureMember[[1]]$
#   GeoObject$metaDataProperty$GeocoderMetaData$AddressDetails$Country$AddressLine
# pos <- dat$response$GeoObjectCollection$featureMember[[1]]$
#   GeoObject$Point
# require(stringr)
# temp <- unlist(str_split(pos," "))
# latitude=as.numeric(temp)[1]
# longitude=as.numeric(temp)[2]
# #Map making 2: my approach 2
# zoom=13
# lang="en-US"
# maptype="map" # options: pmap, map, sat, trf (traffic). Note: with sat the file is in JPG format, not PNG
# target <- paste0("http://static-maps.yandex.ru/1.x/?ll=",latitude,",",longitude,"&size=450,450&z=",zoom,"&l=map&lang=",lang,"&l=",maptype)
# download.file(target,"test.png", mode = "wb")
# #YouTube stats
# #Function to return stats about a single video
# getStats <- function(id){
#   url=paste0("https://gdata.youtube.com/feeds/api/videos/",id,"?v=2&alt=json")
#   raw.data <- readLines(url, warn="F") 
#   rd  <- fromJSON(raw.data)
#   dop  <- as.character(rd$entry$published)
#   term <- rd$entry$category[[2]]["term"]
#   label <- rd$entry$category[[2]]["label"]
#   title <- rd$entry$title
#   author <- rd$entry$author[[1]]$name
#   duration <- rd$entry$`media$group`$`media$content`[[1]]$duration
#   favs <- rd$entry$`yt$statistics`["favoriteCount"]
#   views <- rd$entry$`yt$statistics`["viewCount"]
#   dislikes <- rd$entry$`yt$rating`["numDislikes"]
#   likes <- rd$entry$`yt$rating`["numLikes"]
#   return(data.frame(id,dop,term,label,title,author,duration,favs,views,dislikes,likes))
# }
# #YouTube Comments
# #Function to return comments about a video
# getComments <- function(id){
#   url=paste0("http://gdata.youtube.com/feeds/api/videos/",id,"/comments?v=2&alt=json")
#   raw.data <- readLines(url, warn="F") 
#   rd  <- fromJSON(raw.data)
#   comments <- as.character(sapply(1:length(rd$feed$entry), function(x) (rd$feed$entry[[x]]$content)))
#   return(comments)
# }
