# Record the start time so total render duration can be reported at the end
# of the report (see the `fim` block at the bottom of this file).
inicio <- Sys.time()

# Let rCharts size plots to fit the page instead of fixed pixel dimensions.
options(RCHART_WIDTH = NA, RCHART_HEIGHT = NA)

# results = 'asis' so generated HTML/markdown is emitted verbatim by knitr;
# use TRUE/FALSE rather than the reassignable T/F shorthands.
knitr::opts_chunk$set(comment = NA, results = 'asis', tidy = FALSE, message = FALSE)

# Project helpers: Twitter utilities and the election-link persistence code.
source('funcoesTwitter.R', encoding = 'UTF-8')
source('salvaLinkEleicoes.R', encoding = 'UTF-8')
#load('../../data/politics-tweets-2014-06-24.Rda')
#tweets$text = gsub("[^[:graph:]]", " ",tweets$text)

# Legacy filter kept for reference: selected election-related tweets by keyword.
#eleicoes_full = tweets[grep("eleicao|eleição|eleicoes|eleições|Dilma Rousseff|Aécio Neves|Aecio Neves|Eduardo Campos|Marina Silva|aecio|dilma|marina|presidencia|presidência|lula|presidenta|pt|psb|psdb", tweets$text, ignore.case=TRUE),]

#eleicoes = eleicoes_full
# Return the rows of `tweets` whose text contains any shortened (t.co) link
# that decodes to the given full `url`.
#
# @param url          Fully-expanded URL to look up.
# @param tweets       Data frame with a `text` column holding tweet bodies.
# @param decodedLinks Named character vector: names are short links, values
#                     are the decoded (full) URLs they redirect to.
# @return Subset of `tweets` (possibly zero rows) that mention the URL.
getTweetsFromFullUrl <- function(url, tweets, decodedLinks) {
  # All short links that resolve to this full URL.
  short_links <- names(which(decodedLinks == url))
  # fixed = TRUE: URLs contain regex metacharacters (".", "?", "+"), so they
  # must be matched literally, not interpreted as patterns.
  rows <- lapply(short_links, function(x) grep(x, tweets$text, fixed = TRUE))
  rows <- unique(unlist(rows))
  tweets[rows, ]
}

# Extract the full (decoded) URLs referenced by a set of tweets.
#
# @param tweets       Data frame with a `text` column; passed to
#                     getShortLinks() (defined in funcoesTwitter.R) to pull
#                     out the shortened links.
# @param decodedLinks Named character vector mapping short link -> full URL.
# @param table        If TRUE, return a frequency table of the URLs sorted in
#                     decreasing order instead of the raw character vector.
#                     (NB: the parameter shadows base::table by name only —
#                     the function lookup inside still finds base::table.)
# @return Character vector of URLs, or a sorted frequency table.
getUrlsFromTweets <- function(tweets, decodedLinks, table = FALSE) {
  short_links <- getShortLinks(tweets)
  # Direct vector indexing replaces the per-element sapply: short links not
  # present in decodedLinks still yield NA, as before.
  urls <- as.character(decodedLinks[short_links])
  if (isTRUE(table)) {
    urls <- sort(table(urls), decreasing = TRUE)
  }
  urls
}

# decode_short_url <- function(url, ...) {
#   # PACKAGES #
#   require(RCurl)
#   
#   # LOCAL FUNCTIONS #
#   decode <- function(u, tries) {
#     cat(paste("teste", u, "\n"))
#     x <- try( getURL(u, header = TRUE, nobody = TRUE, followlocation = FALSE, cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl")) )
#     if(inherits(x, 'try-error') | length(grep(".*[lL]ocation: (\\S+).*", x))<1) {
#       return(u)
#     } else {
#       ret = gsub(".*[lL]ocation: (\\S+).*", "\\1", x)
#       if (str_length(ret) < 40 & tries < 4 ) {
#         return(decode(ret, tries+1))
#       }
#       else {
#         return(ret)
#       }
#     }
#   }
#   
#   # MAIN #
#   # return decoded URLs
#   urls <- c(url, ...)
#   l <- lapply(urls, decode, tries = 0)
#   names(l) <- urls
#   return(l)
# }
# 
# 
# getShortLinks = function(tweets){
#   library(stringr)
#   links = str_extract_all(tweets$text, "(https?://t.co[^(\n|\t| |\")][^(\n|\t| |\")]+)")
#   links = unlist(links)
#   links = links[str_length(links) > 20]
#   links
# }
# 
# 
# getDecodedLinks = function(tweets){
#   links = getShortLinks(tweets)
#   uniq = unique(links)
#   decoded = decode_short_url(uniq)
#   decodedLinks = links
#   m = match(links, names(decoded))
#   change <- !is.na(m)
#   decodedLinks[change] <- decoded[m[change]]
#   decodedLinks
# }


#links = getDecodedLinks(eleicoes[1:2000,])
#topLinks = sort(table(as.character(links)), decreasing=TRUE)
#restantes = topLinks

#save(topLinks, file = "toplinks.Rda")
#load("../../data/politics-tweets-2014-07-04-all.Rda")
#load("../../data/politics-tweets-2014-07-04-copa-jogo.Rda")
# load("../../data/politics-tweets-2014-07-04-copa-jogo-simples.Rda")
# load("../../data/politics-tweets-2014-07-08-copa-jogo-simples.Rda")
# load("../../data/politics-tweets-2014-07-08-all.Rda")
#load("../../data/politics-tweets-2014-07-01-to-2014-07-31-eleicoes.Rda")
#load("../../data/politics-tweets-2014-08-17-to-2014-08-20-eleicoes-all.Rda")
#restantes = sort(table(unlist(linkEleicoes)), decreasing = TRUE)

# Current dataset: election tweets for 2014-08-17..20. The first load is
# expected to provide `links` (short link -> full URL map) and the second
# `resultado` (list whose first element holds the tweets) — TODO confirm
# against the .Rda contents.
load('../../data/politics-tweets-2014-08-17-to-2014-08-20-eleicoes-all.Rda')
load('../../data/politics-tweets-2014-08-17-to-2014-08-20-eleicoes-all-resultado.Rda')
# Frequency table of full URLs mentioned in the first tweet set.
restantes <- getUrlsFromTweets(resultado$tweets[[1]], links, TRUE)

# Number of items to display in each report section below.
qtdTweets    <- 6
qtdFacebook  <- 6
qtdInstagram <- 6
qtdYoutube   <- 6
qtdNoticias  <- 8

...
Imagens do Twitter mais vistas


wzxhzdk:4


...
Imagens do Facebook mais vistas


wzxhzdk:5

...
Imagens do Instagram mais vistas


wzxhzdk:6

...
Vídeos do YouTube mais vistos


wzxhzdk:7

...
Notícias mais vistas


wzxhzdk:8
# Record end time; the difference against `inicio` (set at the top of the
# file) gives the total render duration. Printing is left disabled.
fim <- Sys.time()
# print('Tempo total:')
# print(fim - inicio)


# rommelnc/hudar documentation built on May 27, 2019, 1:49 p.m.