# R/getMondaysCompanies.R

# Love Mondays company scraper
#
# This file defines 'getMondaysCompanies', which scrapes the paginated
# company listing of lovemondays.com.br into a data frame.
#
# You can learn more about package authoring with RStudio at:
#
#   http://r-pkgs.had.co.nz/
#
# Some useful keyboard shortcuts for package authoring:
#
#   Build and Reload Package:  'Ctrl + Shift + B'
#   Check Package:             'Ctrl + Shift + E'
#   Test Package:              'Ctrl + Shift + T'

#' Scrape the Love Mondays company listing
#'
#' Reads the paginated company search of lovemondays.com.br, starting at
#' page 1 and advancing one page at a time, until a page that contains no
#' company titles is reached. The entries of all pages are combined into a
#' single data frame.
#'
#' Requires network access and the xml2, rvest, stringr and readr packages.
#'
#' @return A data frame with one row per listed entry and the columns
#'   \code{company} (text of the listing title), \code{n} (first run of
#'   digits found in the contributions text, as a number), \code{value}
#'   (the scraped valuation, \code{0} when the entry has no contributions or
#'   no valuation could be parsed) and \code{link} (the entry's \code{href}
#'   with \code{"/trabalhar-na-"} removed). When the first page is already
#'   empty, a data frame with zero rows is returned.
#' @export
#'
#' @examples
#' \dontrun{
#' companies <- getMondaysCompanies()
#' head(companies)
#' }
getMondaysCompanies <- function() {
  url <- "https://www.lovemondays.com.br/pesquisa/empresa/pagina/"

  # Text of every node on `page` that matches the CSS selector `css`.
  node_text <- function(page, css) {
    rvest::html_text(rvest::html_nodes(page, css))
  }

  # Per-page results are collected in a list and flattened once at the end,
  # rather than re-copying four growing vectors on every page.
  pages <- list()
  pageNumber <- 1

  repeat {
    # No global connection cleanup is done here: closing every open
    # connection would also close connections owned by the caller.
    base <- xml2::read_html(paste0(url, pageNumber))

    company <- readr::parse_character(
      node_text(base, ".lm-List-item-title--large")
    )

    # A page without company titles marks the end of the listing. This is
    # checked before any per-entry processing so that an empty page is never
    # indexed into.
    if (length(company) == 0) {
      break
    }

    # NOTE(review): only the first run of digits is kept, so a count written
    # with a thousands separator would be truncated -- confirm site format.
    contribution <- stringr::str_replace_all(
      node_text(base, ".lm-List-item-contributions"), "\n", ""
    )
    contribution <- readr::parse_number(
      stringr::str_extract(contribution, "[0-9]+")
    )

    value <- readr::parse_number(
      stringr::str_replace_all(
        node_text(base, ".lm-List-item-logoBox-valuation"), "\n", ""
      )
    )

    link <- stringr::str_replace_all(
      rvest::html_attr(rvest::html_nodes(base, ".is-medium"), "href"),
      "/trabalhar-na-",
      ""
    )

    # Entries with zero contributions get the value 0; the scraped valuations
    # are handed out, in page order, to the remaining entries (presumably the
    # page shows no valuation node for entries without contributions --
    # verify against the site). An unparseable (NA) contribution count is
    # treated like zero contributions instead of aborting the scrape.
    rated <- !is.na(contribution) & contribution != 0
    newValue <- numeric(length(contribution))
    # Indexing past the end of `value` yields NA, which becomes 0 below.
    newValue[rated] <- value[seq_len(sum(rated))]

    pages[[length(pages) + 1L]] <- list(
      company = company,
      n = contribution,
      value = newValue,
      link = link
    )

    pageNumber <- pageNumber + 1
  }

  # Flatten one field across all pages; `as_type` fixes the column type even
  # when no page was collected (unlist() of an empty list is NULL).
  collect <- function(field, as_type) {
    as_type(unlist(lapply(pages, `[[`, field), use.names = FALSE))
  }

  results <- data.frame(
    company = collect("company", as.character),
    n = collect("n", as.numeric),
    value = collect("value", as.numeric),
    link = collect("link", as.character)
  )
  results$value[is.na(results$value)] <- 0

  results
}
# paulo663636/HRtools documentation built on June 6, 2019, 12:23 p.m.