# R/GetFinancialStatementV2_NotWorking.R
#
# Defines functions GetFinancialStatement
#
# Documented in GetFinancialStatement

GetFinancialStatement=function(Symbol='SAP',URL='yahoo',tz="UTC",Silent=TRUE){
#   #GetFinancialStatement('SAP')
  #Symbol='SAP'
  #URL='morningstar'
  Silent=TRUE
  library(tidyverse)
  library(rvest)
  if (URL == 'yahoo') {
    requireNamespace('xml2')
    requireNamespace('dplyr')
    requireNamespace('rvest')
    tryCatch({
      library(RSelenium)
      #library(wdman)
      
      #library(dplyr)
      
      # Intializing the firefox driver at the given port
      remDr <- RSelenium::rsDriver(browser = "firefox",port=4445L )
      #library(seleniumPipes)
      #remDr <- seleniumPipes::remoteDr(browser = "firefox",port=4445)
      # Intializing the server on the same port
      remDr <- RSelenium::remoteDriver$new(port=4445L)
      #remDr
      
      # remDr$setTimeout(type = "page load", milliseconds = 50000) #miliseconds
      # remDr$setTimeout(type = "script", milliseconds = 50000)
      # remDr$setTimeout(type = "implicit", milliseconds = 50000)
      # 
      # #suppressMessages(remDr$open())
      # 
      # remDr$timeouts$implicit=50000
      #  remDr$timeouts$script=50000
     

      #if you want to see whats happening
      #driver=rsDriver(port = 4567L,browser = "chrome")
      #remDr=driver[["client"]]
   
      }, error = function(e){
        print(e)
        print(remDr)
      }
      )
    n=length(Symbol)
    QuarterlyData=list()
    FinanceStatements=list()
    for(i in 1:n){
      #msg = utils::capture.output(remDr$open())#
      try(remDr$open())
      # remDr=tryCatch({remDr$open()},error=function(e){
      #   print(e)
      #   remDr <- RSelenium::rsDriver(browser = "firefox",port=4445L )
      #   # Intializing the server on the same port
      #   remDr <- RSelenium::remoteDriver$new(port=4445L)
      #   remDr$open()
      # })
      # 
      # if (!Silent) {
      #   print(msg)
      # }
      url = paste0("https://finance.yahoo.com/quote/",
                   Symbol[i],
                   "/financials?p=",
                   Symbol[i])
      #url = paste0("https://finance.yahoo.com/quote/SAP/financials?p=SAP")
      quarterly=NULL
      tryCatch({
        remDr$navigate(url)
        #seleniumPipes::go(remDr,url)
        #copy selector then put it here
        ######################### This paart here is not needed. It was the requirement in Chrome, not in FireFox.###############
        #if(i==1){
        webElem = remDr$findElement(using = 'name','agree')
       # webElem=seleniumPipes::findElement(remDr = remDr,using =  'name',value = 'agree')
        # webElem <-
        #   remDr$findElement(
        #     using = 'css selector',
        #     'body > div.consent-wizard.eu-single-page > div.consent-wizard-body.eu-single-page
        #     > div.consent-steps-container > div > div.single-page-forms.yahoo >
        #     form.consent-form.single-page-form.single-page-agree-form > div > input')
        webElem$clickElement()
        #}
        #author of this part: Hamza Tayyab
        # get annual data
        
        # annual <- xml2::read_html(remDr$getPageSource()[[1]])
        # annual <- annual %>% rvest::html_table(fill = T, header = F)%>% .[[1]]  %>% filter(X1!=X2)
        # annual
        
        # click on Quarterly button
        #copy xpath then put it here
        webElem <-
          remDr$findElement(using = 'xpath',
                            '//*[@id="Col1-1-Financials-Proxy"]/section/div[1]/div[2]/button')
        webElem$clickElement()
        
        # WEb SCRAPING ---- 
        #get quarterly data
        quarterly <- xml2::read_html(remDr$getPageSource()[[1]])
        
      }, error = function(e){
        Data=list(Symbol[i])
        print(e)
        print(url)
      }
      )
      try(remDr$close())
      if(!is.null(quarterly)){
        #Extract all tables
        quarterly = rvest::html_table(quarterly,fill = T, header = F) 
        #select correct table
        quarterly=quarterly[[1]]
        #quarterly <- quarterly %>% rvest::html_table(fill = T, header = F) %>% .[[1]]  %>% filter(X1 != X2)
        #delete yahoo headers of variables without any input
        quarterly=dplyr::filter(quarterly,X1 != X2)
        
        #quarterly
        #return(quarterly)
        #end of author of this part: Hamza Tayyab
        Features = sapply(quarterly, function(x)
          gsub(',', '', x))
        Features[1, 1] = "Time"
        #Features=as.tibble(Features)
        Time = as.character(as.vector(Features[1, 2:ncol(Features)]))
        names(Time) = NULL
        Header = Features[, 1]
        Data = t(Features[2:nrow(Features), 2:ncol(Features)])
        mode(Data) = 'numeric'
        FeaturesT = data.frame(Time = as.Date(strptime(Time, format = '%m/%d/%Y',tz=tz),tz=tz), Data)
        colnames(FeaturesT) = Header
        #Data=list(FeaturesT)
        FinanceStatements[[i]]=FeaturesT
      }else{
        FinanceStatements[[i]]=NULL
      } # end if # quaterly not null
      print(i)
    }#end for each symbol
    names(FinanceStatements)=Symbol
    return(FinanceStatements)
  }#end if URL=='yahoo'
  
  if (URL == 'investing') {
    url = 'https://www.investing.com/equities/sap-ag-income-statement'
  }
  if (URL == 'msn') {
    url = 'https://www.msn.com/en-us/money/stockdetails/financials/fi-126.1.SAP.NYS'
  }
  if (URL == 'morningstar') {
    url = paste0(
      'http://financials.morningstar.com/ajax/ReportProcess4CSV.html?t=XFRA:',
      Symbol,
      '&reportType=is&period=3&dataType=A&order=asc&denominatorView=raw&columnYear=5&number=3'
    )
    
    tryCatch({
      raw = read.csv(
        file = url,
        header = T,
        sep = ',',
        skip = 1,
        stringsAsFactors = F
      )
      DF = raw[, c(2:6)]
      Header = raw[, 1]
      Time = gsub('X', '', colnames(raw)[2:6])
      DF = t(DF)
      colnames(DF) = Header
      rownames(DF) = Time
      DF[!is.finite(DF)] = NaN
      
      inddel = c()
      share = 'Earnings per share'
      indshare = which(Header == share)
      if (length(indshare) > 0) {
        colnames(DF)[indshare + 1] = paste(colnames(DF)[indshare + 1], share)
        colnames(DF)[indshare + 2] = paste(colnames(DF)[indshare + 2], share)
        inddel = c(inddel, indshare)
      }
      weight = 'Weighted average shares outstanding'
      indweight = which(Header == weight)
      if (length(indweight) > 0) {
        colnames(DF)[indweight + 1] = paste(colnames(DF)[indweight + 1], weight)
        colnames(DF)[indweight + 2] = paste(colnames(DF)[indweight + 2], weight)
        inddel = c(inddel, indweight)
      }
      
      other1 = 'Other income (expense)'
      indother1 = which(Header == other1)
      if (length(indweight) > 0) {
        indother2 = which(colnames(DF) == other1)
        indtemp1 = which(Header == "Total operating expenses")
        indtemp2 = which(Header == "Total nonoperating income, net")
        if (length(indtemp1))
          colnames(DF)[indother2[1]] = paste(colnames(DF)[indtemp1], other1)
        else
          colnames(DF)[indother2[1]] = paste('FirstListed', other1[1])
        if (length(indtemp1))
          colnames(DF)[indother2[2]] = paste(colnames(DF)[indtemp2], other1)
        else
          colnames(DF)[indother2[2]] = paste('SecondListed', other1)
      }
      
      indoperating = which(Header == "Operating expenses")
      if (length(indoperating) > 0)
        inddel = c(inddel, indoperating)
      
      if (length(inddel) > 0)
        DF = DF[, -inddel]
      
      HeaderNew = colnames(DF)
      
      indOther = which(HeaderNew == "Other operating expenses")
      if (length(indOther) > 0)
        DF[is.nan(DF[, indOther]), indOther] = 0
      
      indOther2 = which(HeaderNew == "Other")
      if (length(indOther2) > 0)
        DF[is.nan(DF[, indOther2]), indOther2] = 0
      
      indOther3 = which(HeaderNew == "Preferred dividend")
      if (length(indOther3) > 0)
        DF[is.nan(DF[, indOther3]), indOther3] = 0
      
      
      print (DF)
    }, error = function(e)
      print (url))
  }
}
# Mthrun/TSAT documentation built on Feb. 5, 2024, 11:15 p.m.