R/functions.R

Defines functions go_to_rapportering go_to_databeheer go_to_centrale_new_versato go_to_tilroy driver_close driver_launch driver_initiate get_period_date click_aangevraagd_rapport click_rapport_selectie create_thematic_dir download_latest_excel select_leverancier select_alle_producten select_all_shops click_refresh click_excel_icon get_time_asked get_timestamp click_ask_report set_period_dates get_wiped_datepicker go_to_menu go_to_thematic_page check_report ask_report check_table_line_values get_table_line_data exec_report log_info

library(RSelenium)
library(dplyr)
library(purrr)
library(filesstrings)




# DEFINING FUNCTION TO GENERATE CLEAN LOG MESSAGES ----
log_info = function(message){
  cat(paste0(
    " ", "\n",
    "########", "\n",
    "*---",
    Sys.time(),
    "---*",
    "\n",
    message, "\n",
    "########", "\n",
    " "))
}




# DEFINING THE FUNCTION TO EXECUTE THE THEMATIC/PERIOD XLS FILES ASK, CHECK AND DOWNLOAD ----
#' @export
exec_report = function(thematic, period, temp_dir, root_dest_dir, selenium_host_name, selenium_port, user, pwd){
  log_info(paste0("Starting process to retrieve report for ", thematic$name))

  # SET THE FINAL DESTINATION FOLDER WHERE RENAMED REPORT FILE WILL BE STORED
  thematic$dest_dir = paste0(root_dest_dir, thematic$name, "/")

  # PRECONFIGURATION OF DOWNLOAD FOLDERS----
  create_thematic_dir(thematic$dest_dir, thematic$name)

  # START BROWSING  WITH SELENIUM SERVER DRIVER ----
  remDr = driver_initiate(selenium_host_name, selenium_port)
  driver_launch(remDr)
  remDr$setImplicitWaitTimeout(5000) # we tell the driver to wait 5 sec for element to load before sending timeout

  go_to_tilroy(remDr, user, pwd)
  
  go_to_centrale_new_versato(remDr)
  
  go_to_databeheer(remDr, user, pwd)
  
  ask_report(remDr, thematic, period)
  
  check_report(remDr, thematic, period, temp_dir)
  
}


# GET TABLE LINE DATA FUNCTION ----
get_table_line_data = function(remDr, lineNum){

  get_table_column_data = function(remDr,colNum,lineNum){

    css_selector = paste0(
      ".transaction-tbl>tbody>tr:nth-child(",
      lineNum, ")",
      ">td:nth-child(", colNum, ")")

    if(colNum == 6){
      css_selector = paste0(css_selector,">a")
    }

    column_data = remDr$findElement(using = "css", css_selector)


    while (is.null(column_data)){
      Sys.sleep(1)
      column_data = remDr$findElement(using = "css", css_selector)
    }

    if(colNum != 6){
      column_data = column_data$getElementText()[[1]]
    }

    if(colNum == 6){
      css_selector = paste0(css_selector,">a")
      column_data = column_data$getElementAttribute("href")[[1]]
    }

    return(column_data)

  }

  line_data = data.frame(
    lineNum = lineNum,
    date = get_table_column_data(remDr, 2, 1),
    user = get_table_column_data(remDr, 3, 1),
    status = get_table_column_data(remDr, 5, 1),
    fileURL = "no_file"
  )


  if(line_data$status == "Gemaakt"){
    line_data$fileURL = get_table_column_data(remDr, 6, 1)
  }

  return(line_data)
}


# CHECK TABLE LINE VALUES FUNTIONS ----
check_table_line_values = function(remDr, line_data){

  log_info(paste0("Checking last line values"))

  message(line_data[,2:4])

  time_current = Sys.time()
  time_line_data = strptime(line_data$date, format="%d/%m/%Y %H:%M")

  # if difference not greater than 12 hours, ok !
  time_diff = as.numeric(time_current - time_line_data)

  if(time_diff < 12 * 60){ # to express in minutes
    good_time_value = TRUE
  }else{
    good_time_value = FALSE
  }

  if(line_data$user == "POK POK"){
    good_user_value = TRUE
  }else{
    good_user_value = FALSE
  }

  if(line_data$status == "Gemaakt"){
    good_status_value = TRUE
  }else{
    good_status_value = FALSE
  }

  if(stringr::str_ends(line_data$fileURL, ".xlsx")){
    good_fileName_value = TRUE
  }else{
    good_fileName_value = FALSE
  }


  if(isTRUE(
    good_time_value && good_user_value && good_status_value && good_fileName_value
  )){
    check = TRUE
  }else{
    check = FALSE
  }
  return(check)
}

# ASK REPORT FUNCTION ----
ask_report = function(remDr, thematic, period){
  log_info(paste0("Starting process to ask report for ", thematic$name))

  go_to_rapportering(remDr)

  go_to_menu(remDr, thematic)

  click_rapport_selectie(remDr)

  period = paste0("period_", period)

  if(!is.null(thematic[[period]]$from) && !is.null(thematic[[period]]$to)){
    set_period_dates(remDr, thematic[[period]]$from, thematic[[period]]$to)
  }

  if(isTRUE(thematic$steps$all_shops)){
    select_all_shops(remDr)
  }

  if(isTRUE(thematic$steps$leverancier)){
    select_leverancier(remDr)
  }

  if(isTRUE(thematic$steps$alle_producten)){
    select_alle_producten(remDr)
  }

  click_ask_report(remDr)
}

# CHECK REPORT FUNCTION ----
check_report = function(remDr, thematic, period, temp_dir){
  log_info(paste0("Starting process to check report status for ", thematic$name))

  go_to_rapportering(remDr)

  go_to_menu(remDr, thematic)

  click_aangevraagd_rapport(remDr)


  line_1_values = get_table_line_data(remDr, 1)
  line_1_check = check_table_line_values(remDr, line_1_values)

  if(isTRUE(line_1_check)){
    log_info(paste0("Report is ready for ", thematic$name))
    download_latest_excel(remDr, temp_dir, thematic$dest_dir, thematic$name, period, line_1_values$fileURL, line_1_values$lineNum)

  }
  else{
    log_info(paste0("Report not yet ready for ", thematic$name, ". Will attempt again in 30 sec "))
    Sys.sleep(0.5*60) # wait 1/2 minute
    check_report(remDr, thematic, period, temp_dir)
  }
}

# GO TO THEMATIC PAGE FUNCTION ----
go_to_thematic_page = function(remDr, thematic){
  root_url = "https://ve2945.tilroy.com"
  thematic_url = paste0(root_url, thematic$submenu_URL)

  log_info(paste0("Navigating to ", thematic$menu, " page"))
  thematic_page = remDr$navigate(thematic_url)
}

# GO TO THEMATIC MENU/SUBMENU FUNCTION ----
go_to_menu = function(remDr, thematic){

  menu_xpath_arg = paste0("//*[contains(text(), ", "'", thematic$menu, "')]")

  menu = remDr$findElement(using = "xpath", menu_xpath_arg)

  while(is.null(menu)){
    Sys.sleep(1)
    menu = remDr$findElement(using = "xpath", menu_xpath_arg)
  }
  menu$clickElement()

  if(!is.null(thematic$submenu_URL)){

    go_to_thematic_page(remDr, thematic)

  }

}

# GET WIPED DATEPICKER FUNCTION ----
get_wiped_datepicker = function(remDr, to_or_from){

  if(to_or_from == "from"){
    datepicker = remDr$findElement(using = "xpath", "//input[@data-url-parameter='dateFrom']")
    while (is.null(datepicker)){
      Sys.sleep(1)
      datepicker = remDr$findElement(using = "xpath", "//input[@data-url-parameter='dateFrom']")
    }
    datepicker$clearElement()
  }else{
    datepicker = remDr$findElement(using = "xpath", "//input[@data-url-parameter='dateTo']")
    while (is.null(datepicker)){
      Sys.sleep(1)
      datepicker = remDr$findElement(using = "xpath", "//input[@data-url-parameter='dateTo']")
    }
    datepicker$clearElement()
  }
  return(datepicker)
}

# SET WIPED DATEPICKER DATES  FUNCTION ----
set_period_dates = function(remDr, from, to){
  log_info(paste0("Entering dates for report"))
  get_wiped_datepicker(remDr, "from")$sendKeysToElement(list(as.character(get_period_date(from)), key = "enter"))
  get_wiped_datepicker(remDr, "to")$sendKeysToElement(list(as.character(get_period_date(to)), key = "enter"))

  click_rapport_selectie(remDr) # this is a hack for the date pickers to work

}

# CLICK ASK REPORT FUNCTION ----
click_ask_report = function(remDr){
  button_vraag_rapport = remDr$findElement(using = "xpath", "//*[contains(text(), 'Vraag rapport aan')]")

  while(is.null(button_vraag_rapport)){
    Sys.sleep(1)
    button_vraag_rapport = remDr$findElement(using = "xpath", "//*[contains(text(), 'Vraag rapport aan')]")
  }
  log_info(paste0("Clicking vraag rapport"))
  button_vraag_rapport$clickElement()
}

# GET A TIMESTAMP FOR FILENAME FUNCTION ----
get_timestamp = function(as.time = TRUE ){
  timestamp = Sys.time()
  if(!isTRUE(as.time)){
    timestamp = as.character(timestamp)
    timestamp = stringr::str_replace_all(timestamp, "[[:punct:]]", " ")
    timestamp = stringr::str_replace_all(timestamp, " ", "_")
  }

}


get_time_asked = function(remDr){
  time_asked = remDr$findElement(using = "css", ".transaction-tbl>tbody>tr>td:nth-child(2)")$getElementText()
  while(is.null(time_asked)){
    Sys.sleep(1)
    time_asked = remDr$findElement(using = "css", ".transaction-tbl>tbody>tr>td:nth-child(2)")$getElementText()
  }
  time_asked = strptime(time_asked, format="%d/%m/%Y %H:%M")
  return(time_asked)
}


# GET THEMATICS NAMES FUNCTION ----
# get_thematics_name = function(){
#   thematics = list(
#     Aankooporders = "Aankooporders",
#     DailySales = "DailySales",
#     AanvulRapportPerWinkel = "AanvulRapportPerWinkel",
#     OpenOrdersRapport = "OpenOrdersRapport"
#   )
# }


# CLICK EXCEL ICON FUNCTION -----
click_excel_icon = function(remDr){
  last_report_xls_icon = remDr$findElement(using = "css", ".transaction-tbl>tbody>tr>td:nth-child(6)>a")

  while (is.null(last_report_xls_icon)){
    print("Waiting for last report xls icon to show up...")
    Sys.sleep(1)
    last_report_xls_icon = remDr$findElement(using = "css", ".transaction-tbl>tbody>tr>td:nth-child(6)>a")
  }

  last_report_xls_icon$clickElement()
}


# CLICK REFRESH FUNCTION ----
click_refresh = function(remDr){

  button_refresh = remDr$findElement(using = "xpath", "//*[contains(text(), 'Refresh')]")

  while (is.null(button_refresh)){
    print("Waiting for refresh button to show up...")
    Sys.sleep(1)
    button_refresh = remDr$findElement(using = "xpath", "//*[contains(text(), 'Refresh')]")
  }


  button_refresh$clickElement()
}

# CLICK ALL SHOPS FUNCTION ----
select_all_shops = function(remDr){
  all_shops_button = remDr$findElement("id", "selectAll")

  while (is.null(all_shops_button)){
    print("Waiting for all_shops_button button to show up...")
    Sys.sleep(1)
    all_shops_button = remDr$findElement("id", "selectAll")
  }

  all_shops_button$clickElement()
}

# CLICK ALL PRODUCTS FUNCTION ----
select_alle_producten = function(remDr){
  alle_producten = remDr$findElement(using = "id", value = "selectionMethodAll")

  while (is.null(alle_producten)){
    print("Waiting for alle producten button to show up...")
    Sys.sleep(1)
    alle_producten = remDr$findElement(using = "id", value = "selectionMethodAll")
  }

  alle_producten$clickElement()
}

# CLICK RETOUR LEVERANCIERS
select_leverancier = function(remDr){

  retour_leverancier_stockherstelling = remDr$findElement(using = "id", value = "2523-selection")
  #retour_leverancier_stockherstelling = remDr$findElement(using = "xpath", "//*[contains(text(), 'Retour leverancier stockherstelling')]")
  while(is.null(retour_leverancier_stockherstelling)){
    Sys.sleep(1)
    retour_leverancier_stockherstelling = remDr$findElement(using = "id", value = "2523-selection")
  }

  retour_leverancier_stockherstelling$clickElement()
}



# DOWNLOAD LATEST EXCEL FILE FUNCTION ----
# Note that the files are downloaded to the server default download directory.
# https://stackoverflow.com/questions/35504731/specify-download-folder-in-rselenium
# https://stackoverflow.com/questions/42293193/rselenium-on-docker-where-are-files-downloaded
download_latest_excel = function(remDr, temp_dir, dest_dir, name, period, fileURL, lineNum){
  
    log_info(paste0("Starting download of report for ", name, " ", period))
    remDr$navigate(fileURL)

    fileName = strsplit(as.character(fileURL), "/")[[1]]
    fileName = tail(fileName,1)
    
    oldFile = paste0(temp_dir, fileName) # see docker compose file and execute.R
    oldFile_part = paste0(oldFile, ".part")
    
    
    while(file.exists(oldFile_part)){
      Sys.sleep(1)
      oldFile_part = paste0(oldFile, ".part")
    }
    
    log_info(paste0("Download is completed ", name, " ", period))
    
    newFile = paste0(dest_dir, "NV_", name, "_", "period_", period, "_", get_timestamp(as.time = FALSE), ".xlsx")
    
    file.rename(
      from = oldFile,
      to = newFile)
    
    # Close the remote driver
    driver_close(remDr)
    
    # return the newFile location
    return(newFile)
    
    # while (!file.exists(oldFile_part)){
    #   log_info(paste0("Download is completed ", name, " ", period))
    # 
    #   
    #   # https://towardsdatascience.com/how-to-automate-live-data-to-your-website-with-python-f22b76699674
    #   # https://stackoverflow.com/questions/52262584/selenium-firefox-webdriver-doesnt-make-a-complete-download-of-a-pdf-with-python
    #   
    #   newFile = paste0(dest_dir, "NV_", name, "_", "period_", period, "_", get_timestamp(as.time = FALSE), ".xlsx")
    #   
    #   file.rename(
    #     from = oldFile,
    #     to = newFile)
    # 
    #   
    #   # file.copy(from = oldFile, to = newFile)
    #   # 
    #   # while(!file.exists(newFile)){
    #   #   Sys.sleep(1)
    #   # }
    #   # 
    #   # file.remove(oldFile) # this blocks the rest of the script !
    #   
    #   
    #   # https://stackoverflow.com/questions/12193779/how-to-write-trycatch-in-r
    #   # result = tryCatch({
    #   #   file.copy(from = oldFile, to = newFile)
    #   #   file.remove(oldFile)
    #   #   result = "success"
    #   # }, warning = function(warning_condition) {
    #   #   log_info(paste0("Warning raised while attempting to copy the file for ", name, " ", period, "from temp folder to thematic folder"))
    #   #   return(result = "warning")
    #   # }, error = function(error_condition) {
    #   #   log_info(paste0("An error occured while attempting to copy the file for ", name, " ", period, "from temp folder to thematic folder"))
    #   #   return(result = "error")
    #   # }, finally={
    #   #   log_info(paste0("the requested file for ", name, " ", "period was processed with ", result))
    #   # })
    #   
    #   # Close the remote driver
    #   driver_close(remDr)
    #   
    #   # return the newFile location
    #   return(newFile)
    # }
    

}

# CREATE DIRECTORY FOR THEMATIC  FILE DOWNLOAD FUNCTION ----
create_thematic_dir = function(dest_dir, thematic){

  if(!dir.exists(dest_dir)){
    dir.create(dest_dir)
  }else{
    print("Destination directory for thematic already exists")
  }
}

# CLICK RAPPORT SELECTIE FUNCTION
click_rapport_selectie = function(remDr){
  log_info(paste0("Selecting rapport selectie"))
  rapportselectie = remDr$findElement(using = "xpath", "//*[contains(text(), 'Rapportselectie')]")

  while (is.null(rapportselectie)){
    print("Waiting for rapport selectie button to show up...")
    Sys.sleep(1)
    rapportselectie = remDr$findElement(using = "xpath", "//*[contains(text(), 'Rapportselectie')]")
  }

  rapportselectie$clickElement() # hack to hid the calendar pop-up
}

# CLICK AANGEVRAAGDRAPPORT SELECTIE FUNCTION
click_aangevraagd_rapport = function(remDr){
  log_info(paste0("Selecting aangevraagd rapport"))
  aangevraagd_rapport = remDr$findElement(using = "xpath", "//*[contains(text(), 'Aangevraagde rapporten')]")

  while (is.null(aangevraagd_rapport)){
    print("Waiting for aangevraagd_rapport tab to show up...")
    Sys.sleep(1)
    aangevraagd_rapport = remDr$findElement(using = "xpath", "//*[contains(text(), 'Aangevraagde rapporten')]")
  }

  aangevraagd_rapport$clickElement() # hack to hid the calendar pop-up
}

# GET THE PERIOD DATE FUNCTION ----
get_period_date = function(num_days){ # use num_day = 0 for today
  current_hour = as.numeric(format(Sys.time(), "%H"))
  if(current_hour > 20){
    period_date = format(Sys.Date() - num_days, '%d/%m/%Y')
  }else{
    period_date = format(Sys.Date() -1 - num_days, '%d/%m/%Y')
  }
  return(period_date)
}

# INITIATE THE DRIVER FUNCTION ----
# https://stackoverflow.com/questions/60170311/how-to-switch-download-directory-using-selenium-firefox-python
driver_initiate = function(selenium_host_name, selenium_port){
  log_info(paste0("Initiating the Selenium Driver"))
  # setting the firefox profile info for auto xlsx MIME type download
  ePrefs = makeFirefoxProfile(
    list(
      "browser.download.dir" = paste0("/home/seluser/Downloads"), # see docker compose file
      #"browser.download.dir" = paste0("/home/seluser/Downloads", "/", download_dir),
      "browser.download.folderList" = 2L,
      "browser.helperApps.alwaysAsk.force" = FALSE,
      "browser.download.manager.showWhenStarting" = FALSE,
      "browser.download.manager.showAlertOnComplete" = FALSE,
      "browser.helperApps.neverAsk.saveToDisk" = "application/zip,application/octet-stream,application/x-zip-compressed,multipart/x-zip,application/x-rar-compressed, application/octet-stream,application/msword,application/vnd.ms-word.document.macroEnabled.12,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/rtf,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/vnd.ms-excel,application/vnd.ms-word.document.macroEnabled.12,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/xls,application/msword,text/csv,application/vnd.ms-excel.sheet.binary.macroEnabled.12,text/plain,text/csv/xls/xlsb,application/csv,application/download,application/vnd.openxmlformats-officedocument.presentationml.presentation,application/octet-stream"
    ))

  # passing the profile to the selenium driver and opening it
  remDr = remoteDriver(
    remoteServerAddr = selenium_host_name,
    extraCapabilities = ePrefs,
    port = as.numeric(selenium_port))
}

# LAUNCH THE DRIVER FUNCTION ----
driver_launch = function(remDr){
  remDr$open()
}

# CLOSE THE DRIVER FUNCTION ----
driver_close = function(remDr){
  remDr$close() #was closeDriver
}

# GO TO TILROY HOMEPAGE AND LOGIN FUNCTION ----
go_to_tilroy = function(remDr, user, pwd){
  log_info(paste0("Navigating to Tilroy homepage"))
  # define the url of the vgc tilroy homepage
  tilroy_url = "https://ve2945.tilroy.com/index.cfm/login"

  # navigate to the vgc tilroy homepage
  tilroy = remDr$navigate(tilroy_url)
  remDr$refresh()


  login = remDr$findElement("id", "edtUser")
  while(is.null(login)){
    Sys.sleep(1)
    login = remDr$findElement("id", "edtUser")
  }
  # input the login credentials. If login is loaded, password is also !
  login$sendKeysToElement(list(user))
  remDr$findElement("id", "edtPassword")$sendKeysToElement(list(pwd))

  # click on login button
  remDr$findElement("name", "btnSubmit")$clickElement()

}

# GO TO CENTRALE NEW VERSATO FUNCTION ----
go_to_centrale_new_versato = function(remDr){

  # # Choose centrale New versato based on a css selector as no id or name attributes
  # centrale_new_versato_url = "https://ve2945.tilroy.com/index.cfm/shoptill/2414"
  # centrale_new_versato = remDr$navigate(centrale_new_versato_url)
  #
  # # navigate to shop 1
  # shop_one_url = "https://ve2945.tilroy.com/index.cfm/chooseTill/2732/1/?locktill=1"
  # shop_one= remDr$navigate(shop_one_url)
  #

  # Choose centrale New versato based on a css selector as no id or name attributes
  centrale_new_versato = remDr$findElement(using = "css", "div.shops>ul>a:first-child")
  while(is.null(centrale_new_versato)){
    Sys.sleep(1)
    centrale_new_versato = remDr$findElement(using = "css", "div.shops>ul>a:first-child")
  }
  log_info(paste0("Selecting Centrale New Versato"))
  centrale_new_versato$clickElement()
  # Choose the first shop
  remDr$findElement(using = "css", "div.tills>ul>a:first-child")$clickElement()

}


# GO TO DATABEHEER FUNCTION ----
go_to_databeheer = function(remDr, user, pwd){
  log_info(paste0("Selecting databeheer"))
  # Go to databeheer
  remDr$refresh()
  databeheer = remDr$findElement(using = "xpath", "//*[contains(text(), 'Databeheer')]")
  while(is.null(databeheer)){
    Sys.sleep(1)
    databeheer = remDr$findElement(using = "xpath", "//*[contains(text(), 'Databeheer')]")
  }
  databeheer$clickElement()

  # Re-enter credentials in the pop-up
  gebruikersnaam = remDr$findElement("id", "edtUserName")
  wachtwoord = remDr$findElement("id", "edtPassword")
  login = remDr$findElement(using = "xpath", "//*[contains(text(), 'login')]")

  while(is.null(login)){
    Sys.sleep(1)
    login = remDr$findElement(using = "xpath", "//*[contains(text(), 'login')]")
  }
  gebruikersnaam$sendKeysToElement(list(user))
  wachtwoord$sendKeysToElement(list(pwd))

  login$clickElement()

}

# GO TO RAPPORTERING FUNCTION ----
go_to_rapportering = function(remDr){

  log_info(paste0("Selecting menu Rapportering"))
  rapportering = remDr$findElement(using = "xpath", "//a[contains(@href,'/index.cfm/admin/reporting')]")

  while(is.null(rapportering)){
    Sys.sleep(1)
    rapportering= remDr$findElement(using = "xpath", "//a[contains(@href,'/index.cfm/admin/reporting')]")

  }
  rapportering$clickElement()

}
pokyah/tilroyExtractoR documentation built on Feb. 4, 2021, 2:32 p.m.