R/kgl_flow_utils.R

Defines functions kgl_flow_data_download validator_path_competition_id

validator_path_competition_id <- function(
  path_competition_id,
  competition_id,
  dir_path,
  dir_meta
    ) {
  if (fs::file_exists(path_competition_id)) {
    logged_competition_id <- readLines(path_competition_id)[2]

    if (logged_competition_id != competition_id) {
      user_response <- usethis::ui_yeah(
        "The previously used competition_id is not the same as the input id. It is recommended that there is one Rproj per kaggle competition. Do yo want to clear the content of {.kgl_dir_ui}?
        - Recorded ID: {usethis::ui_value(logged_competition_id)}
        - Clean Input ID: {usethis::ui_value(competition_id)}"
      )

      if (user_response) {
        fs::dir_delete(dir_path)

        fs::dir_create(dir_path)
        fs::dir_create(dir_meta)

        usethis::write_union(
          path = path_competition_id,
          lines = c(
            "# Generated by kaggler: do not edit by hand",
            competition_id
          ),
          quiet = TRUE
        )
      } else {
        usethis::ui_oops(
          "{usethis::ui_value('kgl_flow()')} depends on the previously used competition ID being the same as the input. To proceed do one of thoe following:
          - Update the input ID to the previous value
          - Re-run {usethis::ui_value('kgl_flow()')} and choose to remove the contents in {.kgl_dir_ui}
          - Start a new R project."
        )
        return(invisible())
      }
    }
  } else {
    fs::dir_create(dir_meta)

    usethis::write_union(
      path = path_competition_id,
      lines = c(
        "# Generated by kaggler: do not edit by hand",
        competition_id
      ),
      quiet = TRUE
    )
  }
}

kgl_flow_data_download <- function(
  competition_id,
  file_name,
  dir_name
    ) {
  get_url <- glue::glue("competitions/data/download/{competition_id}/{file_name}")

  get_request <- kgl_api_get(get_url)

  if (httr::status_code(get_request) != 200) {
    return(invisible(get_request))
  }

  get_url <- get_request$url

  get_ext <- fs::path_ext(get_url)

  path_temp <- fs::path(
    dir_name,
    file_name %>% fs::path_ext_remove(),
    ext = get_ext
  )

  file_name_value <- usethis::ui_value(file_name)
  usethis::ui_todo("Downloading {file_name_value}...")

  download.file(
    url = get_url,
    destfile = path_temp,
    mode = "wb",
    quiet = TRUE
  )

  if (get_ext == "zip") {
    usethis::ui_todo("Zip file detected! Unzipping...")

    unzip_result <- suppressWarnings(unzip(
      path_temp,
      exdir = dir_name,
      overwrite = TRUE
    ))

    fs::file_delete(path_temp)
  }

  path_d <- fs::path(dir_name, file_name)

  if (!fs::file_exists(path_d)) {
    usethis::ui_oops("File does not exist! Something went wrong :(")
    return(NULL)
  }

  path_ext <- fs::path_ext(path_d)

  if (path_ext == "csv") {
    d <- shh(readr::read_csv(
      file = path_d,
      col_types = readr::cols(),
      progress = FALSE
    ))
    d_rows <- nrow(d)
    d_cols <- ncol(d)
  } else {
    d_rows <- NA_integer_
    d_cols <- NA_integer_
  }

  d_meta <-
    dplyr::tibble(
      name = file_name,
      download_time = shh(lubridate::now()),
      nrows = d_rows,
      ncols = d_cols
    )

  return(invisible(d_meta))
}
KoderKow/kaggler documentation built on Aug. 26, 2023, 11:27 a.m.