R/utils.R

Defines functions format_file_size convert_file_size replace_month ct_commodity_lookup ct_download_ref_table ct_get_ref_table ct_migrate_cache get_primary_comtrade_key set_primary_comtrade_key

Documented in ct_commodity_lookup ct_get_ref_table ct_migrate_cache get_primary_comtrade_key set_primary_comtrade_key

#' Set your primary Comtrade API key in the environment variable
#'
#' If you would like your Comtrade API key to persist in between sessions,
#'  use `usethis::edit_r_environ()` to add the env variable COMTRADE_PRIMARY
#'  to your environment file.
#'
#' @param key Provide your primary comtrade key
#'
#' @returns Saves your comtrade primary key in the environment.
#' @export
#' @examplesIf interactive()
#' ## set API key
#' set_primary_comtrade_key("xxxxxc678ca4dbxxxxxxxx8285r3")
#'
set_primary_comtrade_key <- function(key = NULL) {
  if (is.null(key)) {
    key <- askpass::askpass("Please enter your API key")
  }
  Sys.setenv("COMTRADE_PRIMARY" = key)
}

#' get_primary_comtrade_key
#'
#' If you would like your Comtrade API key to persist in between sessions,
#' use `usethis::edit_r_environ()` to add the env variable COMTRADE_PRIMARY
#' to your environment file.
#'
#' @returns Gets your primary comtrade key from the environment
#' var COMTRADE_PRIMARY
#' @export
#' @examplesIf interactive()
#' ## get API key
#' get_primary_comtrade_key()
#'
get_primary_comtrade_key <- function() {
  key <- Sys.getenv("COMTRADE_PRIMARY")
  if (!identical(key, "")) {
    return(key)
  } else {
    rlang::abort("No API key found, please supply with `set_primary_comtrade_key` function or set COMTRADE_PRIMARY env var") #nolint
  }
}

#' Migrate cache to new location
#'
#' Comtradr versions previous to version 1.0.1 have used a cache
#' location that was not CRAN compliant. You can migrate any remaining files to
#' the new cache location using this function. It will delete the old cache.
#'
#'
#' @returns Nothing
#' @export
#' @examplesIf interactive()
#' ct_migrate_cache()
#'
ct_migrate_cache <- function(){
  ## check that the user has not set a cache manually in R_USER_CACHE_DIR
  ## and hence tools and rappdirs would return the same

  ## also checking that there is actually any files in it
  if((rappdirs::user_cache_dir('comtradr')!=
     tools::R_user_dir('comtradr', which = 'cache')) &&
    (length(list.files(rappdirs::user_cache_dir('comtradr')))>0)){
    fs::dir_copy(rappdirs::user_cache_dir('comtradr'),
                 tools::R_user_dir('comtradr', which = 'cache'),
                 overwrite = TRUE)
  }

  ## if not all files are copied over, abort and throw error
  if(!all(list.files(rappdirs::user_cache_dir('comtradr')) %in%
         list.files(tools::R_user_dir('comtradr', which = 'cache')))){
    cli::cli_abort('x' = 'Something went wrong while copying.')
  }

  if(fs::dir_exists(rappdirs::user_cache_dir('comtradr'))){
    fs::dir_delete(rappdirs::user_cache_dir('comtradr'))
  }
  ## as above but for the bulk cache directory
  if((rappdirs::user_cache_dir('comtradr_bulk')!=
      tools::R_user_dir('comtradr_bulk', which = 'cache')) &&
     (length(list.files(rappdirs::user_cache_dir('comtradr_bulk')))>0)){
    fs::dir_copy(rappdirs::user_cache_dir('comtradr_bulk'),
                 tools::R_user_dir('comtradr_bulk', which = 'cache'),
                 overwrite = TRUE)
  }

  if(!all(list.files(rappdirs::user_cache_dir('comtradr_bulk')) %in%
          list.files(tools::R_user_dir('comtradr_bulk', which = 'cache')))){
    cli::cli_abort('x' = 'Something went wrong while copying.')
  }
  if(fs::dir_exists(rappdirs::user_cache_dir('comtradr_bulk'))){
    fs::dir_delete(rappdirs::user_cache_dir('comtradr_bulk'))
  }
}

#' Get reference table from package data
#'
#' The first time, the function will read from disk, the second time from the
#' environment. In the case of a necessary update the new data will be saved
#' to the environment for the current session.
#' You can use this table to look at the reference tables and if necessary
#' extract respective classification codes by hand. In general we would
#' recommend the function `ct_commodity_lookup` for this purpose.
#' It uses the present function in the backend.
#'
#' @details The function allows you to query most possible input parameters
#' that are listed by the Comtrade API. The following dataset_ids are permitted:
#' * Datasets that contain codes for the `commodity_code` argument. The name is
#' the same as you would provide under `commodity_classification`.
#'   * 'HS' This is probably the most common classification for goods.
#'   * 'B4'
#'   * 'B5'
#'   * 'EB02'
#'   * 'EB10'
#'   * 'EB10S'
#'   * 'EB'
#'   * 'S1'
#'   * 'S2'
#'   * 'S3'
#'   * 'S4'
#'   * 'SS'
#'
#' * 'reporter'
#' * 'partner'
#' * 'mode_of_transport'
#' * 'customs_code'
#' * 'flow_direction'
#'
#' @param dataset_id The dataset ID, which is either partner,
#' reporter or a valid classification scheme.
#' @inheritParams ct_get_data
#' @export
#' @returns a tidy dataset with a reference table
#'
#' @examplesIf interactive()
#' ## get HS commodity table
#' ct_get_ref_table("HS")
#'
#' ## get reporter table
#' ct_get_ref_table("reporter")
#'
ct_get_ref_table <- function(dataset_id, update = FALSE, verbose = FALSE) {
  ## make switch to the name of the datasets,
  ## which are slightly different to the dataset_ids
  switch_list <- c(
    "B4" = "cmd_b4",
    "B5" = "cmd_b5",
    "EB02" = "cmd_eb02",
    "EB10" = "cmd_eb10",
    "EB10S" = "cmd_eb10s",
    "EB" = "cmd_eb",
    "HS" = "cmd_hs",
    "H0" = "cmd_h0",
    "H1" = "cmd_h1",
    "H2" = "cmd_h2",
    "H3" = "cmd_h3",
    "H4" = "cmd_h4",
    "H5" = "cmd_h5",
    "H6" = "cmd_h6",
    "S1" = "cmd_s1",
    "S2" = "cmd_s2",
    "S3" = "cmd_s3",
    "S4" = "cmd_s4",
    "SS" = "cmd_ss",
    "reporter" = "reporter",
    "partner" = "partner",
    "mode_of_transport" = "mot",
    "flow_direction" = "flow",
    "customs_code" = "customs"
  )

  ## check dataset id for valid values
  rlang::arg_match(dataset_id, values = names(switch_list))

  ## assign the proper file name to a new variable
  ref_table_id <- switch_list[dataset_id]

  ## attempt to return the data from the environment first
  data <- get(dataset_id, envir = ct_env)

  ## if the dataset is not yet loaded into the environment
  ## read it from disk and save to environment
  if (is.null(data)) {
    data <- fs::path_package(paste0("extdata/", ref_table_id, ".rds"),
      package = "comtradr"
    ) |>
      readr::read_rds()
    assign(dataset_id, data, envir = ct_env)
  }

  if (update & any(dataset_id %in% ct_env$updated)) {
    ## if update is true, but dataset_id has already been updated once
    ## only return message
    if (verbose) {
      cli::cli_inform(c("i" = paste0(
        "Already checked for updates for ",
        dataset_id, " in this session."
      )))
    }
    return(data)
  } else if (update) {
    ## if update is true and not yet updated in this
    ## session inform user that update process is starting
    if (verbose) {
      cli::cli_inform(c("i" = paste0(
        "Attempting to update reference table: ",
        dataset_id
      )))
    }

    ## download new reference table from the UN
    data_new <- ct_download_ref_table(ref_table_id = ref_table_id)

    if (unique(data_new$last_modified) > unique(data$last_modified)) {
      ## if the date last modified, returned in
      ## the header is newer than the old data
      if (verbose) {
        cli::cli_inform(c("i" = paste0(
          "Updated reference tables ",
          dataset_id,
          " with new data, last modified on: ",
          unique(data_new$last_modified)
        ))) # nolint
      }

      ## write to environment and overwrite old data
      assign(dataset_id, data_new, envir = ct_env)

      ## let environment variable know that dataset has been updated
      ct_env$updated <- c(ct_env$updated, dataset_id)

      return(data_new)
    } else {
      ## if last_modified is not newer,
      ## let user know that datasets are up to date.
      if (verbose) {
        cli::cli_inform(c("i" = paste0(
          "No update necessary for table ",
          dataset_id, "."
        )))
      }

      ## save in env variable, that update has been checked in this session
      ct_env$updated <- c(ct_env$updated, dataset_id)

      return(as.data.frame(data))
    }
  } else {
    ## if no update parameter passed on,
    ## just return the data read from disk or the env
    return(as.data.frame(data))
  }
}



#' Downloading the references tables from UN Comtrade
#'
#' @noRd
ct_download_ref_table <- function(ref_table_id) {
  iso_3 <- id <- group <- category <-
    text <- reporterCodeIsoAlpha3 <- entryEffectiveDate <- NULL
  entryExpiredDate <- isGroup <- PartnerCodeIsoAlpha3 <- country <- NULL

  ## attempt to get list of datasets of the UN from the env
  datasets <- get("list_of_datasets", envir = ct_env)
  if (is.null(datasets)) {
    ## if not in env read from disk
    path_datasets <-
      fs::path_package("extdata/list_of_datasets.rda", package = "comtradr")
    load(path_datasets, envir = ct_env)
  }

  ## filter to queried ref_table
  datasets <- get("list_of_datasets", envir = ct_env) |>
    poorman::filter(category == ref_table_id)

  ## download reference table from UN
  response <- httr2::request(datasets$fileuri) |>
    httr2::req_perform()

  ## parse response
  data <- response |>
    httr2::resp_body_json(simplifyVector = TRUE)

  ## get date of last modification from headers
  last_modified <-
    httr2::resp_header(header = "Last-Modified", resp = response) |>
    stringr::str_extract(pattern = "(\\d{2} [a-zA-Z]+ \\d{4})") |>
    replace_month() |>
    as.Date(format = "%d %m %Y")


  ## get results from json file
  data <- data$results

  ## write last modification time to ref table
  data$last_modified <- last_modified

  ## cleaning for reporter and partner
  if (ref_table_id %in% c("reporter", "partner")) {
    if (ref_table_id == "reporter") {
      data <- data |>
        poorman::transmute(
          id,
          country = text,
          iso_3 = reporterCodeIsoAlpha3,
          entry_year = lubridate::year(entryEffectiveDate),
          exit_year = lubridate::year(entryExpiredDate),
          group = isGroup,
          last_modified
        )
    } else {
      data <- data |>
        poorman::transmute(
          id,
          country = text,
          iso_3 = PartnerCodeIsoAlpha3,
          entry_year = lubridate::year(entryEffectiveDate),
          exit_year = lubridate::year(entryExpiredDate),
          group = isGroup,
          last_modified
        ) |>
        poorman::mutate(iso_3 = ifelse(country == "World", "World", iso_3))
    }
    return(data)
  } else {
    return(data)
  }
}



#' UN Comtrade commodities database query
#'
#' The Comtrade API requires that searches for specific commodities be done
#' using commodity codes. This is a helper function for querying the
#' Comtrade commodity database. It takes as input a vector of
#' commodities or commodity codes. Output is a list or vector of commodity
#' descriptions or codes associated with the input search_terms. For use with
#' the UN Comtrade API, full API docs can be found at
#' \url{https://unstats.un.org/wiki/display/comtrade/}
#'
#' @inheritParams ct_get_data
#'
#' @param search_terms Commodity names or commodity codes, as a char or numeric
#'  vector.
#'
#' @param return_code Logical, if set to FALSE, the function will return a
#'  set of commodity descriptions along with commodity codes (as a single
#'  string for each match found), if set to TRUE it will return only the
#'  commodity codes. Default value is FALSE.
#' @param return_char Logical, if set to FALSE, the function will return the
#'  matches as a named list, if set to TRUE it will return them as a character
#'  vector. Default value is FALSE.
#' @param verbose Logical, if set to TRUE, a warning message will print to
#'  console if any of the elements of input "search_terms" returned no matches
#'  (message will indicate which elements returned no data). Default is TRUE.
#' @param ignore.case logical, to be passed along to arg ignore.case within
#'  \code{\link{grepl}}. Default value is TRUE.
#' @param ... additional args to be passed along to \code{\link{grepl}}.
#'
#' @return A list or character vector of commodity descriptions and/or
#'  commodity codes that are matches with the elements of "search_terms".
#'
#' @details This function uses regular expressions (regex) to find matches
#'  within the commodity DB. This means it will treat as a match any commodity
#'  description that contains the input search term. For more on using regex
#'  within R, see
#'  \url{https://stat.ethz.ch/R-manual/R-devel/library/base/html/regex.html}
#'
#' @export
#'
#' @seealso \code{\link{grepl}}
#'
#' @examplesIf interactive()
#' comtradr::ct_commodity_lookup("wine")
ct_commodity_lookup <- function(search_terms,
                                return_code = FALSE,
                                commodity_classification = "HS",
                                type = "goods",
                                return_char = FALSE,
                                verbose = TRUE,
                                ignore.case = TRUE,
                                update = FALSE,
                                ...) {
  stopifnot(mode(search_terms) %in% c("numeric", "character"))
  search_terms <- as.character(search_terms)

  commodity_classification <- check_clCode(
    check_type(type),
    commodity_classification,
    bulk = FALSE
  )

  # Fetch the commodity database from ct_env.
  commodity_df <- ct_get_ref_table(
    dataset_id = commodity_classification,
    update,
    verbose
  )


  # transform input arg "return_code" to match the col name indicated
  # (TRUE == "code", FALSE == "commodity").
  if (return_code) {
    return_col <- "id"
  } else {
    return_col <- "text"
  }

  # For each element of input arg "search_terms", fetch all commodity
  # descriptions and/or codes from the database. Output will be a list.
  ans <- purrr::map(search_terms, function(x) {
    # Determine whether the param 'value' is a commodity or a code, then
    # perform the look up.
    if (grepl("[A-z]", x)) {
      lu <- "text"
    } else {
      lu <- "id"
    }
    commodity_df[grepl(x, commodity_df[[lu]],
      ignore.case = ignore.case
    ), return_col]
  })

  # If "verbose" == TRUE, create warning message if any of the elements of
  # input arg "search_terms" produced no search results.
  if (verbose) {
    check_len <- purrr::map_int(ans, length)
    if (any(check_len == 0)) {
      if (any(check_len > 0)) {
        msg <- paste0(
          "There were no matching results found for inputs: ",
          paste(search_terms[which(check_len == 0)], collapse = ", ")
        )
      } else {
        msg <- "There were no matching results found"
      }
      cli::cli_warn(msg)
    }
  }

  # If "return_char" == TRUE, unlist obj "ans". Otherwise, assign names to the
  # elements of obj "ans" (names will be taken from input arg "search_terms").
  if (return_char) {
    ans <- unlist(ans, FALSE, FALSE)
  } else {
    names(ans) <- search_terms
  }
  return(ans)
}

#' replacing month with numbers locale unspecific
#'
#' @noRd
replace_month <- function(date_str) {
  months <- c(
    "Jan" = "01", "Feb" = "02", "Mar" = "03",
    "Apr" = "04", "May" = "05", "Jun" = "06",
    "Jul" = "07", "Aug" = "08", "Sep" = "09",
    "Oct" = "10", "Nov" = "11", "Dec" = "12"
  )
  stringr::str_replace_all(date_str, months)
}


#' convert file size from comtrade
#'
#' @noRd
convert_file_size <- function(file_sizes) {
  units <- c(B = 1, KB = 1024, MB = 1024^2, GB = 1024^3, TB = 1024^4)
  sapply(file_sizes, function(x) {
    parts <- strsplit(x, " ")[[1]]
    number <- as.numeric(parts[1])
    unit <- units[toupper(parts[2])]
    number * unit
  })
}

#' format file size from comtrade
#'
#' @noRd
format_file_size <- function(size_in_bytes) {
  units <- c("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
  if (size_in_bytes == 0) {
    return("0 B")
  }
  i <- floor(log(size_in_bytes, 1024))
  p <- size_in_bytes / 1024^i
  paste0(format(p, digits = 3, nsmall = 1), " ", units[i + 1])
}

Try the comtradr package in your browser

Any scripts or data that you put into this service are public.

comtradr documentation built on Sept. 30, 2024, 9:43 a.m.