#' Find and write countries which need manual processing
#'
#' @description
#' This function reads the file `all-countries-error.csv` from the folder
#' [issues](https://github.com/finddx/FINDCov19TrackerData/tree/master/issues),
#' keeps only the rows dated today, and generates a template for the current
#' day, intended for
#' [manual/need-processing/](https://github.com/finddx/FINDCov19TrackerData/tree/master/manual/need-processing).
#'
#' The template is written to `need-manual-processing.csv` in the current
#' working directory and contains rows *only* for the current day.
#' To see if there are errors for past dates check the folder
#' [issues](https://github.com/finddx/FINDCov19TrackerData/tree/master/issues).
#'
#' For rows in which both `tests_cumulative` and `new_tests` are missing,
#' `new_tests` is set to 0 in the template. Every row gets
#' `source = "manually"`, plus the `status` and source `url` of the country
#' looked up in `resources/countries-urls.csv`.
#'
#' @return The data frame that was written, invisibly (the return value of
#'   [readr::write_csv()]).
#'
#' @importFrom readr read_csv write_csv
#' @importFrom dplyr pull filter rename
#' @importFrom tibble add_column
#' @import readr
#' @export
calc_manual_countries <- function() {
  # Country metadata: only the JHU identifier, the processing status and the
  # source URL are kept; the identifier becomes the join key `country`.
  countries_all <- readr::read_csv(
    "https://raw.githubusercontent.com/finddx/FINDCov19TrackerData/master/resources/countries-urls.csv", # nolint
    col_names = TRUE,
    col_types = readr::cols(
      country = readr::col_character(),
      jhu_ID = readr::col_character(),
      source = readr::col_character(),
      `alternative link` = readr::col_character(),
      type = readr::col_character(),
      data_url = readr::col_character(),
      date_format = readr::col_character(),
      xpath_cumul = readr::col_character(),
      xpath_new = readr::col_character(),
      backlog = readr::col_character(),
      comment = readr::col_character(),
      status = readr::col_character()
    ),
    quoted_na = FALSE
  ) %>%
    dplyr::select(jhu_ID, status, source) %>%
    dplyr::rename(country = jhu_ID, url = source)

  # Compare dates as Date objects rather than Date vs. formatted string.
  today <- Sys.Date()

  # The error file has a fixed name (no date in the URL); today's rows are
  # selected with the filter below. `source` is dropped here because it is
  # re-created as "manually" further down.
  countries_error <- readr::read_csv(
    "https://raw.githubusercontent.com/finddx/FINDCov19TrackerData/master/issues/all-countries-error.csv", # nolint
    col_types = readr::cols(
      country = readr::col_character(),
      date = readr::col_date(format = ""),
      tests_cumulative = readr::col_double(),
      new_tests = readr::col_double(),
      tests_cumulative_corrected = readr::col_double(),
      new_tests_corrected = readr::col_double(),
      source = readr::col_character()
    ),
    quoted_na = FALSE
  ) %>%
    dplyr::select(-source) %>%
    dplyr::filter(date == today)

  # only keep countries which need manual processing (including their
  # source URLs)
  countries_manual_csv <- countries_error %>%
    # explicit key: `country` is the only column shared by both tables
    dplyr::left_join(countries_all, by = "country") %>%
    tibble::add_column(source = "manually") %>%
    dplyr::relocate(
      country, tests_cumulative, new_tests,
      tests_cumulative_corrected, new_tests_corrected,
      date, source, status, url
    ) %>%
    # rows without any reported numbers get new_tests = 0
    dplyr::mutate(new_tests = dplyr::if_else(
      is.na(tests_cumulative) & is.na(new_tests),
      0,
      new_tests
    ))

  # write csv
  readr::write_csv(
    data.frame(countries_manual_csv),
    "need-manual-processing.csv"
  )
}
#' Find the date of the last update for each country
#'
#' @description
#' Reads the processed test data
#' (`processed/coronavirus_tests.csv`) and, for every country (identified by
#' `jhu_ID`), determines:
#'
#' * `last_update`: the most recent date with `new_tests_corrected > 0`.
#'   Countries without any such row get the placeholder date `2018-01-01`.
#' * `days_no_update`: the number of rows dated on or after `last_update`,
#'   minus one. (Presumably there is one row per country and day, which makes
#'   this the number of days without new tests; this is not checked here.)
#'
#' Rows with a missing `new_tests_corrected` are treated as "no new tests"
#' so that a single missing value does not turn the whole country's result
#' into `NA`.
#'
#' The result is sorted by decreasing `days_no_update`, joined with the
#' source `url` from `resources/countries-urls.csv`, and written to
#' `countries-last-update.csv` in the current working directory.
#'
#' @return The data frame that was written, invisibly (the return value of
#'   [readr::write_csv()]).
#'
#' @importFrom readr read_csv write_csv
#' @importFrom dplyr pull filter rename
#' @importFrom tibble add_column
#' @import readr
#' @import dplyr
#' @export
updates_dates_countries <- function() {
  # Processed test data; the JHU identifier becomes the key `country`.
  countries_all <- readr::read_csv(
    "https://raw.githubusercontent.com/finddx/FINDCov19TrackerData/master/processed/coronavirus_tests.csv", # nolint
    col_names = TRUE,
    col_types = readr::cols(
      country = readr::col_character(),
      date = readr::col_date(format = ""),
      new_tests = readr::col_double(),
      tests_cumulative = readr::col_double(),
      jhu_ID = readr::col_character(),
      source = readr::col_character(),
      new_tests_corrected = readr::col_double(),
      tests_cumulative_corrected = readr::col_double()
    ),
    quoted_na = FALSE
  ) %>%
    dplyr::select(jhu_ID, date, new_tests_corrected) %>%
    dplyr::rename(country = jhu_ID)

  # Source URL per country, keyed the same way.
  countries_url <- readr::read_csv(
    "https://raw.githubusercontent.com/finddx/FINDCov19TrackerData/master/resources/countries-urls.csv", # nolint
    col_names = TRUE,
    col_types = readr::cols(
      country = readr::col_character(),
      jhu_ID = readr::col_character(),
      source = readr::col_character(),
      `alternative link` = readr::col_character(),
      type = readr::col_character(),
      data_url = readr::col_character(),
      date_format = readr::col_character(),
      xpath_cumul = readr::col_character(),
      xpath_new = readr::col_character(),
      backlog = readr::col_character(),
      comment = readr::col_character(),
      status = readr::col_character()
    ),
    quoted_na = FALSE
  ) %>%
    dplyr::select(jhu_ID, source) %>%
    dplyr::rename(country = jhu_ID, url = source)

  # Placeholder date for rows that do not report new tests.
  no_update_date <- as.Date("2018-01-01")

  countries_updates <- countries_all %>%
    dplyr::group_by(country) %>%
    # Date of the row if it reports new tests, placeholder otherwise.
    # `missing =` keeps NA counts from propagating through max() below.
    dplyr::mutate(date_change = dplyr::if_else(
      new_tests_corrected > 0,
      date,
      no_update_date,
      missing = no_update_date
    )) %>%
    # Latest date with new tests, repeated on every row of the country.
    dplyr::mutate(date_new_tests = max(date_change)) %>%
    # 1 for rows dated on or after the last update, 0 for earlier rows.
    dplyr::mutate(dates_no_update = dplyr::if_else(
      date < date_new_tests,
      0,
      1
    )) %>%
    dplyr::summarise(
      last_update = max(date_new_tests),
      # minus one so the row of the last update itself is not counted
      days_no_update = sum(dates_no_update) - 1,
      .groups = "drop"
    ) %>%
    dplyr::arrange(dplyr::desc(days_no_update)) %>%
    # explicit key: `country` is the only column shared by both tables
    dplyr::left_join(countries_url, by = "country")

  # write csv
  readr::write_csv(
    data.frame(countries_updates),
    "countries-last-update.csv"
  )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.