R/reports.R

Defines functions get_brt_world_bank_classification get_brt_predictions get_brt_model out_4parameter_list out_3parameter_list reports_all reports_4parameter_day reports_3parameter_day set_date

# Set global variable date_0
#' @param date ISO date to be used for date_0
set_date <- function(date) {
  date_0 <<- date
}

#' Get lmic reports when using 3 parameters
#'
#' @param date ISO Date of reports
#'
#' @return data.frame of report ids and iso3c countries
#' @import orderly
#' @export
reports_3parameter_day <- function(date = NULL) {

  wd <- file.path(here::here(),"analysis/data/raw_data/server_results/")
  wdold <- getwd()
  setwd(wd)

  db <- orderly::orderly_db("destination")
  if (is.null(date)) {
    date <- as.character(Sys.Date())
  }

  ## First find the id corresponding to the ecdc report with data.  If
  ## there are more than one, it's not totally clear what you want to
  ## do as you might want to take the earliest or the latest.
  ## Probably we want to take *all* and do the join over that, which
  ## is easy enough to do if you replace the '= $1' and replace with
  ## 'IN (%s)' and interpolate 'paste(sprintf('"%s"', id), collapse = ", ")'
  sql <- 'SELECT report_version.id
            FROM report_version
            JOIN parameters
              ON parameters.report_version = report_version.id
           WHERE report_version.report = "ecdc"
             AND parameters.value = $1'
  id <- DBI::dbGetQuery(db, sql, date)$id
  if (length(id) == 0L) {
    stop(sprintf("No 'ecdc' report for '%s'", as.character(date)))
  }

  ## Then find all lmic_reports reports that use files from this ecdc
  ## report.  This is a bit awful and I might add direct link or a
  ## view to make this easier at some point.
  sql <- 'SELECT report_version.id, parameters.value as country
            FROM report_version_artefact
            JOIN file_artefact
              ON file_artefact.artefact = report_version_artefact.id
            JOIN depends
              ON depends.use = file_artefact.id
            JOIN report_version
              ON report_version.id = depends.report_version
            JOIN parameters
              ON parameters.report_version = report_version.id
           WHERE report_version_artefact.report_version IN (%s)
             AND report = "lmic_reports_google_pmcmc_no_decouple"
             AND parameters.name = "iso3c"
           ORDER BY country, report_version.id'
  sql <- sprintf(sql, paste(sprintf('"%s"', id), collapse = ", "))
  reports <- DBI::dbGetQuery(db, sql)

  if (any(duplicated(reports$country))) {
    keep <- tapply(seq_len(nrow(reports)), reports$country, max)
    reports <- reports[keep, ]
    rownames(reports) <- NULL
  }

  reports$date <- as.character(date)
  DBI::dbDisconnect(db)
  setwd(wdold)
  return(reports)
}

#' Get lmic reports using 4 parameters
#'
#' @param date ISO Date of reports
#'
#' @return data.frame of report ids and iso3c countries
#' @import orderly
#' @export
reports_4parameter_day <- function(date = NULL) {

  wd <- file.path(here::here(),"analysis/data/raw_data/server_results/")
  wdold <- getwd()
  setwd(wd)

  db <- orderly::orderly_db("destination")
  if (is.null(date)) {
    date <- as.character(Sys.Date())
  }

  ## First find the id corresponding to the ecdc report with data.  If
  ## there are more than one, it's not totally clear what you want to
  ## do as you might want to take the earliest or the latest.
  ## Probably we want to take *all* and do the join over that, which
  ## is easy enough to do if you replace the '= $1' and replace with
  ## 'IN (%s)' and interpolate 'paste(sprintf('"%s"', id), collapse = ", ")'
  sql <- 'SELECT report_version.id
            FROM report_version
            JOIN parameters
              ON parameters.report_version = report_version.id
           WHERE report_version.report = "ecdc"
             AND parameters.value = $1'
  id <- DBI::dbGetQuery(db, sql, date)$id
  if (length(id) == 0L) {
    stop(sprintf("No 'ecdc' report for '%s'", as.character(date)))
  }

  ## Then find all lmic_reports reports that use files from this ecdc
  ## report.  This is a bit awful and I might add direct link or a
  ## view to make this easier at some point.
  sql <- 'SELECT report_version.id, parameters.value as country
            FROM report_version_artefact
            JOIN file_artefact
              ON file_artefact.artefact = report_version_artefact.id
            JOIN depends
              ON depends.use = file_artefact.id
            JOIN report_version
              ON report_version.id = depends.report_version
            JOIN parameters
              ON parameters.report_version = report_version.id
           WHERE report_version_artefact.report_version IN (%s)
             AND report = "lmic_reports_google_pmcmc"
             AND parameters.name = "iso3c"
           ORDER BY country, report_version.id'
  sql <- sprintf(sql, paste(sprintf('"%s"', id), collapse = ", "))
  reports <- DBI::dbGetQuery(db, sql)

  if (any(duplicated(reports$country))) {
    keep <- tapply(seq_len(nrow(reports)), reports$country, max)
    reports <- reports[keep, ]
    rownames(reports) <- NULL
  }

  reports$date <- as.character(date)
  DBI::dbDisconnect(db)
  setwd(wdold)
  return(reports)
}

#' @noRd
reports_all <- function() {

  wd <- file.path(here::here(),"analysis/data/raw_data/server_results/")
  wdold <- getwd()
  setwd(wd)

  db <- orderly::orderly_db("destination")

  ## First find the id corresponding to the ecdc report with data
  sql <- 'SELECT report_version.id
            FROM report_version
            JOIN parameters
              ON parameters.report_version = report_version.id
           WHERE report_version.report = "ecdc"'

  id <- DBI::dbGetQuery(db, sql)$id

  # 4p
  # ----------------------------------------------------------------------------

  sql <- paste0('SELECT report_version.id, parameters.value as country
            FROM report_version_artefact
            JOIN file_artefact
              ON file_artefact.artefact = report_version_artefact.id
            JOIN depends
              ON depends.use = file_artefact.id
            JOIN report_version
              ON report_version.id = depends.report_version
            JOIN parameters
              ON parameters.report_version = report_version.id
           WHERE report_version_artefact.report_version IN (%s)
             AND report = "', "lmic_reports_google_pmcmc", '"
             AND parameters.name = "iso3c"
           ORDER BY country, report_version.id')
  sql <- sprintf(sql, paste(sprintf('"%s"', id), collapse = ", "))
  reports <- DBI::dbGetQuery(db, sql)

  all <- DBI::dbGetQuery(db, 'select * from parameters')
  all <- all[all$name == "date",]
  reports$date <- all$value[match(reports$id, all$report_version)]

  # keep max on each day
  max_reports <- group_by(reports, country, date) %>% summarise(id = max(id))

  # 3p
  # ----------------------------------------------------------------------------

  sql <- paste0('SELECT report_version.id, parameters.value as country
            FROM report_version_artefact
            JOIN file_artefact
              ON file_artefact.artefact = report_version_artefact.id
            JOIN depends
              ON depends.use = file_artefact.id
            JOIN report_version
              ON report_version.id = depends.report_version
            JOIN parameters
              ON parameters.report_version = report_version.id
           WHERE report_version_artefact.report_version IN (%s)
             AND report = "', "lmic_reports_google_pmcmc_no_decouple", '"
             AND parameters.name = "iso3c"
           ORDER BY country, report_version.id')
  sql <- sprintf(sql, paste(sprintf('"%s"', id), collapse = ", "))
  reports <- DBI::dbGetQuery(db, sql)

  all <- DBI::dbGetQuery(db, 'select * from parameters')
  all <- all[all$name == "date",]
  reports$date <- all$value[match(reports$id, all$report_version)]

  # keep max on each day
  max_reports_3p <- group_by(reports, country, date) %>% summarise(id = max(id))

  # finish ---------------------------------------------------------------------

  max_reports$model <- "4p"
  max_reports_3p$model <- "3p"

  all_reports <- rbind(max_reports, max_reports_3p)
  all_reports <- all_reports[as.Date(all_reports$date) %in% (as.Date("2020-04-11") + seq(0,84,7)), ]

  DBI::dbDisconnect(db)
  setwd(wdold)
  return(all_reports)
}


#' @noRd
out_3parameter_list <- function(date) {

  reports <- reports_3parameter_day(date)

  grids <- pbapply::pblapply(seq_along(reports$id), function(x) {

    fs <- file.path(here::here(),
                    "analysis/data/raw_data/server_results/archive/lmic_reports_google_pmcmc_no_decouple",
                    reports$id[x],
                    "grid_out.rds")

    return(readRDS(fs))

  })

  names(grids) <- reports$country
  return(grids)
}

#' @noRd
out_4parameter_list <- function(date) {

  reports <- reports_4parameter_day(date)

  grids <- pbapply::pblapply(seq_along(reports$id), function(x) {

    fs <- file.path(here::here(),
                    "analysis/data/raw_data/server_results/archive/lmic_reports_google_pmcmc",
                    reports$id[x],
                    "grid_out.rds")

    return(readRDS(fs))

  })

  names(grids) <- reports$country
  return(grids)
}

#' @noRd
get_brt_model <- function(date) {

  wd <- file.path(here::here(),"analysis/data/raw_data/server_results/")
  wdold <- getwd()
  setwd(wd)

  db <- orderly::orderly_db("destination")
  if (is.null(date)) {
    date <- as.character(Sys.Date())
  }

  ## First find the id corresponding to the ecdc report with data.  If
  ## there are more than one, it's not totally clear what you want to
  ## do as you might want to take the earliest or the latest.
  ## Probably we want to take *all* and do the join over that, which
  ## is easy enough to do if you replace the '= $1' and replace with
  ## 'IN (%s)' and interpolate 'paste(sprintf('"%s"', id), collapse = ", ")'
  sql <- 'SELECT report_version.id
            FROM report_version
            JOIN parameters
              ON parameters.report_version = report_version.id
           WHERE report_version.report = "ecdc"
             AND parameters.value = $1'
  id <- DBI::dbGetQuery(db, sql, date)$id
  if (length(id) == 0L) {
    stop(sprintf("No 'ecdc' report for '%s'", as.character(date)))
  }


  sql <- 'SELECT report_version.id
            FROM report_version
            JOIN parameters
              ON parameters.report_version = report_version.id
           WHERE report_version.report = "brt_google_mobility"
             AND parameters.value = $1'
  sql <- sprintf(sql, paste(sprintf('"%s"', id), collapse = ", "))
  reports <- DBI::dbGetQuery(db, sql, date)
  brt_id_max <- max(reports$id)

  # copy brt
  src <- file.path(wd, "archive", "brt_google_mobility", brt_id_max, "google_brt_model.rds")
  brt <- readRDS(src)
  DBI::dbDisconnect(db)
  return(brt)

  }


#' @noRd
get_brt_predictions <- function(date) {

  wd <- file.path(here::here(),"analysis/data/raw_data/server_results/")
  wdold <- getwd()
  setwd(wd)

  db <- orderly::orderly_db("destination")
  if (is.null(date)) {
    date <- as.character(Sys.Date())
  }

  ## First find the id corresponding to the ecdc report with data.  If
  ## there are more than one, it's not totally clear what you want to
  ## do as you might want to take the earliest or the latest.
  ## Probably we want to take *all* and do the join over that, which
  ## is easy enough to do if you replace the '= $1' and replace with
  ## 'IN (%s)' and interpolate 'paste(sprintf('"%s"', id), collapse = ", ")'
  sql <- 'SELECT report_version.id
            FROM report_version
            JOIN parameters
              ON parameters.report_version = report_version.id
           WHERE report_version.report = "ecdc"
             AND parameters.value = $1'
  id <- DBI::dbGetQuery(db, sql, date)$id
  if (length(id) == 0L) {
    stop(sprintf("No 'ecdc' report for '%s'", as.character(date)))
  }


  sql <- 'SELECT report_version.id
            FROM report_version
            JOIN parameters
              ON parameters.report_version = report_version.id
           WHERE report_version.report = "brt_google_mobility"
             AND parameters.value = $1'
  sql <- sprintf(sql, paste(sprintf('"%s"', id), collapse = ", "))
  reports <- DBI::dbGetQuery(db, sql, date)
  brt_id_max <- max(reports$id)

  # copy brt
  src <- file.path(wd, "archive", "brt_google_mobility", brt_id_max, "google_brt.rds")
  brt <- readRDS(src)
  DBI::dbDisconnect(db)
  return(brt)

}


#' @noRd
get_brt_world_bank_classification <- function(date) {

  wd <- file.path(here::here(),"analysis/data/raw_data/server_results/")
  wdold <- getwd()
  setwd(wd)

  db <- orderly::orderly_db("destination")
  if (is.null(date)) {
    date <- as.character(Sys.Date())
  }

  ## First find the id corresponding to the ecdc report with data.  If
  ## there are more than one, it's not totally clear what you want to
  ## do as you might want to take the earliest or the latest.
  ## Probably we want to take *all* and do the join over that, which
  ## is easy enough to do if you replace the '= $1' and replace with
  ## 'IN (%s)' and interpolate 'paste(sprintf('"%s"', id), collapse = ", ")'
  sql <- 'SELECT report_version.id
            FROM report_version
            JOIN parameters
              ON parameters.report_version = report_version.id
           WHERE report_version.report = "ecdc"
             AND parameters.value = $1'
  id <- DBI::dbGetQuery(db, sql, date)$id
  if (length(id) == 0L) {
    stop(sprintf("No 'ecdc' report for '%s'", as.character(date)))
  }


  sql <- 'SELECT report_version.id
            FROM report_version
            JOIN parameters
              ON parameters.report_version = report_version.id
           WHERE report_version.report = "brt_google_mobility"
             AND parameters.value = $1'
  sql <- sprintf(sql, paste(sprintf('"%s"', id), collapse = ", "))
  reports <- DBI::dbGetQuery(db, sql, date)
  brt_id_max <- max(reports$id)

  # copy brt
  src <- file.path(wd, "archive", "brt_google_mobility", brt_id_max, "World_Bank_Country_Metadata.csv")
  brt <- read.csv(src)
  DBI::dbDisconnect(db)
  return(brt)

}
mrc-ide/global-lmic-meffs documentation built on July 24, 2020, 12:30 a.m.