# Several packages are required for different aspects of  analysis with *R*. 
# You will need to install these before starting. 
# These packages can be quite large and may take a while to download in the
# field. If you have access to a USB key with these packages, it makes sense to
# copy and paste the packages into your computer's R package library 
# (run the command .libPaths() to see the folder path). 
# For help installing packages, please visit https://r4epis.netlify.com/welcome


# set options for rmarkdown 
knitr::opts_chunk$set(echo = FALSE,    # hide all code chunks in output
                      message = FALSE, # hide all messages in output
                      warning = FALSE, # hide all warnings in output
                      collapse = TRUE, # combine all source/output to single block
                      fig.width = 20,  # define figure width
                      fig.height = 8,  # define figure height
                      dpi = 300,       # define figure definitions
                      cache = F)       # run all code chunks (even if repeated)


## Installing required packages for this template
required_packages <- c(
                       "knitr",          # create output docs
                       "here",           # find your files
                       "rio",            # read in data
                       "lubridate",      # work with dates
                       "tsibble",        # for working with dates as time series
                       "slider",         # for calculating rolling averages
                       "dplyr",          # clean/shape data
                       "janitor",        # clean/shape data
                       "tidyr",          # clean/shape data
                       "matchmaker",     # dictionary-based standardization of variables
                       "ggplot2",        # create plots
                       "ggrepel",        # space out overlapping data in ggplot2
                       "flextable",      # for nice tables
                       "grid",           # add flextables to ggplots 
                       "patchwork",      # combine ggplots 
                       "RColorBrewer",   # for defining colour schemes
                       "purrr",          # for running regressions over multiple countries 
                       "broom"           # for cleaning up regression outputs
                       )

for (pkg in required_packages) {
  # install packages if not already present
  if (!pkg %in% rownames(installed.packages())) {
    install.packages(pkg)
  }
  # load packages to this current session 
  library(pkg, character.only = TRUE)
}



## define a theme for ggplot (so all look similar) 
epi_theme <- theme_classic() + 
    theme(
      text = element_text(size = 18, family = "Arial"), 
      ## rotate x axis labels
      axis.text.x  = element_text(angle = 45, vjust = 0.5),
      axis.title   = element_text(color = "black", face = "bold"), 
      legend.title = element_blank(), 
      legend.text  = element_text(color = "black"), 
      legend.position  = "bottom",
      legend.direction = "horizontal",
      ## colour and size the grid lines in the plot 
      # panel.grid.minor = element_line(colour = "grey90", size = 0.5), 
      panel.grid.major = element_line(colour = "grey90")
      )
# This section is used to define your week of interest. It will be used later
# to filter your outputs. 
# You can also set the day which your epiweek starts on - the default for this
# is Monday. 


## define the current date of interest
this_week <- as.Date("2020-12-09")

## week number of current week
No_week <- week(this_week)

## number of expected countries reporting  
expected_countries <- 47
# This chunk reads in all the data necessary as well as dictionaries.

## define path to folder with raw data 
data_folder <- here::here("data", "raw")

## import linelist of confirmed cases 
covid_afro_00 <- 
  paste0(data_folder, "/ConfirmedCases.csv") %>% 
  rio::import() %>% 
  ## clean variable names 
  clean_names() 

## import data dictionary 
cleaning_rules <- rio::import(here::here("data","cleaned","cleaning_rules_s.xlsx"))

## import population data 
Tab_pop <- paste0(data_folder, "/Tab_population.xlsx") %>% 
  rio::import() %>% 
  ## clean variable names 
  clean_names() %>%  
  select(country, population) 

## import population over 65 
pop65 <- paste0(data_folder, "/population_over65.xlsx") %>% 
  rio::import() %>% 
  ## clean variable names 
  clean_names() %>% 
  select(country, perc_over65)

## import testing data 
## source of data: https://ourworldindata.org/coronavirus-testing 
covid_testing_0 <- paste0(data_folder, "/owid-covid-data.csv") %>% 
  rio::import() %>% 
  ## clean variable names 
  clean_names() %>%  
  filter(continent == "Africa", 
         !location %in% c("Djibouti",
                          "Egypt",
                          "Morocco",
                          "Tunisia",
                          "Sudan" ,
                          "Western Sahara",
                          "Somalia",
                          "Libya"))

## import case counts data from WHO eastern mediteranean region
## source of data: https://covid19.who.int/table
covid_emro_0 <-  paste0(data_folder, "/WHO_COVID-19_global_table.csv") %>% 
  rio::import() %>% 
  ## clean variable names 
  clean_names() %>% 
  filter(name %in% c("Djibouti", 
                     "Egypt", 
                     "Morocco", 
                     "Somalia", 
                     "Sudan", 
                     "Tunisia", 
                     "Libya"))
# This chunk does basic data cleaning 


#### clean covid linelist 

## fix date variables in original dataset 
covid_afro_00$date_of_death <- lubridate::as_date(covid_afro_00$date_of_death)
covid_afro_00$reporting_date <- lubridate::as_date(covid_afro_00$reporting_date)

## create clean dataset by recoding variables based on dictionary
covid_afro <- match_df(covid_afro_00, dictionary = cleaning_rules)

## find the date of the initial report (for whole region)
covid_afro$first_report <- min(covid_afro$reporting_date)

# create epi-week variables (return the first date of the week it is in)
covid_afro$epi_week <- floor_date(covid_afro$reporting_date,
                                  "week", week_start = 1)

covid_afro$epi_week2 <- floor_date(covid_afro$date_of_death, 
                                   "week", week_start = 1)

## create an epi week variable as calendar week 
## so can show prev week easier 
covid_afro$calendar_week <- week(covid_afro$reporting_date)
covid_afro$calendar_week2 <- week(covid_afro$date_of_death)

## add in continent variable
covid_afro$continent <- "Afro"


#### clean covid testing 

## rename country
covid_testing_0 <- covid_testing_0 %>% 
  rename("country" = "location")

## create clean dataset by recoding variables based on dictionary
covid_testing <- match_df(covid_testing_0, dictionary = cleaning_rules)

## fix date variable
covid_testing$date <- as.Date(covid_testing$date)

## make sure number of new tests is numeric
covid_testing$new_tests <- as.numeric(covid_testing$new_tests)

#### clean covid testing 

## rename country 
covid_emro_0 <- covid_emro_0 %>% 
  rename("country" = "name")

covid_emro <- match_df(covid_emro_0, dictionary = cleaning_rules)

Disclaimer

Cumulative Covid-19 Cases and Deaths on the African continent

## This chunk creates a table of counts for the cumulative number of cases and 
## deaths on the continent

## get the number of cases and deaths from AFRO
tab_afro_cases_deaths <- covid_afro %>% 
                         summarise(
                           continent = "AFRO", 
                           cases = n(), 
                           deaths = sum(outcome == "Dead")
                         )
## get the number of cases and deaths from EMRO
tab_emro_cases_deaths <- covid_emro %>%
                         summarise(
                           continent = "EMRO", 
                           cases = sum(cases_cumulative_total), 
                           deaths = sum(deaths_cumulative_total)
                         )

## combined AFRO and EMRO 
bind_rows(tab_afro_cases_deaths,tab_emro_cases_deaths) %>% 
  ## add row totals 
  janitor::adorn_totals("row", fill="-", na.rm="TRUE") %>% 
  mutate(
    ## calculate case fatality ratio based on counts
    cfr          =  round((deaths/cases) * 100, 1), 
    ## add total cases and deaths as columns (workaround) 
    ## surprised that this ignores janitor totals (this is a good thing)
    total_cases  = sum(cases, na.rm = "TRUE"), 
    total_deaths = sum(deaths, na.rm = "TRUE"), 
    ## calculate percentages from totals
    perc_cases   = round( (cases / (total_cases / 2)) * 100, 1), 
    perc_deaths  = round( (deaths / (total_deaths / 2)) * 100, 1)
    ) %>% 
  ## drop the total columns and rename others
  select(
    "WHO region"   = continent, 
    "Cumul cases"  = cases, 
    "Cumul deaths" = deaths, 
    "CFR(%)"       = cfr, 
    "% cases"      = perc_cases, 
    "% deaths"     = perc_deaths
  ) %>% 
  ## create a flextable with cell width of 2 inches and heights of 1 inches each
  flextable(cwidth = 2, cheight = 1) %>% 
  ## make all the text size 18 
  fontsize(size = 18, part = "all") %>% 
  ## add a foot note referencing column 1 row 2
  footnote(j = 1, i = 2, 
                      value = as_paragraph(
                        "Includes: Djibouti, Egypt, Morocco, Libya, Somalia , Sudan, and Tunisia"))

Cumulative Cases, Deaths and Recoveries Reported from 47 Countries

## This chunk creates a table of counts for the cumulative number of cases and 
## deaths on the continent and overlays it on an epicurve with 7 day rolling average

## get table with counts to overlay 
tab_afro_cases_deaths2 <- covid_afro %>%
  summarise(
    cases          = n(), 
    deaths         = sum(outcome == "Dead"),
    cfr            =  round(deaths/cases * 100, 1),
    recovered      = sum(outcome == "Recovered"),
    perc_recovered = round(recovered/cases * 100, 1)
    ) %>% 
    ## drop the total columns and rename others
  select(
    "Cumul Cases"  = cases, 
    "Cumul Deaths" = deaths, 
    "CFR(%)"       = cfr, 
    "Cumul Recovered"      = recovered, 
    "% Recovered"          = perc_recovered
  ) %>% 
  ## flip to long 
  mutate(across(everything(), as.character)) %>% 
  pivot_longer(everything(), 
               names_to = "WHO Region", 
               values_to = "AFRO") %>% 
  flextable(cwidth = c(1.5, 1)) %>% 
  as_raster()

## plot table so can add with patchwork to epicurve 
tab_afro_cases_deaths2_plot <- ggplot() + 
  annotation_custom(grid::rasterGrob(tab_afro_cases_deaths2), 
                    xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) + 
  theme_void()


## get counts for plotting epicurve
tab_epi_group2 <- covid_afro %>% 
  ## count cases by date
  count(reporting_date, name = "new_cases") %>% 
  ## define as a time series object to be able to calculate rolling average
  tsibble(index = reporting_date) %>% 
  ## calculate the average number of cases in the preceding 7 days
  mutate(sevendayavg = slide_dbl(new_cases, 
                                 ~mean(.x, na.rm = TRUE),
                                 ## number of previous days count to include
                                 .before = 7, 
                                 ## only include days which have the full 7 previous days available
                                 .complete = TRUE))


epi_group2 <- tab_epi_group2 %>% 
    ggplot(aes(x= reporting_date)) +
    geom_bar(aes(y = new_cases), fill="#92a8d1", 
             stat = "identity",
             position = "stack",
             colour = "#92a8d1") + 
    geom_line(aes(y=sevendayavg, lty = "7-day rolling avg"),color="red", size = 1) + 
    scale_x_date(breaks = "1 week",date_minor_breaks = '1 day', 
               date_labels = "%b %d",
               limits = c( 
                 as.Date("2020-02-25"), 
                 max(na.omit(tab_epi_group2$reporting_date)))
               ) +
    ## make y and x axes meet at the origin (x = 0, y = 0) 
    scale_y_continuous(expand = c(0,0), limits = c(0, NA)) + 
    labs(x = "Date", y = "Confirmed cases (n)") + 
    # apply standard theme
    epi_theme


epi_group2 + inset_element(tab_afro_cases_deaths2_plot, 
                           left = 0, bottom = 0.6, 
                           right = 0.6, top = 1, align_to = "full")

Top 10 Countries in New Cases and Deaths Reported in the Past 28 Days

## This chunk creates to plots with the top ten countries in new cases and new 
## deaths 

day_last28 <- this_week - 28

tab_cum_cases_country_og <- covid_afro %>% 
                 filter(reporting_date >= day_last28) %>% 
                 count(country) %>% 
                 group_by(country) %>% 
                 summarise(total_cases = sum(n))

tab_cum_cases_country <- tab_cum_cases_country_og %>% 
                 top_n(10,total_cases)



plot1 <- ggplot(tab_cum_cases_country,
                aes(x = reorder(country, -total_cases ), y = total_cases))+
  geom_col(fill="#4f3222") +
  epi_theme + 
  labs(x = "Country",
       y = "Number of COVID-19 cases")

tab_cum_deaths_country <- covid_afro %>% 
                 filter(reporting_date >= day_last28) %>% 
                 filter(outcome == "Dead") %>% 
                 count(country) %>% 
                 group_by(country) %>% 
                 summarise(total_deaths=sum(n)) %>% 
                 top_n(10,total_deaths)


plot2 <- ggplot(tab_cum_deaths_country, aes(x = reorder(country, -total_deaths), y = total_deaths))+
  geom_col(fill="#d64161") +
  epi_theme + 
  labs(x = "Country",
    y = "Number of COVID-19 deaths")

plot1 / plot2

New Cases Reported Over the past 7 Days: All 47 Countries

## This chunk creates a table of counts for number of new cases in the past seven 
## days and percentage change from previous for all of AFRO

## define the date of previous 7 and 14 days 
day_last7 <- this_week - 7
day_last14 <- this_week - 14

## define which period cases occurred in 
covid_afro <- mutate(covid_afro, 
                     period = case_when(
                       reporting_date >= day_last7    ~ "last7days", 
                       reporting_date < day_last7 & 
                         reporting_date >= day_last14 ~ "previous_7days", 
                       TRUE ~ "Before_last14"), 
                     period = factor(period, 
                                        levels = c("last7days", 
                                                   "previous_7days", 
                                                   "Before_last14")))

overall_new_cases <- covid_afro %>% 
  count(period)  %>% 
  ## ensure that all factor levels are represented (fill with zero)
  complete(period, fill = list(n = 0)) %>% 
  pivot_wider(names_from = period, 
              values_from = n, 
              values_fill = 0) %>% 
  mutate(perc_change = round(((last7days - previous_7days) / previous_7days) * 100, 1)) %>% 
  select("New cases in last 7 days" = last7days, 
         "New cases in previous 7 days" = previous_7days, 
         "% change" = perc_change) %>% 
  pivot_longer(cols = everything(), 
               names_to = "Period", 
               values_to = "Value")

overall_new_cases %>% 
  flextable(cwidth = c(5, 2)) %>% 
  ## make all the text size 18 
  fontsize(size = 18, part = "all")

New Cases Reported Over the Past 7 Days: Increasing Countries

## This chunk creates a table of counts for number of new cases in the past seven 
## days and percentage change from previous week by country


## create table of cases by country counting new cases in last 7 days
tab_increase_cases <- covid_afro %>% 
  count(country, period) %>% 
  ## ensure that all factor levels are represented (fill with zero)
  complete(country, period, fill = list(n = 0)) %>% 
  pivot_wider(names_from = period, 
              values_from = n, 
              values_fill = 0) %>% 
  mutate(perc_change = round(((last7days - previous_7days) / previous_7days) * 100, 1)) %>% 
  arrange(desc(perc_change)) %>% 
  select("Country" = country, 
         "New cases in last 7 days" = last7days, 
         "New cases in previous 7 days" = previous_7days, 
         "% change" = perc_change)

## make it in to a flextable showing increasers
increasers <- flextable(
  filter(tab_increase_cases, `% change` > 0), 
  cwidth = c(1, 1.5, 1.5, 1)) %>% 
  ## make all the text size 18 
  fontsize(size = 18, part = "all") %>% 
  as_raster() 

## plot increasers 
ggplot() + 
  annotation_custom(grid::rasterGrob(increasers), 
                    xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) + 
  theme_void()

New Cases Reported Over the Past 7 Days: Decreasing Countries

## make it in to a flextable showing decreasers
decreasers <- flextable(
  filter(tab_increase_cases, `% change` <= 0), 
  cwidth = c(1, 1.5, 1.5, 1)) %>% 
  ## make all the text size 18 
  fontsize(size = 18, part = "all") %>% 
  as_raster() 

## plot decreasers 
ggplot() + 
  annotation_custom(grid::rasterGrob(decreasers), 
                    xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) + 
  theme_void()

Daily Trends for the Top 10 Countries in the Past 28 Days

## This chunk creates faceted plots by country for 10 highest incidence in past 28 days

## make own colour palettes
nb.cols <- 12
mycolors <- colorRampPalette(brewer.pal(8, "Dark2"))(nb.cols)


#initialise empty datafram for all countires 
countries <- covid_afro %>% 
  select(country) %>%
  summarise(country = unique(country)) 

#make reporting date appropriate class 
covid_afro$reporting_date <- as.Date(covid_afro$reporting_date)


#number of days since first report

day_last28 <- today() - 28

all <- covid_afro %>% 
  mutate(hcw  = ifelse(
    grepl("Confirmed", final_epi_classification, ignore.case = T) & 
      grepl("yes", healthcare_worker, ignore.case = T), 1, 0)) %>%
  filter(reporting_date >= day_last28) %>% 
  group_by(country) %>% 
  summarise(dayslastreport = as.numeric(difftime(Sys.Date(), max(reporting_date))),
            datelastreport = format(max(reporting_date), "%d %B %Y"),
            confirmed      = length(grep("Confirmed",final_epi_classification,ignore.case = T)),
            probable       = length(grep("Probable",final_epi_classification, ignore.case = T)),
            dead           = length(grep("Dead",final_outcome, ignore.case = T)),
            recovered      = length(grep("Recovered",final_outcome, ignore.case = T)),
            active         = length(grep("Alive|Probable",final_outcome, ignore.case = T)),
            hwc_confirmed  = sum(hcw, na.rm=T)) %>% 
  mutate(CFR          = round((dead/confirmed)*100, digits = 0),
         recoveryrate = round((recovered/(confirmed+probable))*100, digits = 0),
         hcwrate      = round((hwc_confirmed/(confirmed))*100, digits = 0))



#identify Top 10 countries based off confirmed case numbers


top10 <- all %>% 
  top_n(10,confirmed) %>% 
  arrange(desc(confirmed)) %>% 
  mutate(label=paste0(country,' n=', confirmed))

top10list <- unique(top10$country)

#Create dataframe for Overall top 10 countries  
 epitop10 <- covid_afro %>% 
   mutate(top10 = country %in% top10list) %>%
   mutate(top10 = as.factor(ifelse(top10 == TRUE, country, "Other"))) %>%
   dplyr::group_by(reporting_date, top10) %>%
   dplyr::summarise(
     confirmed = length(grep("Confirmed",final_epi_classification,ignore.case = T)),
     dead=length(grep("Dead", final_outcome, ignore.case = T))) %>% group_by(top10) %>% 
   mutate(label.con = paste(top10,"n=",sum(confirmed, na.rm=T)), 
          label.dead= paste(top10,"n=",sum(dead, na.rm=T))) %>% 
   group_by(top10) %>% 
   mutate(confirmed.cumsum=cumsum(confirmed),
          dead=cumsum(dead)) %>% 
   tsibble::as_tsibble(key=top10,index = reporting_date) %>% 
   tsibble::group_by_key() %>%
   mutate(sevendayavg_top10 = slider::slide_dbl(confirmed, 
                                    ~mean(.x, na.rm = TRUE),
                                    .before = 7, ## number of previous days count to include
                                    .complete = TRUE),
                                    sevendayavg_top10_cumsum = slider::slide_dbl(confirmed.cumsum, 
                                    ~mean(.x, na.rm = TRUE),
                                    .before = 7, ## number of previous days count to include
                                    .complete = TRUE)) ## only include days which have the full 7 previous days available 


 ## Distribution of new cases with a 7-day moving average by reporting date for current top 10 coutries for cases in the African region,

ggplot(epitop10,
       aes(x = as.Date(epitop10$reporting_date), y = epitop10$confirmed)) +
  geom_bar(aes(fill = label.con), stat = "identity", size = 0.3) + 
  geom_line(aes(x = as.Date(epitop10$reporting_date), 
                y = round(epitop10$sevendayavg_top10), 
                colour="#ff3300"), size=1) +
  facet_wrap(~label.con, ncol=3, scales = "free_y") + 
  scale_x_date(date_breaks = "1 month", 
               date_labels = "%b %d", 
               limits = c(as.Date(min(epitop10$reporting_date)),
                          as.Date(max(epitop10$reporting_date))), expand = c(0,0)) + 
  epi_theme + 
  theme(strip.text = element_text(size=10)) +
  labs(x="Date",y="Absolute number of confirmed cases") + 
  scale_color_discrete(name = "7 day rolling average", labels = c("7 day rolling average")) + 
  scale_fill_manual(values = mycolors, guide = FALSE)

Cases per 100,000 Population: Weekly Trends in the African Region

## This chunk creates a plot for the overall weekly attack rate

## get counts by week 
tab_cases_afro <- covid_afro %>% 
  count(epi_week) %>% 
  ## calculate the attack rate per 100k
  mutate(weekly_incidence_Afro = round(
    (n / sum(Tab_pop$population)) * 100000, 1))

## plot 
ggplot(tab_cases_afro, aes(x = epi_week, y = weekly_incidence_Afro)) + 
  geom_line(size = 1.2, colour = "#ff0000") + 
  scale_x_date(breaks = "1 week",date_minor_breaks = '1 day', 
             date_labels = "%b %d",
             limits = c( 
               as.Date("2020-02-25"), 
               max(na.omit(tab_epi_group2$reporting_date)))
             ) +
  ## make y and x axes meet at the origin (x = 0, y = 0) 
  scale_y_continuous(expand = c(0,0), limits = c(0, 15)) + 
  epi_theme + 
  ylab(label="Weekly attack rate per 100,000 people") + 
  xlab("Reporting week")

Attack Rate in the Last 14 Days by Country

## This chunk creates a plot for the attack rate in the last fourteen days by 
## country

## get counts per country
Tab_cases_last <- covid_afro %>% 
                  filter(reporting_date >= day_last14) %>% 
                  count(country)

## combine with population counts and calc rates
tab_incidence_country <- Tab_pop %>% 
                         left_join(Tab_cases_last, by = "country") %>% 
                         mutate(attack_rate = round( (n / population) * 100000, 1)) %>% 
                         arrange(desc(attack_rate))

## find highest point for y and x axis annotation 
topy <- ceiling(
  max(tab_incidence_country$attack_rate, na.rm = TRUE)) + 5

## plot 
ggplot(tab_incidence_country, 
       aes(x = reorder(country, -attack_rate ), y = attack_rate)) + 
  geom_col(fill = "#4f3222") +
  geom_text(aes(label = attack_rate),
            position = position_dodge(width = 0.9),
            vjust = -0.25, size = 5, face = "bold") +
  ## make y and x axes meet at the origin (x = 0, y = 0) 
  scale_y_continuous(expand = c(0,0), limits = c(0, topy)) + 
  labs(x = "Country",
    y = "Cases per 100,000 pop in the last 14 days") + 
  epi_theme

Deaths per 100,000 Population Vs Cases per 100,000 in Past 14 Days

## This chunk creates a dot plot comparing deaths to cases per 100k population and 
## highlighting low death/high incidence countries

## get counts of deaths 
Tab_deaths_last14 <- covid_afro %>% 
                  filter(date_of_death >= day_last14 & 
                           outcome == "Dead") %>% 
                  count(country)

## combine with population to get rates
tab_death_million <- Tab_deaths_last14 %>% 
  left_join(Tab_pop, by = "country") %>% 
    mutate(deaths_capita = round((n / population) * 100000, 1)) 


## combine with previous table of incidence 
tab_death_cases_million2 <- tab_death_million %>% 
                     left_join(tab_incidence_country, by = "country")

## find highest point for y axis annotation 
topy <- ceiling(
  max(tab_death_cases_million2$attack_rate, na.rm = TRUE)
)


## remove country names if mortality rate is zero (so not overcrowding plot)
tab_death_cases_million2 <- tab_death_cases_million2 %>% 
  mutate(country = case_when(
    deaths_capita == 0 ~ "", 
    TRUE ~ country
  ))




ggplot(data = tab_death_cases_million2, 
       aes(x = deaths_capita, y = attack_rate, label = country)) + 
  geom_point(col = "#c83349", size = 2) +
  geom_text_repel(col = "blue", size = 6, 
                  segment.alpha = 0.6, segment.size = 0.5) +
  annotate("rect", 
           xmin = 0, xmax = 0.5, 
           ymin = 20, ymax = topy, 
           alpha = 0.2) + 
  annotate("text", 
           size = 4,
           x = 0.6, y = topy - 10, 
           label = "High incidence / low mortality countries") + 
  ## make y and x axes meet at the origin (x = 0, y = 0) 
  scale_y_continuous(expand = c(0,0), limits = c(0, NA)) + 
  epi_theme + 
  xlab("Deaths per 10,000 pop in the last 14 days") +
  ylab("Cases per 100,000 pop in the last 14 days ")

Testing Performance in the African Region - Last 4 Weeks

# This chunk creates a dot plot comparing tests performed to positivity rate over
# the last 4 weeks

tab_new_tests <- covid_testing %>% 
             filter(date >= day_last28) %>% 
             group_by(country) %>% 
             summarise(total_tests = sum(new_tests, na.rm = TRUE))

tab_new_cases <- covid_afro %>% 
             filter(reporting_date >= day_last28) %>% 
             count(country)

tab_cases_tests <- tab_new_tests %>% 
                   left_join(tab_new_cases, by = "country")

tab_tests_final <- tab_cases_tests %>% 
  left_join(Tab_pop, by = "country") %>% 
  mutate(average_weekly_tests_capita = round(( (total_tests / 4) / 
                                                 population) * 10000, 1), 
         positivity_rate = round((n / total_tests) * 100, 1)) %>% 
  filter(positivity_rate <= 100)


## find highest point for y and x axis annotation 
topy <- ceiling(
  max(tab_tests_final$average_weekly_tests_capita, na.rm = TRUE)
)

topx <- ceiling(
  max(tab_tests_final$positivity_rate, na.rm = TRUE)
)

ggplot(data = tab_tests_final, 
       aes(x = positivity_rate, y = average_weekly_tests_capita, label = country)) + 
  geom_point(col = "#E1A70E", size = 2) +
  geom_text_repel(col = "blue", size = 6, 
                  segment.alpha = 0.6, segment.size = 0.5) +
  annotate("rect", 
           xmin = 0, xmax = 5, 
           ymin = 10, ymax = topy, 
           alpha = 0.2) + 
  annotate("text", 
           size = 4,
           x = 5, y = topy - 5, 
           label = "High test rate / low positivity rate countries") + 
  annotate("rect", 
           xmin = 10, xmax = topx, 
           ymin = 0, ymax = 10, 
           alpha = 0.2) + 
  annotate("text", 
           size = 4,
           x = 20, y = 8, 
           label = "Low test rate / high positivity rate countries") + 
  ## make y and x axes meet at the origin (x = 0, y = 0) 
  scale_y_continuous(expand = c(0,0), limits = c(0, NA)) + 
  epi_theme + 
  xlab("Positivity rate during the last 4 weeks") +
  ylab("Average weekly nb of tests per 10,000 during the last 4 weeks ")

Health Worker COVID-19 Cases Reported in the African Region: Part 1

## This chunk creates two side-by-side tables of the number of healthcare workers 
## with 

## get counts of healthcare workers
Tab_hcw <- covid_afro %>% 
          filter(healthcare_worker %in% c("Yes", "yes")) %>% 
          count(country, name = "nb_hcw") 

## get total number of cases 
tab_cases3 <- covid_afro %>%
              count(country, name = "nb_cases")

## combine counts of cases and hcw             
tab_cases_hcw <- tab_cases3 %>% 
  left_join(Tab_hcw, by = "country") %>% 
  ## replace missings accross all numerics to be 0
  mutate(
    across(everything(), ~replace_na(., 0)))

# combine tables and create rate vars
tab_hcw_final <- tab_cases_hcw %>% 
   left_join(Tab_pop, by="country") %>% 
   janitor::adorn_totals("row", fill = "-", na.rm = TRUE) %>% 
   mutate(hcw_among_cases = round(( nb_hcw / nb_cases) * 100, 1)) %>% 
   mutate(hcw_per_million = round(( nb_hcw / population) * 1000000, 1)) %>%
   arrange(desc(nb_hcw)) %>% 
   select("Country"            = country,
          "Cumul cases"        = nb_cases, 
          "Cumul HW"           = nb_hcw,
          "% HW"               = hcw_among_cases, 
          "HW per million pop" = hcw_per_million) 


## make it in to a flextable as a raster for plotting in ggplot
increasers <- flextable(
  filter(tab_hcw_final, `Cumul HW` >= 300), 
  cwidth = c(1, 2, 2, 2, 2)) %>% 
  ## make all the text size 18 
  fontsize(size = 18, part = "all") %>% 
  as_raster() 

## plot increasers 
ggplot() + 
  annotation_custom(grid::rasterGrob(increasers), 
                    xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) + 
  theme_void()

Health Worker COVID-19 Cases Reported in the African Region: Part 2

## make it in to a flextable as a raster for plotting in ggplot
decreasers <- flextable(
  filter(tab_hcw_final, `Cumul HW`  < 300), 
  cwidth = c(1, 2, 2, 2, 2)) %>% 
  ## make all the text size 18 
  fontsize(size = 18, part = "all") %>% 
  as_raster() 

## plot increasers 
ggplot() + 
  annotation_custom(grid::rasterGrob(decreasers), 
                    xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) + 
  theme_void()

%HW Among COVID-19 Cases vs HW Infections per Million Population

## This chunk creates a dot plot comparing tests performed to positivity rate over
## the last 4 weeks

tab_hcw_final2 <- tab_cases_hcw %>% 
  left_join(Tab_pop, by = "country") %>% 
  mutate(perc_hcw_among_cases = round((nb_hcw / nb_cases) * 100, 1), 
         cases_per_million = round((nb_hcw / population) * 1000000, 1))


## remove country names if mortality rate is zero (so not overcrowding plot)
tab_hcw_final2 <- tab_hcw_final2 %>% 
  mutate(country = case_when(
    cases_per_million == 0 & 
      perc_hcw_among_cases < 3 ~ "", 
    TRUE ~ country
  ))



## find highest point for y and x axis annotation 
topy <- ceiling(
  max(tab_hcw_final2$cases_per_million, na.rm = TRUE)
)

ggplot(data = tab_hcw_final2, 
                       aes(x = perc_hcw_among_cases, y = cases_per_million, label=country)) + 
  geom_point(col = "#1B9E77", size = 2) +
  geom_text_repel(col = "blue", size = 6, 
                  segment.alpha = 0.6, segment.size = 0.5) +
  annotate("rect", 
           xmin = 0, xmax = 10, 
           ymin = 200, ymax = topy, 
           alpha = 0.2) + 
  annotate("text", 
           size = 4,
           x = 10, y = topy - 100, 
           label = "Low % HW Covid cases / High number HW infections per million population") + 
  ## make y and x axes meet at the origin (x = 0, y = 0) 
  scale_y_continuous(expand = c(0,0), limits = c(0, NA)) + 
  epi_theme + 
  xlab("% health workers among onfirmed cases") +
  ylab("Nb of health workers infections per million people")

Summary

## this chunk produces the numbers required for producing the bullet points 
## on the summary slide


## week on week increases 
trough <- tab_cases_afro %>% 
  mutate(increasing = weekly_incidence_Afro >= lag(weekly_incidence_Afro),
         change_point = increasing - lag(increasing)) %>% 
  filter(change_point == 1) %>% 
  tail()

increasing_weeks <- week(this_week) - week(trough$epi_week)  

change_count <- overall_new_cases[3, 2] %>% 
  pull()

## three highest countries and their percentage 28 days
top3 <- tab_cum_cases_country_og %>% 
  arrange(desc(total_cases)) %>% 
  slice(1:3)

top3countries <- paste0(top3$country, collapse = ", ")

top3countries_perc <- round(
  sum(top3$total_cases) / sum(tab_cum_cases_country_og$total_cases) * 100, 1)


## increase over 20% in the last seven days 
increasers <- tab_increase_cases %>% 
  filter(`% change` >= 20)

increasing_trend <- covid_afro %>% 
  filter(country %in% increasers$Country) %>% 
  count(country, reporting_date) %>% 
  group_by(country) %>% 
  mutate(increasing = n >= lag(n),
         change_point = increasing - lag(increasing)) %>% 
  slice_tail(n = 4) %>% 
  nest() %>% 
  mutate(model = purrr::map(data, 
                            ~lm(n ~ reporting_date, data = .))) %>% 
  mutate(estimate = purrr::map(model, broom::tidy)) %>% 
  unnest(estimate) %>% 
  filter(term == "reporting_date" & estimate > 0)

increasing_trend_countries <- paste0(increasing_trend$country, collapse = ", ")



## low tests per 10,000 
low_test_rate <- tab_tests_final %>% 
  filter(average_weekly_tests_capita < 10)

countries_under_five <- paste0(low_test_rate$country[low_test_rate$positivity_rate < 5], collapse = ", ")

countries_above_ten <- paste0(low_test_rate$country[low_test_rate$positivity_rate > 10], collapse = ", ")


R4IDSR/covidmonitor documentation built on March 29, 2021, 12:05 p.m.