library(lubridate)
library(dplyr)
library(here)
library(rio)
library(utils)
library(knitr)
library(kableExtra)
library(gtable)
library(scales)
library(purrr)
library(readxl)
library(ggplot2)
library(RColorBrewer)
library(reshape2)
library(magrittr)
library(flextable)
library(rmarkdown)
library(linelist)
library(survival)
library(gtsummary)
library(apyramid)
library(sjlabelled)
library(survminer)
##################################
#Initialise Theme for plotting
pub_theme4 <- theme(text=element_text(family="Arial"),
axis.text.x=element_text(size=24,angle=45,color="black", vjust=0.5, hjust=0.5),
axis.text.y=element_text(size=24,color="black"),
axis.title.y=element_text(size=26,face="bold",color="black"),
axis.title.x=element_text(size=26,face="bold",color="black"),
panel.grid.major.y=element_line(size=.5, color="gray", linetype = "solid"),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
#panel.grid.minor=element_blank(),
panel.background=element_rect(fill = "white", colour = "grey50"),
axis.line=element_line(colour="black"),legend.position = "bottom",
legend.direction = "horizontal",
legend.key.size= unit(0.4, "cm"),
legend.title = element_text(size=26,face="bold.italic"),
legend.text=element_text(size=24, face="italic"),
#panel.border=element_rect(colour="black", fill=NA, size=1),
plot.margin=unit(c(0.2,0.2,0.2,0.2),"in"))
#make own colour palettes
nb.cols <- 12
mycolors <- colorRampPalette(brewer.pal(8, "PRGn"))(nb.cols)
mycolors2 <- colorRampPalette(brewer.pal(8, "Set1"))(nb.cols)
###################################
big_data_clean<-rio::import(here::here("inst/","Cleaned_linelist_2021-01-08.xlsx"), guess_max=100000) #guess max added to ensure integers are not read in as boolean
big_data_analyse<-big_data_clean
#bin off algeria redo tjhe merge and clean
big_data_analyse <-big_data_analyse %>% filter(!(country_iso %in% c("DZA"))) #algeria should be in anyway
###########Filtering the data set ###########
dates<- big_data_analyse %>% group_by(country_iso,country_full) %>%
summarise(minreport=format(min(report_date, na.rm = T), "%Y %B %d"),
maxreport=format(max(report_date, na.rm = T), "%Y %B %d"))
#define date we are reporting to
dateofreport<-as.Date("2020-10-31", origin= "1899-12-30")
#filter for cases before dateofreport variable ie 26/10/2020
startdateofreport<-as.Date("2020-03-21", origin= "1899-12-30")
big_data_analyse<-dplyr::mutate(big_data_analyse, across(c("patcourse_datedeath","patcourse_datedischarge"), ~ ifelse(country_iso=="TCD", as.Date(.,origin= "1970-01-01") + lubridate::years(4), as.Date(.,origin= "1970-01-01"))))
big_data_analyse<-dplyr::mutate(big_data_analyse, across(c("patcourse_datedeath","patcourse_datedischarge"), as.Date, origin= "1970-01-01")) %>%
filter(report_date<=dateofreport & report_date>=startdateofreport)
#remove countries that do not have such reports up until 2020-10-31
big_data_analyse<- big_data_analyse %>% group_by(country_iso) %>% mutate(lastreport_ =ifelse(max(report_date, na.rm = T)==dateofreport,1,0))
big_data_analyse<- big_data_analyse %>% group_by(country_iso) %>% filter(lastreport_==1)
#filter confrimed cases
big_data_analyse<- big_data_analyse %>% filter(report_classif_alice=="confirmed")
#countries that should be dropped
big_data_analyse <-big_data_analyse %>% filter(!(country_iso %in% c("BFA", "GIN"))) #remove BFA and GIN as did not report outcome vars
#in GIN the variable for status was is case dead with 1= dead and 0 = alive. ) has been lost in the merging of linelists as was difficult to retain and merge all
#therfore populate patcourse_status if missing for alive only for ginuea
big_data_analyse$patcourse_status<-ifelse(big_data_analyse$country_iso=="GIN" & is.na(big_data_analyse$patcourse_status), "alive", big_data_analyse$patcourse_status)
#only include confirmed cases that are not missing outcome
big_data_analyse<- big_data_analyse %>% filter(!is.na(patcourse_status))
############################################
###########Variable creation###########
big_data_analyse$agegroup<- cut(big_data_analyse$patinfo_ageonset, breaks = c(0, 15, 25, 35, 45, 55,65,75,Inf), right = FALSE,labels = c("< 15 years", "15-24 years","25-34 years","35-44 years","45-54 years","55-64 years","65-74 years","75 years +"))
#correcting if any "men" with pregancy entered into their comorbs
big_data_analyse$pregnancy<-ifelse(big_data_analyse$patinfo_sex=="M" & big_data_analyse$pregnancy==1,NA,big_data_analyse$pregnancy)
#ensuring only those not missing have this variable corrrect
big_data_analyse$capital_final<-ifelse(is.na(big_data_analyse$patinfo_resadmin1), NA, big_data_analyse$capital_final)
big_data_analyse<-dplyr::mutate(big_data_analyse, across(c(diabetes, asthma, hypertension,cancer,
renal_disease,cardiovascular_disease,obesity,tuberculosis,drepanocytosis,chronic_pulmonary), as.numeric))
big_data_analyse$comcond_preexsist_count_top10<-rowSums(select(ungroup(big_data_analyse), c(diabetes, asthma, hypertension,cancer,
renal_disease,cardiovascular_disease,obesity,tuberculosis,drepanocytosis,chronic_pulmonary)), na.rm = T)
###########################
#time to death analysis need to make missing problem dates entered in wrong for lab taken date date of death and date of date discharge
big_data_analyse<-ungroup(big_data_analyse)
big_data_analyse$lab_datetaken <- if_else(big_data_analyse$lab_datetaken< as.Date("2020-01-01") | big_data_analyse$lab_datetaken> as.Date(dateofreport),as.Date(NA),as.Date(big_data_analyse$lab_datetaken))
big_data_analyse$patcourse_datedeath <- if_else(big_data_analyse$patcourse_datedeath< as.Date("2020-01-01") | big_data_analyse$patcourse_datedeath> as.Date(dateofreport),as.Date(NA),as.Date(big_data_analyse$patcourse_datedeath))
big_data_analyse$patcourse_datedischarge <- if_else(big_data_analyse$patcourse_datedischarge< as.Date("2020-01-01") | big_data_analyse$patcourse_datedischarge> as.Date(dateofreport),as.Date(NA),as.Date(big_data_analyse$patcourse_datedischarge))
big_data_analyse$lab_resdate<-if_else(big_data_analyse$lab_resdate< as.Date("2020-01-01") | big_data_analyse$lab_resdate> as.Date(dateofreport),as.Date(NA),as.Date(big_data_analyse$lab_resdate))
big_data_analyse$startdate<-big_data_analyse$lab_resdate
big_data_analyse$startdate<-if_else(is.na(big_data_analyse$startdate), big_data_analyse$lab_datetaken,big_data_analyse$startdate)
#use this df in the time to death analysis
big_data_analyse<-big_data_analyse %>% filter(!is.na(startdate) & !is.na(patcourse_status))
big_data_analyse$enddate<-dplyr::if_else(big_data_analyse$patcourse_status=="dead",big_data_analyse$patcourse_datedeath,big_data_analyse$patcourse_datedischarge)
big_data_analyse$enddate<-dplyr::if_else(big_data_analyse$patcourse_status=="alive" & is.na(big_data_analyse$enddate),dateofreport,big_data_analyse$enddate)
big_data_analyse<-big_data_analyse %>% filter(!is.na(enddate))
#Create binary variables for cox regression
#1 denotes presence 0 deontes no presence OR not reported. Summary table displays misssingness for each variable used in cox regression
big_data_analyse$comcond_preexist1_alice_binary<-ifelse(big_data_analyse$comcond_preexist1_alice=="yes",1,0)
big_data_analyse$hcw_binary<-ifelse(big_data_analyse$hcw=="TRUE",1,0)
big_data_analyse$hcw_binary<-ifelse(is.na(big_data_analyse$hcw_binary),0,big_data_analyse$hcw_binary)
big_data_analyse$capital_final_binary<-ifelse(big_data_analyse$capital_final=="TRUE",1,0)
big_data_analyse$capital_final_binary<-ifelse(is.na(big_data_analyse$capital_final_binary),0,big_data_analyse$capital_final_binary)
big_data_analyse$pregant_binary<-ifelse(is.na(big_data_analyse$pregnancy) & big_data_analyse$patinfo_sex=="F",0,big_data_analyse$pregnancy)
big_data_analyse$patinfo_sex_binary<-ifelse(!is.na(big_data_analyse$patinfo_sex) & big_data_analyse$patinfo_sex=="M",1,
ifelse(is.na(big_data_analyse$patinfo_sex),NA,0))
big_data_analyse$comcond_preexsist_count_top10<-ifelse(big_data_analyse$comcond_preexist1_alice_binary==0,"no/not reported",big_data_analyse$comcond_preexsist_count_top10)
#define failure
big_data_analyse$vlfail<-ifelse(big_data_analyse$patcourse_status=="dead",1,0)
#Create person time variable (observation time)
big_data_analyse$perstime <- as.numeric(difftime(big_data_analyse$enddate, big_data_analyse$startdate, units = "days")) # / 365.25 # (if want in pers-years)
big_data_analyse<- big_data_analyse %>% filter(big_data_analyse$perstime>0)
#Add the survial object as variable to your dataset which is survival time (for kaplan-meier and cox)
big_data_analyse<-big_data_analyse %>% filter(!is.na(vlfail) & !is.na(patinfo_ageonset) & !is.na(patinfo_sex_binary) & !is.na(comcond_preexist1_alice_binary) & !is.na(capital_final_binary) & !is.na(hcw_binary))
#make copy dataset to use for cox
timetodeath_numbers<-big_data_analyse
timetodeath_numbers$SurvObj <- with(timetodeath_numbers, survival::Surv(time = perstime, event = vlfail))
#################
#dataset prep for lrm
fit<- big_data_analyse %>% filter(!is.na(patcourse_status))
#Create binary variables for logistic regression
#1 denotes presence 0 deontes no presence OR not reported. Summary table displays misssingness for each variable used in cox regression
fit$hcw_binary<-ifelse(fit$hcw=="TRUE",1,0)
fit$hcw_binary<-ifelse(is.na(fit$hcw_binary),0,fit$hcw_binary)
fit$capital_final_binary<-ifelse(fit$capital_final=="TRUE",1,0)
fit$capital_final_binary<-ifelse(is.na(fit$capital_final_binary),0,fit$capital_final_binary)
fit$pregant_binary<-ifelse(is.na(fit$pregnancy) & fit$patinfo_sex=="F",0,fit$pregnancy)
fit$patinfo_sex_binary<-ifelse(!is.na(fit$patinfo_sex) & fit$patinfo_sex=="M",1,
ifelse(is.na(fit$patinfo_sex),NA,0))
#define failure
fit$vlfail<-ifelse(fit$patcourse_status=="dead",1,0)
#######################
all<- big_data_analyse %>% mutate(hcw_all=ifelse(grepl("confirmed",report_classif_alice, ignore.case = T) & hcw_binary==1,1,0), hcw_all_dead=ifelse(vlfail==1 &hcw_binary==1,1,0)) %>%
group_by(country_iso,country_full) %>%
summarise(dayslastreport=as.numeric(difftime(dateofreport, max(report_date,na.rm = T))),
maxreportdateafterfilter=format(max(report_date, na.rm = T), "%Y %B %d"),
datelastreport=format(max(dateofreport, na.rm = T), "%Y %B %d"),
confirmed=length(grep("Confirmed",report_classif_alice,ignore.case = T)),
recovered=length(grep("recovered",patcourse_status_recovered,ignore.case = T)),
dead=length(grep("Dead",patcourse_status, ignore.case = T)),
alive=length(grep("alive",patcourse_status, ignore.case = T)),
hwc_confirmed=sum(hcw_all, na.rm=T),
hwc_dead=sum(hcw_all_dead, na.rm=T),
ncd_yes=length(grep("yes",comcond_preexist1_alice, ignore.case = T)),
ncd_no=length(grep("^no$",comcond_preexist1_alice, ignore.case = T)),
ncd_notreport=length(grep("not given",comcond_preexist1_alice, ignore.case = T))) %>%
mutate(CFR=round((dead/confirmed)*100, digits = 2),
hcw_proportion= round((hwc_confirmed/(confirmed))*100, digits = 2),
hcw_cfr=round((hwc_dead/(hwc_confirmed))*100, digits = 2))
openxlsx::write.xlsx(all,"whoisddying_manuscript/New/summary_confirmedcases.xlsx")
hcw<-big_data_analyse %>% mutate(hcw_all=ifelse(grepl("confirmed",report_classif_alice, ignore.case = T) & grepl("TRUE", hcw, ignore.case = T),1,0)) %>% filter(hcw_all==1) %>%
group_by(country_iso,country_full, patinfo_sex) %>% summarise(dayslastreport=as.numeric(difftime(as.Date("2020-10-30", origin= "1899-12-30"), max(report_date,na.rm = T))),
datelastreport=format(max(report_date, na.rm = T), "%Y %B %d"),
confirmed=length(grep("Confirmed",report_classif_alice,ignore.case = T)),
recovered=length(grep("recovered",patcourse_status_recovered,ignore.case = T)),
dead=length(grep("Dead",patcourse_status, ignore.case = T)),
alive=length(grep("alive",patcourse_status, ignore.case = T))) %>%
mutate(CFR=round((dead/confirmed)*100, digits = 2))
cfr_bysexage<-big_data_analyse %>% filter(!is.na(patinfo_ageonset) & !is.na(patinfo_sex)) %>% group_by(agegroup,patinfo_sex) %>% mutate(hcw_all=ifelse(grepl("confirmed",report_classif_alice, ignore.case = T) & grepl("TRUE", hcw, ignore.case = T),1,0)) %>%
summarise(confirmed=length(grep("Confirmed",report_classif_alice,ignore.case = T)),
dead=length(grep("Dead",patcourse_status, ignore.case = T)),
alive=length(grep("alive",patcourse_status, ignore.case = T)),
hwc_confirmed=sum(hcw_all, na.rm=T)) %>%
mutate(CFR=round((dead/confirmed)*100, digits = 2))
all_sexage<-big_data_analyse %>% filter(!is.na(patinfo_ageonset) & !is.na(patinfo_sex)) %>% mutate(hcw_all=ifelse(grepl("confirmed",report_classif_alice, ignore.case = T) & grepl("TRUE", hcw, ignore.case = T),1,0),hcw_dead_case=ifelse(grepl("dead",patcourse_status, ignore.case = T) & grepl("TRUE", hcw, ignore.case = T),1,0)) %>%
group_by(patinfo_sex,agegroup) %>%
summarise(confirmed=length(grep("Confirmed",report_classif_alice,ignore.case = T)),
#total_reported=length(!is.na(patinfo_id)), #everyone in the line list?
dead=length(grep("Dead",patcourse_status, ignore.case = T)),
alive=length(grep("alive",patcourse_status, ignore.case = T)),
hwc_confirmed=sum(hcw_all, na.rm=T),
hcw_dead=sum(hcw_dead_case, na.rm=T)) %>% ungroup() %>% #ungroup so calculate the percentages with the denominator as the total of male and female
mutate(CFR=round((dead/confirmed)*100, digits = 2),
hcwrate= round((hwc_confirmed/(confirmed))*100, digits = 2),
confirmed_perc=confirmed/sum(confirmed, na.rm = T)*100,
dead_perc=dead/sum(dead, na.rm = T)*100)
all_sexage_wide<-tidyr::pivot_wider(all_sexage,agegroup,names_from= patinfo_sex,values_from=c(confirmed,alive,dead,hwc_confirmed, hcw_dead,CFR,hcwrate)) %>%
openxlsx::write.xlsx("whoisddying_manuscript/New/age_sex_count_hcwincluded.xlsx")
summary_sexage_wide<-tidyr::pivot_wider(all_sexage,agegroup,names_from= patinfo_sex,values_from=confirmed) %>% mutate( total= `M`+`F`,male_perc=(`M`/(`M`+`F`))*100 , agegroup_perc=((`M`+`F`)/sum(`M`+`F`, na.rm = T))*100) %>%
openxlsx::write.xlsx("whoisddying_manuscript/New/age_sex_count.xlsx")
summary_sexage_dead_wide<-tidyr::pivot_wider(all_sexage,agegroup,names_from= patinfo_sex,values_from=dead) %>% mutate(total= `M`+`F`,male_perc=(`M`/(`M`+`F`))*100 , agegroup_perc=((`M`+`F`)/sum(`M`+`F`, na.rm = T))*100) %>%
openxlsx::write.xlsx("whoisddying_manuscript/New/age_sex_deaths_count.xlsx")
#ncd does not include those with no comcond entered ie. UGA
ncd<- big_data_analyse %>% group_by(country_iso) %>% filter(length(unique(as.list(comcond_preexist1_alice)))>1) %>%
group_by(country_iso,comcond_preexist1_alice) %>% mutate(hcw_all=ifelse(grepl("confirmed",report_classif_alice, ignore.case = T) & grepl("TRUE", hcw, ignore.case = T),1,0)) %>%
summarise(confirmed=length(grep("Confirmed",report_classif_alice,ignore.case = T)),
dead=length(grep("Dead",patcourse_status, ignore.case = T)),
alive=length(grep("alive",patcourse_status, ignore.case = T)),
hwc_confirmed=sum(hcw_all, na.rm=T))
ncd_no_notreported<- ncd %>% group_by(country_iso) %>% dplyr::summarise(
confirmed=sum(confirmed[comcond_preexist1_alice=='no' | comcond_preexist1_alice=='not given']),
dead=sum(dead[comcond_preexist1_alice=='no' | comcond_preexist1_alice=='not given']),
alive=sum(alive[comcond_preexist1_alice=='no' | comcond_preexist1_alice=='not given']),
hwc_confirmed=sum(hwc_confirmed[comcond_preexist1_alice=='no' | comcond_preexist1_alice=='not given']))
ncd_no_notreported$comcond_preexist1_alice<-"no/not reported"
ncd_new<-merge(ncd,ncd_no_notreported, all=T) %>%
mutate(CFR=round((dead/confirmed)*100, digits = 2),
hcwrate= round((hwc_confirmed/(confirmed))*100, digits = 0),
confirmed_perc=confirmed/sum(confirmed, na.rm = T)*100,
dead_perc=dead/sum(dead, na.rm = T)*100)
ncd_all<- ncd_new %>% group_by(comcond_preexist1_alice) %>%
summarise(confirmed=sum(confirmed),
dead=sum(dead),
alive=sum(alive),
hwc_confirmed=sum(hwc_confirmed)) %>% group_by(comcond_preexist1_alice) %>%
mutate(CFR=round((dead/confirmed)*100, digits = 2),
hcwrate= round((hwc_confirmed/(confirmed))*100, digits = 2))
#chi squared analysis for dead vs alive in confirmed cases between comcond groups (yes vs. no/ not recorded)
ncd_all_chi<- big_data_analyse %>% filter(length(unique(as.list(comcond_preexist1_alice)))>1) %>% select(id,patinfo_id,country_iso,comcond_preexist1_alice, patcourse_status, report_classif_alice) %>%
mutate(comcond_preexist1_alice=ifelse(comcond_preexist1_alice!="yes", "no/not reported",comcond_preexist1_alice))
unique(ncd_all_chi$country_iso) #coutrnies in ncd analysis
ncd_all_chi_summary<-ncd_all_chi %>% group_by(comcond_preexist1_alice) %>% summarise(confirmed=length(grep("Confirmed",report_classif_alice,ignore.case = T)),
dead=length(grep("Dead",patcourse_status, ignore.case = T)),
alive=length(grep("alive",patcourse_status, ignore.case = T))) %>%
mutate(CFR=round((dead/confirmed)*100, digits = 2))
chisq.test(ncd_all_chi$comcond_preexist1_alice,ncd_all_chi$patcourse_status)
table(ncd_all_chi$comcond_preexist1_alice,ncd_all_chi$patcourse_status)
######
#ncds by type
ncd_type_count<-big_data_analyse
diabetes<- ncd_type_count %>% group_by(diabetes) %>% summarise(confirmed=length(grep("Confirmed",report_classif_alice,ignore.case = T)),
dead=length(grep("Dead",patcourse_status, ignore.case = T)),
alive=length(grep("alive",patcourse_status, ignore.case = T))) %>%
mutate(diabetes=ifelse(diabetes==1,"diabetes",NA)) %>% filter(!is.na(diabetes)) %>% rename("co-morbidity"="diabetes")
hta<-ncd_type_count %>% group_by(hypertension) %>% summarise(confirmed=length(grep("Confirmed",report_classif_alice,ignore.case = T)),
dead=length(grep("Dead",patcourse_status, ignore.case = T)),
alive=length(grep("alive",patcourse_status, ignore.case = T))) %>%
mutate(hypertension=ifelse(hypertension==1,"hypertension",NA)) %>% filter(!is.na(hypertension)) %>% rename("co-morbidity"="hypertension")
asthma<- ncd_type_count %>% group_by(asthma) %>% summarise(confirmed=length(grep("Confirmed",report_classif_alice,ignore.case = T)),
dead=length(grep("Dead",patcourse_status, ignore.case = T)),
alive=length(grep("alive",patcourse_status, ignore.case = T))) %>%
mutate(asthma=ifelse(asthma==1,"asthma",NA)) %>% filter(!is.na(asthma)) %>% rename("co-morbidity"="asthma")
cancer<-ncd_type_count %>% group_by(cancer) %>% summarise(confirmed=length(grep("Confirmed",report_classif_alice,ignore.case = T)),
dead=length(grep("Dead",patcourse_status, ignore.case = T)),
alive=length(grep("alive",patcourse_status, ignore.case = T))) %>%
mutate(cancer=ifelse(cancer==1,"cancer",NA)) %>% filter(!is.na(cancer)) %>% rename("co-morbidity"="cancer")
obesity<-ncd_type_count %>% group_by(obesity) %>% summarise(confirmed=length(grep("Confirmed",report_classif_alice,ignore.case = T)),
dead=length(grep("Dead",patcourse_status, ignore.case = T)),
alive=length(grep("alive",patcourse_status, ignore.case = T))) %>%
mutate(obesity=ifelse(obesity==1,"obesity",NA)) %>% filter(!is.na(obesity)) %>% rename("co-morbidity"="obesity")
cvd<-ncd_type_count %>% group_by(cardiovascular_disease) %>% summarise(confirmed=length(grep("Confirmed",report_classif_alice,ignore.case = T)),
dead=length(grep("Dead",patcourse_status, ignore.case = T)),
alive=length(grep("alive",patcourse_status, ignore.case = T))) %>%
mutate(cardiovascular_disease=ifelse(cardiovascular_disease==1,"cardiovascular disease",NA)) %>% filter(!is.na(cardiovascular_disease)) %>% rename("co-morbidity"="cardiovascular_disease")
rd<-ncd_type_count %>% group_by(renal_disease) %>% summarise(confirmed=length(grep("Confirmed",report_classif_alice,ignore.case = T)),
dead=length(grep("Dead",patcourse_status, ignore.case = T)),
alive=length(grep("alive",patcourse_status, ignore.case = T))) %>%
mutate(renal_disease=ifelse(renal_disease==1,"renal disease",NA)) %>% filter(!is.na(renal_disease)) %>% rename("co-morbidity"="renal_disease")
tb<-ncd_type_count %>% group_by(tuberculosis) %>% summarise(confirmed=length(grep("Confirmed",report_classif_alice,ignore.case = T)),
dead=length(grep("Dead",patcourse_status, ignore.case = T)),
alive=length(grep("alive",patcourse_status, ignore.case = T))) %>%
mutate(tuberculosis=ifelse(tuberculosis==1,"tuberculosis",NA)) %>% filter(!is.na(tuberculosis)) %>% rename("co-morbidity"="tuberculosis")
pregnancy<-ncd_type_count %>% group_by(pregnancy) %>% summarise(confirmed=length(grep("Confirmed",report_classif_alice,ignore.case = T)),
dead=length(grep("Dead",patcourse_status, ignore.case = T)),
alive=length(grep("alive",patcourse_status, ignore.case = T))) %>%
mutate(pregnancy=ifelse(pregnancy==1,"pregnancy",NA)) %>% filter(!is.na(pregnancy)) %>% rename("co-morbidity"="pregnancy")
drepanocytosis<-ncd_type_count %>% group_by(drepanocytosis) %>% summarise(confirmed=length(grep("Confirmed",report_classif_alice,ignore.case = T)),
dead=length(grep("Dead",patcourse_status, ignore.case = T)),
alive=length(grep("alive",patcourse_status, ignore.case = T))) %>%
mutate(drepanocytosis=ifelse(drepanocytosis==1,"drepanocytosis",NA)) %>% filter(!is.na(drepanocytosis)) %>% rename("co-morbidity"="drepanocytosis")
pd<-ncd_type_count %>% group_by(chronic_pulmonary) %>% summarise(confirmed=length(grep("Confirmed",report_classif_alice,ignore.case = T)),
dead=length(grep("Dead",patcourse_status, ignore.case = T)),
alive=length(grep("alive",patcourse_status, ignore.case = T))) %>%
mutate(chronic_pulmonary=ifelse(chronic_pulmonary==1,"chronic pulmonary disease",NA)) %>% filter(!is.na(chronic_pulmonary)) %>% rename("co-morbidity"="chronic_pulmonary")
not_spec<-ncd_type_count %>% group_by(not_specified_comorb) %>% summarise(confirmed=length(grep("Confirmed",report_classif_alice,ignore.case = T)),
dead=length(grep("Dead",patcourse_status, ignore.case = T)),
alive=length(grep("alive",patcourse_status, ignore.case = T))) %>%
mutate(not_specified_comorb=ifelse(not_specified_comorb==1,"not specified",NA)) %>% filter(!is.na(not_specified_comorb)) %>% rename("co-morbidity"="not_specified_comorb")
other_comorb<-ncd_type_count %>% group_by(other_comorb) %>% summarise(confirmed=length(grep("Confirmed",report_classif_alice,ignore.case = T)),
dead=length(grep("Dead",patcourse_status, ignore.case = T)),
alive=length(grep("alive",patcourse_status, ignore.case = T))) %>%
mutate(other_comorb=ifelse(other_comorb==1,"other",NA)) %>% filter(!is.na(other_comorb)) %>% rename("co-morbidity"="other_comorb")
ncd_type<-do.call("rbind",list(diabetes,hta,asthma,cvd,cancer, obesity,tb,pd,rd,drepanocytosis,pregnancy,not_spec,other_comorb)) %>% group_by(`co-morbidity`) %>%
mutate(CFR=round((dead/confirmed)*100, digits = 2))
openxlsx::write.xlsx(ncd_type, "whoisddying_manuscript/New/ncd_type_table.xlsx")
ncd_oneormore<-big_data_analyse %>% filter(comcond_preexsist_count_top10!=0) %>% group_by(as.factor(comcond_preexsist_count_top10)) %>% summarise(confirmed=length(grep("Confirmed",report_classif_alice,ignore.case = T)),
n_dead=length(grep("Dead",patcourse_status, ignore.case = T)),
alive=length(grep("alive",patcourse_status, ignore.case = T))) %>%
mutate(CFR=round((n_dead/confirmed)*100, digits = 2)) %>% rename("Number.of.Comorbidities"=1)
openxlsx::write.xlsx(ncd_oneormore, "whoisddying_manuscript/New/ncd_count.xlsx")
################################################
#most common combinations
ncd_combin<-big_data_analyse %>% filter(comcond_preexist1_alice=="yes" & comcond_preexsist_count_top10>0) %>%
select(comcond_preexsist_count_top10,diabetes, asthma, hypertension,cancer,
renal_disease,cardiovascular_disease,obesity,tuberculosis,drepanocytosis,chronic_pulmonary)
ncd_combin_new <- data.frame(sapply(names(ncd_combin)[-1], function(x) ifelse(ncd_combin[, x] == 1, x, ncd_combin[, x])), stringsAsFactors = F)
ncd_combin_new$id<-rownames(ncd_combin_new)
count<-select(ncd_combin,comcond_preexsist_count_top10)
count$id<-rownames(count)
ncd_combin_all<- merge(ncd_combin_new,count, by="id")
test<-ncd_combin_all %>% group_by(comcond_preexsist_count_top10) %>% count_(names(ncd_combin_all)[-1]) %>% group_by(comcond_preexsist_count_top10) %>%
mutate(perc=n/sum(n)*100) %>% group_by(comcond_preexsist_count_top10) %>%
filter(n==max(n)) %>% tidyr::unite_("ComboVar",names(.)[2:11],sep=",") %>%
dplyr::mutate(ComboVar = stringr::str_replace_all(ComboVar, 'NA,?', ''))
openxlsx::write.xlsx(test, "whoisddying_manuscript/New/most_common_combinations.xlsx")
# #one way for combination plotting
#
# combanalysis<-ncd_combin_new %>% mutate(
# # combine the variables into one, using paste() with a semicolon separating any values
# all_comors = paste(diabetes, asthma, hypertension,cancer,
# renal_disease,cardiovascular_disease,obesity,tuberculosis,drepanocytosis,chronic_pulmonary, sep = "; "),
#
# # make a copy of all_symptoms variable, but of class "list" (which is required to use ggupset() in next step)
# all_comors_list = as.list(strsplit(all_comors, "; "))
# )
# ggplot(combanalysis,
# aes(x=all_comors_list)) +
# geom_bar() +
# ggupset::scale_x_upset(reverse = FALSE,
# n_intersections = 40,
# sets = c("diabetes", "asthma", "hypertension", "obesity", "cardiovascular_disease", "renal_disease", "drepanocytosis","chronic_pulmonary", "cancer", "tuberculosis")
# )+
# labs(title = "Comorbidities",
# subtitle = "10 most frequent combinations of comorbidities",
# caption = "Caption here.",
# x = "Comorbidity combination",
# y = "Frequency in dataset")
#UpsetR way
#taking only those dead
combanalysis_2_dead<- big_data_analyse %>% filter(comcond_preexist1_alice=="yes" & comcond_preexsist_count_top10>0 & vlfail==1) %>%
mutate(across(c(diabetes, asthma, hypertension, obesity, cardiovascular_disease, renal_disease, drepanocytosis,chronic_pulmonary, cancer, tuberculosis), ~ifelse(is.na(.),0,.))) %>%
rename(c("Cardiovascular Disease"=cardiovascular_disease, "Renal Disease"=renal_disease,"Chronic Pulmonary Disease"=chronic_pulmonary)) %>%
rename_all(tools::toTitleCase) %>% as.data.frame()
combanalysis_2_alivee<- big_data_analyse %>% filter(comcond_preexist1_alice=="yes" & comcond_preexsist_count_top10>0 & vlfail==0) %>%
mutate(across(c(diabetes, asthma, hypertension, obesity, cardiovascular_disease, renal_disease, drepanocytosis,chronic_pulmonary, cancer, tuberculosis), ~ifelse(is.na(.),0,.))) %>%
rename(c("Cardiovascular Disease"=cardiovascular_disease, "Renal Disease"=renal_disease,"Chronic Pulmonary Disease"=chronic_pulmonary)) %>%
rename_all(tools::toTitleCase) %>% as.data.frame()
#all cases
combanalysis_2_cases<- big_data_analyse %>% filter(comcond_preexist1_alice=="yes" & comcond_preexsist_count_top10>0) %>%
mutate(across(c(diabetes, asthma, hypertension, obesity, cardiovascular_disease, renal_disease, drepanocytosis,chronic_pulmonary, cancer, tuberculosis), ~ifelse(is.na(.),0,.))) %>%
rename(c("Cardiovascular Disease"=cardiovascular_disease, "Renal Disease"=renal_disease,"Chronic Pulmonary Disease"=chronic_pulmonary)) %>%
rename_all(tools::toTitleCase) %>% as.data.frame()
310#plotting for all dead as more relaven
plotobj_cases<-UpSetR::upset(
select(combanalysis_2_cases, Diabetes, Asthma, Hypertension, Obesity, `Cardiovascular Disease`, `Renal Disease`, Drepanocytosis,`Chronic Pulmonary Disease`, Cancer, Tuberculosis),
sets = c("Diabetes", "Asthma", "Hypertension", "Obesity", "Cardiovascular Disease", "Renal Disease", "Drepanocytosis","Chronic Pulmonary Disease", "Cancer", "Tuberculosis"),
order.by = "freq",
#group.by = "degree",
sets.bar.color = mycolors[1:10], # optional colors
empty.intersections = "on",
#nsets = 5,
number.angles = 0,
point.size = 3.5,
line.size = 2,
set_size.show = TRUE,
mainbar.y.label = "Comorbidity combinations",
nintersects=40,
sets.x.label = "Patients with comorbidity", text.scale = 1.75,set_size.angles = 0)
plotobj_dead<-UpSetR::upset(
select(combanalysis_2_dead, Diabetes, Asthma, Hypertension, Obesity, `Cardiovascular Disease`, `Renal Disease`, Drepanocytosis,`Chronic Pulmonary Disease`, Cancer, Tuberculosis),
sets = c("Diabetes", "Asthma", "Hypertension", "Obesity", "Cardiovascular Disease", "Renal Disease", "Drepanocytosis","Chronic Pulmonary Disease", "Cancer", "Tuberculosis"),
order.by = "freq",
#group.by = "degree",
sets.bar.color = mycolors[1:10], # optional colors
empty.intersections = "on",
#nsets = 5,
number.angles = 0,
point.size = 3.5,
line.size = 2,
set_size.show = TRUE,
mainbar.y.label = "Comorbidity combinations",
nintersects=40,
sets.x.label = "Patients with comorbidity", text.scale = 1.75,set_size.angles = 0)
plot.ls <- list(
A = plotobj_cases ,
B = plotobj_dead
)
for (v in names(plot.ls)) {
print(plot.ls[[v]])
grid::grid.text(v, x = 0.2, y=0.97, gp = grid::gpar(fontsize = 20))
grid::grid.edit('arrange', name = v)
vp <- grid::grid.grab()
plot.ls[[v]] <- vp
}
gridExtra::grid.arrange(grobs = plot.ls, ncol = 1, clip=F)
#weighted cox regression for number of comorbidities
failure_comorbs<- big_data_analyse %>% filter(comcond_preexsist_count_top10!="0") %>% mutate(across(comcond_preexsist_count_top10, factor, levels=c("no/not reported","1","2","3","4")))
cox_w_combos <- coxphw::coxphw(survival::Surv(perstime,vlfail) ~comcond_preexsist_count_top10 , data = failure_comorbs)
univ_combo<-data.frame(`Number of Comorbidities`= c("no/not reported", "1", "2","3","4"),
N=c(nrow(failure_comorbs[failure_comorbs$comcond_preexsist_count_top10=="no/not reported", ]),nrow(failure_comorbs[failure_comorbs$comcond_preexsist_count_top10=="1", ]),nrow(failure_comorbs[failure_comorbs$comcond_preexsist_count_top10=="2", ]),nrow(failure_comorbs[failure_comorbs$comcond_preexsist_count_top10=="3", ]),nrow(failure_comorbs[failure_comorbs$comcond_preexsist_count_top10=="4", ])),
n_dead=c(nrow(failure_comorbs[failure_comorbs$comcond_preexsist_count_top10=="no/not reported" & failure_comorbs$vlfail==1,]),nrow(failure_comorbs[failure_comorbs$comcond_preexsist_count_top10=="1" & failure_comorbs$vlfail==1,]),nrow(failure_comorbs[failure_comorbs$comcond_preexsist_count_top10=="2" & failure_comorbs$vlfail==1,]),nrow(failure_comorbs[failure_comorbs$comcond_preexsist_count_top10=="3" & failure_comorbs$vlfail==1,]), nrow(failure_comorbs[failure_comorbs$comcond_preexsist_count_top10=="4" & failure_comorbs$vlfail==1,])),
n_perstime=c(sum(failure_comorbs$perstime[failure_comorbs$comcond_preexsist_count_top10=="no/not reported"]),sum(failure_comorbs$perstime[failure_comorbs$comcond_preexsist_count_top10=="1"]),sum(failure_comorbs$perstime[failure_comorbs$comcond_preexsist_count_top10=="2"]),sum(failure_comorbs$perstime[failure_comorbs$comcond_preexsist_count_top10=="3"]),sum(failure_comorbs$perstime[failure_comorbs$comcond_preexsist_count_top10=="4"])),
HR=c(" ",round(exp(cox_w_combos$coefficients[1]), digits = 4),round(exp(cox_w_combos$coefficients[2]), digits = 4),round(exp(cox_w_combos$coefficients[3]), digits = 4),round(exp(cox_w_combos$coefficients[4]), digits = 2)),
`95% CI`=c(" ",paste((round(cox_w_combos$ci.lower[1], digits=2)),unlist(round(cox_w_combos$ci.upper[1], digits = 2)), sep=","),
paste((round(cox_w_combos$ci.lower[2], digits=2)),unlist(round(cox_w_combos$ci.upper[2], digits = 2)), sep=","),
paste((round(cox_w_combos$ci.lower[3], digits=2)),unlist(round(cox_w_combos$ci.upper[3], digits = 2)), sep=","),
paste((round(cox_w_combos$ci.lower[4], digits=2)),unlist(round(cox_w_combos$ci.upper[4], digits = 2)), sep=",")),
`p-value`=c(" ", round(cox_w_combos$prob[1], digits = 3),round(cox_w_combos$prob[2], digits = 3),round(cox_w_combos$prob[3], digits = 3),round(cox_w_combos$prob[4], digits = 3)),
stringsAsFactors=FALSE)
merged_comb<-merge(ncd_oneormore,univ_combo, by="Number.of.Comorbidities", all=T) %>% select(-c(confirmed, n_dead.x,alive)) %>%
arrange(Number.of.Comorbidities) %>%flextable() %>% set_header_labels(
values = c(Number.of.Comorbidities="Number of Comorbidities",n_dead.y="Dead (N)", n_perstime="Time (days)",HR="aHR", X95..CI="95% CI", p.value="p-value"), top = T)
merged_comb <- merged_comb %>%
footnote(i = 1, j = 6:7,
value = as_paragraph(
c("aHR = average Hazard Ratio, CI = Confidence Interval")),
ref_symbols = c("*"), part="header") %>% fontsize(size = 14, part = "header") %>% fontsize(size = 13, part = "body") %>% fontsize(size = 12, part = "footer") %>% flextable::save_as_docx(path="whoisddying_manuscript/New/univaritate_numbercomorbs.docx")
##################################################
#plotting
#distribution of cases with age group
g1<-ggplot(all_sexage,aes(agegroup,confirmed_perc, fill=patinfo_sex)) + geom_bar(stat="identity", width=.5, position = "dodge") +
pub_theme4 + scale_fill_manual(values = mycolors[c(2,10)]) + labs(y="% of cases", x= "Age group", fill="Sex") +
geom_text(aes(label=paste0(round(confirmed_perc), "%"), fontface= 2), size=8, position = position_dodge(width = 0.6), vjust=-0.2)
ggsave("whoisddying_manuscript/New/age_sex_cases.png", width=20, height = 15)
#distribution of death with age group
g2<-ggplot(all_sexage,aes(agegroup,dead_perc, fill=patinfo_sex)) + geom_bar(stat="identity", width=.5, position = "dodge") +
pub_theme4 + scale_fill_manual(values = mycolors[c(2,10)]) + labs(y="% of deaths", x= "Age group", fill="Sex") +
geom_text(aes(label=paste0(round(dead_perc), "%"), fontface= 2),size=8,position = position_dodge(width = 0.6), vjust=-0.2)
ggsave("whoisddying_manuscript/New/age_sex_death.png", width=20, height = 15)
p1<-age_pyramid(all_sexage,agegroup, split_by = patinfo_sex, count =confirmed_perc ) + pub_theme4 + scale_fill_manual(values = mycolors[c(2,10)]) + labs(y="% of cases", x= "Age group", fill="Sex")
ggsave("whoisddying_manuscript/New/age_sex_cases_pyramid_percentageallcasesUSE.png", width=20, height = 15)
p2<-age_pyramid(all_sexage,agegroup, split_by = patinfo_sex, count =dead_perc ) + pub_theme4 + scale_fill_manual(values = mycolors[c(2,10)]) + labs(y="% of deaths", x= "Age group", fill="Sex")
ggsave("whoisddying_manuscript/New/age_sex_death_pyramid_percentagealldedUSE.png", width=20, height = 15)
agesexcomb <- ggarrange(p1, p2,
labels = c("A", "B"),
ncol = 2, nrow = 1, font.label = list(size = 24))
ggsave("whoisddying_manuscript/New/age_sex_combinedfigure.png", agesexcomb,width=25, height = 15)
#cfr
g3<-ggplot(cfr_bysexage,aes(agegroup,CFR,group=patinfo_sex, fill=patinfo_sex)) + geom_point(aes(colour=patinfo_sex, group=patinfo_sex), size=3, show.legend = F) + geom_line(aes(colour=patinfo_sex, group=patinfo_sex))+
pub_theme4 + scale_colour_manual(values = mycolors[c(2,10)]) + labs(y="CFR (%)", x= "Age group", colour="Sex") +
geom_text(aes(label=paste0(round(CFR, digits = 1), "%"), colour=patinfo_sex, fontface= 2), size=8,position = position_dodge(width = 1), vjust=-0.5)
ggsave("whoisddying_manuscript/New/age_sex_cfr.png", width=20, height = 15)
#ncd and CFR boxplot which shows median IQR of all the counries in this analysis
g4<-ncd_new %>% filter(comcond_preexist1_alice=="no/not reported" |comcond_preexist1_alice=="yes") %>%
ggplot(aes(comcond_preexist1_alice, CFR)) + geom_boxplot(aes(fill=as.factor(comcond_preexist1_alice)), show.legend = F) +
pub_theme4 + theme(axis.text.x=element_text(angle=0)) + labs(y="CFR (%)", x= "Co-morbidity presence") + scale_fill_manual(values = mycolors2[c(1,3)])
ggsave("whoisddying_manuscript/New/ncd_cfr_countriesmedian_boxplot.png", width=20, height = 15)
g5<-ncd_all_chi_summary %>% ggplot(aes(comcond_preexist1_alice, CFR)) + geom_bar(aes(fill=as.factor(comcond_preexist1_alice)),stat="identity", show.legend = F) +
pub_theme4 + theme(axis.text.x=element_text(angle=0)) + labs(y="CFR (%)", x= "Co-morbidity presence") + scale_fill_manual(values = mycolors2[c(1,3)])
ggsave("whoisddying_manuscript/New/ncd_cfr_alltogether_barplot.png", width=20, height = 15)
#ncd type with CFR
ncd_type_long<- ncd_type %>% tidyr::pivot_longer(c(dead,alive), names_to= "dead/alive")
ncd_type_long$`co-morbidity`<-tools::toTitleCase(ncd_type_long$`co-morbidity`)
cfr_labels<- ncd_type_long %>% select(c("co-morbidity","CFR")) %>% unique(.)
g6<-ncd_type_long %>% ggplot(aes(factor(`co-morbidity`,levels=`co-morbidity`[`dead/alive`=="dead"][order(value[`dead/alive`=="dead"])]), value, fill=`dead/alive`, group=`dead/alive`)) + geom_bar(aes(fill=as.factor(`dead/alive`)),stat="identity", show.legend = T) +
pub_theme4 + theme(axis.text.x=element_text(angle=0)) + scale_fill_manual(values = mycolors2[c(3,1)]) +
coord_flip() + labs(y="Number of cases", x= "Condition", fill="Outcome") + geom_text(data=cfr_labels,aes(x=`co-morbidity`,y=1400,label=paste0("CFR= ",round(CFR, digits = 1), "%"),fontface= 2),
inherit.aes=FALSE, size=8,angle=0,hjust=1)
ggsave("whoisddying_manuscript/New/ncd_type_barplot.png", width=20, height = 15)
#health care workers CFR
hcw_long<- hcw %>% tidyr::pivot_longer(c(dead,alive), names_to= "dead/alive")
cfr_hcw_labels<- hcw_long %>% select(c("country_iso","CFR")) %>% unique(.)
g7<-hcw_long %>% ggplot(aes(factor(country_iso,levels=country_iso[`dead/alive`=="dead"][order(value[`dead/alive`=="dead"])]), value, fill=`dead/alive`, group=`dead/alive`)) + geom_bar(stat="identity", width=.5, position = "stack") +
pub_theme4 + theme(axis.text.x=element_text(angle=0)) + scale_fill_manual(values = mycolors2[c(3,1)]) + labs(y="Number of cases", x= "Country", fill="Outcome") +
geom_text(data=cfr_hcw_labels,aes(x=country_iso,y=700,label=paste0(round(CFR, digits = 1), "%"),fontface= 2), position = position_dodge(width=1),
inherit.aes=F, size=8,angle=0, hjust=1) + annotate("text", x = 0.75, y=750, label = "CFR=", size=8,fontface=2)
ggsave("whoisddying_manuscript/New/hcw_cfr_barplot.png", width=20, height = 15)
###### cox regression time ot death
#dataset used in failure
failure<-timetodeath_numbers %>% select(patinfo_sex_binary, patinfo_ageonset, hcw_binary,capital_final_binary,comcond_preexist1_alice_binary, pregant_binary,SurvObj)
sjlabelled::set_label(failure$patinfo_ageonset) <- "Age"
sjlabelled::set_label(failure$patinfo_sex_binary) <- "Sex (Male)"
sjlabelled::set_label(failure$comcond_preexist1_alice_binary) <- "Presence of comorbidity"
sjlabelled::set_label(failure$capital_final_binary) <- "Residence in capital city"
sjlabelled::set_label(failure$hcw_binary) <- "Health Care Worker"
summary_N_cox<-timetodeath_numbers %>% select(patinfo_sex, patinfo_ageonset, hcw,capital_final,comcond_preexist1_alice, vlfail) %>% mutate(across(-c(patinfo_ageonset),as.character)) %>% replace(is.na(.), "Not reported") %>% mutate(across(-c(patinfo_ageonset),as.factor))
levels(summary_N_cox$patinfo_sex) <- c('Female', 'Males')
levels(summary_N_cox$hcw) <- c('No', 'Not reported','Yes')
levels(summary_N_cox$capital_final) <- c('No', 'Not reported','Yes')
levels(summary_N_cox$comcond_preexist1_alice) <- c('No', 'Not reported','Yes')
levels(summary_N_cox$vlfail)<- c("Alive", "Dead")
sjlabelled::set_label(summary_N_cox$patinfo_ageonset) <- "Age"
sjlabelled::set_label(summary_N_cox$patinfo_sex) <- "Sex (Male)"
sjlabelled::set_label(summary_N_cox$comcond_preexist1_alice) <- "Presence of comorbidity"
sjlabelled::set_label(summary_N_cox$capital_final) <- "Residence in capital city"
sjlabelled::set_label(summary_N_cox$hcw) <- "Health Care Worker"
sjlabelled::set_label(summary_N_cox$vlfail) <- "Mortality"
#summary variables used in cox
summary<-summary_N_cox %>%select(-vlfail) %>% tbl_summary() %>% italicize_levels()
summary_deaths<- summary_N_cox%>% tbl_summary(by= vlfail,percent = "col") %>% modify_footnote( update=everything() ~ "Statistic presented: n (Case Fatality Rate (%)); Median (IQR)") %>% italicize_levels() %>%
modify_header(stat_by = "**{level}**, N = {n} ({style_percent(p, symbol = TRUE)})") %>% modify_header(update = stat_1 ~ "**drop column**")
tbl_merge(list(summary,summary_deaths)) %>% modify_spanning_header(everything() ~ NA_character_) %>% gtsummary::as_flex_table() %>% flextable::save_as_docx(path="whoisddying_manuscript/New/summary_N_vars_deadandCFR_DROPALIVECOL.docx")
#ensure drop Alive column post export
## univariate table
univ <- gtsummary::tbl_uvregression(failure,survival::coxph,y=SurvObj, exponentiate = T)
gtsummary::as_flex_table(univ) %>%
flextable::save_as_docx(path="whoisddying_manuscript/New/HR_uv.docx")
## ALEX: left age as continuous (no need to make binary):
## interpretation is that for each additional year the hazard of death was x higher
failure_2<-timetodeath_numbers %>% select(patinfo_sex_binary, patinfo_ageonset, hcw_binary,capital_final_binary,comcond_preexist1_alice_binary, vlfail, perstime, pregant_binary) %>%
filter(!is.na(vlfail) & !is.na(patinfo_ageonset) & !is.na(patinfo_sex_binary) & !is.na(comcond_preexist1_alice_binary) & !is.na(capital_final_binary) & !is.na(hcw_binary))
set_label(failure_2$vlfail) <- "Mortality"
set_label(failure_2$patinfo_ageonset) <- "Age"
set_label(failure_2$patinfo_sex_binary) <- "Sex (Male)"
set_label(failure_2$comcond_preexist1_alice_binary) <- "Presence of comorbidity"
set_label(failure_2$capital_final_binary) <- "Residence in capital city"
set_label(failure_2$hcw_binary) <- "Health Care Worker"
#cox <- survival::coxph(survival::Surv(perstime,vlfail) ~ patinfo_sex_binary + patinfo_ageonset + hcw_binary + capital_final_binary + comcond_preexist1_alice_binary, data = failure_2)
cox_updated <- survival::coxph(survival::Surv(perstime,vlfail) ~ patinfo_sex_binary + hcw_binary + capital_final_binary + comcond_preexist1_alice_binary*patinfo_ageonset, data = failure_2)
#found interactions with comorbidity and age. Model has been updated accordingly as above
multiv <- gtsummary::tbl_regression(cox_updated, exponentiate = TRUE)
combined_tbl <- gtsummary::tbl_merge(
tbls = list(univ, multiv),
tab_spanner = c("**Univariate**", "**Multivariable**")
)
## output combined table
gtsummary::as_flex_table(combined_tbl) %>%
flextable::save_as_docx(path="whoisddying_manuscript/New/HR_univandmultiv.docx")
#assumptions change variable names for easy plotting
failure_3<- failure_2 %>% rename(c("Sex (Male)"=patinfo_sex_binary, "Age (continuous)"=patinfo_ageonset, "Health Care worker Status"=hcw_binary, "Residence in capital city status"=capital_final_binary, "Comorbidity status"=comcond_preexist1_alice_binary))
cox_updated_2 <- survival::coxph(survival::Surv(perstime,vlfail) ~ `Sex (Male)` + `Health Care worker Status` + `Residence in capital city status` + `Comorbidity status`*`Age (continuous)`, data = failure_3)
zp <- cox.zph(cox, transform = "km")
plotzp<-survminer::ggcoxzph(zp)
new<- ggpubr::ggpar(plotzp,font.main=14, font.submain = 14, font.x=12, font.y=12)
#forest plot
survminer::ggforest(cox_updated_2,fontsize = 1) + pub_theme4
ggsave("whoisddying_manuscript/New/forrect_updatedmodel.png")
# # cox didnt meet assumptions for capital city and comorb varible. therefore vary them with time
# # use time splitter method as follows fot testing
#
# spl_failure_2 <-
# timetodeath_numbers %>% select(patinfo_sex_binary, patinfo_ageonset, hcw_binary,capital_final_binary,comcond_preexist1_alice_binary, vlfail, perstime) %>%
# Greg::timeSplitter(by = .5,
# event_var = "vlfail",
# event_start_status = 0,
# time_var = "perstime",
# time_related_vars = c("patinfo_ageonset"))
#
# interval_cox <-update(cox, Surv(Start_time, Stop_time, vlfail) ~ .,
# data = spl_failure_2)
#
# #now vary caputal city with time
# time_int_model <- update(interval_cox,.~.+capital_final_binary:Start_time)
#
# #check assumptions
# summary(time_int_model)
# cox.zph(time_int_model, transform = "km")
# #add comorbidity varying
# time_int_model_2 <- update(time_int_model,.~.+comcond_preexist1_alice_binary:Start_time)
# # again violating assumptions
#
# #test if capital city is non-linear
# spl_failure_2 %<>%
# mutate(capital_start = capital_final_binary * Start_time)
# anova(time_int_model,
# update(time_int_model, .~.+I(capital_start^2)))
#coxphw for non porportionality - some variable did not meet assumptions use weighted cox regressiom
#mulitvariable
#cox_w <- coxphw::coxphw(survival::Surv(perstime,vlfail) ~ patinfo_sex_binary + patinfo_ageonset + hcw_binary + capital_final_binary + comcond_preexist1_alice_binary, data = failure_2)
cox_w_updated <- coxphw::coxphw(survival::Surv(perstime,vlfail) ~ patinfo_sex_binary + hcw_binary + capital_final_binary + comcond_preexist1_alice_binary*patinfo_ageonset, data = failure_2)
multitab <- data.frame(Characteristic= c("Sex (male)","Health care worker status","Residence in capital city status","Comorbidity status","Age (continuous)","Comorbidity status*Age (continuous)","Pregnancy"),
N=c(nrow(failure_2),nrow(failure_2),nrow(failure_2),nrow(failure_2),nrow(failure_2),nrow(failure_2)," "),
HR=c(unlist(round(exp(cox_w_updated$coefficients[,1]), digits = 2)),NA),
`95% CI`=c(paste(unlist(round(cox_w_updated$ci.lower[,1], digits=2)),unlist(round(cox_w_updated$ci.upper[,1], digits = 2)), sep=",")," "),
`p-value`=c(unlist(round(cox_w_updated$prob, digits = 3))," "),
stringsAsFactors=FALSE)
#univariate
cox_w_sex <- coxphw::coxphw(survival::Surv(perstime,vlfail) ~ patinfo_sex_binary, data = failure_2)
cox_w_age <- coxphw::coxphw(survival::Surv(perstime,vlfail) ~ patinfo_ageonset, data = failure_2)
cox_w_hcw <- coxphw::coxphw(survival::Surv(perstime,vlfail) ~ hcw_binary, data = failure_2)
cox_w_cap <- coxphw::coxphw(survival::Surv(perstime,vlfail) ~ capital_final_binary , data = failure_2)
cox_w_com <- coxphw::coxphw(survival::Surv(perstime,vlfail) ~ comcond_preexist1_alice_binary, data = failure_2)
cox_w_preg <- coxphw::coxphw(survival::Surv(perstime,vlfail) ~ pregant_binary, data = failure_2)
unitab <- data.frame(Characteristic= c("Sex (male)","Age (continuous)","Health care worker status","Residence in capital city status","Comorbidity status","Pregnancy"),
N=c(nrow(failure_2),nrow(failure_2),nrow(failure_2),nrow(failure_2),nrow(failure_2),nrow(failure_2[failure_2$patinfo_sex_binary==0,])),
n_dead=c(nrow(failure_2[failure_2$patinfo_sex_binary==1 & failure_2$vlfail==1,])," ",nrow(failure_2[failure_2$hcw_binary==1 & failure_2$vlfail==1,]),nrow(failure_2[failure_2$capital_final_binary==1 & failure_2$vlfail==1,]),nrow(failure_2[failure_2$comcond_preexist1_alice_binary==1 & failure_2$vlfail==1,]),nrow(failure_2[failure_2$pregant_binary==1 & failure_2$vlfail==1,])),
n_perstime=c(sum(failure_2$perstime[failure_2$patinfo_sex_binary==1])," ", sum(failure_2$perstime[failure_2$hcw_binary==1]), sum(failure_2$perstime[failure_2$capital_final_binary==1]),sum(failure_2$perstime[failure_2$comcond_preexist1_alice_binary==1]), sum(failure_2$perstime[failure_2$pregant_binary==1], na.rm = T)),
HR=c(round(exp(cox_w_sex$coefficients[1]), digits = 4),round(exp(cox_w_age$coefficients[1]), digits = 4),round(exp(cox_w_hcw$coefficients[1]), digits = 4),round(exp(cox_w_cap$coefficients[1]), digits = 2),round(exp(cox_w_com$coefficients[1]), digits = 2),round(exp(cox_w_preg$coefficients[1]), digits = 2)),
`95% CI`=c(paste((round(cox_w_sex$ci.lower[1], digits=2)),unlist(round(cox_w_sex$ci.upper[1], digits = 2)), sep=","),
paste((round(cox_w_age$ci.lower[1], digits=2)),unlist(round(cox_w_age$ci.upper[1], digits = 2)), sep=","),
paste((round(cox_w_hcw$ci.lower[1], digits=2)),unlist(round(cox_w_hcw$ci.upper[1], digits = 2)), sep=","),
paste((round(cox_w_cap$ci.lower[1], digits=2)),unlist(round(cox_w_cap$ci.upper[1], digits = 2)), sep=","),
paste((round(cox_w_com$ci.lower[1], digits=2)),unlist(round(cox_w_com$ci.upper[1], digits = 2)), sep=","),
paste((round(cox_w_preg$ci.lower[1], digits=2)),unlist(round(cox_w_preg$ci.upper[1], digits = 2)), sep=",")),
`p-value`=c(round(cox_w_sex$prob[1], digits = 3),round(cox_w_age$prob[1], digits = 3),round(cox_w_hcw$prob[1], digits = 3),round(cox_w_cap$prob[1], digits = 3),round(cox_w_com$prob[1], digits = 3), round(cox_w_preg$prob[1], digits = 3)),
stringsAsFactors=FALSE)
combtab<-merge(unitab,multitab, by = "Characteristic", all=T) %>% select(-c(N.y)) %>%mutate(Characteristic = factor(Characteristic, levels = c("Sex (male)","Age (continuous)","Health care worker status","Residence in capital city status","Comorbidity status","Comorbidity status*Age (continuous)", "Pregnancy"))) %>%
arrange(Characteristic) %>%flextable() %>% set_header_labels(
values = c(Characterisitc="Characterisitc",n_dead="Dead (N)", n_perstime="Time (days)",N.x="N",HR.x="aHR*", X95..CI.x="95% CI*", p.value.x="p-value",HR.y="aaHR*", X95..CI.y="95% CI*", p.value.y="p-value"), top = T)
combtab<-add_header_row(combtab,values = c(" ", " "," "," "," ", "Univariate", "Univariate"," ", "Multivariate","Multivariate"), top = T ) %>% merge_h(part = "header") %>%
footnote(i = c(1), j = c(5,6,8,9),
value = as_paragraph(
c("aHR = average Hazard Ratio, CI = Confidence Interval, aaHR = average adjusted Hazard Ratio")),
ref_symbols = c("*")) %>% fontsize(size = 14, part = "header") %>% fontsize(size = 13, part = "body") %>% fontsize(size = 12, part = "footer") %>% flextable::save_as_docx(path="whoisddying_manuscript/New/HR_univandmultiv_weighted.docx")
##Counding / effect modifiers interation testing:
### testing interactions for each variable, Alex has extra code to complete a table which identifies if confounding or not, for now just eyeball
#Interaction if TRUE FALSE modes of each variable CIs dont overlap, crude will overlp with the FALSE mode if it is effect modifying
fail_strat<-failure_2
fail_strat$patinfo_ageonset_binary <-ifelse(fail_strat$patinfo_ageonset>=40,1,0)
fail_strat$vlfail<-as.logical(fail_strat$vlfail)
vars<-c(names(fail_strat[c(1,3,4,5,9)]))
outtab<-list()
for(i in vars){
fail_strat[[i]]<- as.logical(fail_strat[[i]])
}
for(i in vars){
strat_var= tidyselect::vars_select(colnames(fail_strat),i)
strat_tab<- epibuffet::tab_univariate(fail_strat,vlfail, vars[vars!=i], perstime = perstime, strata=strat_var, measure="IRR", woolf_test = T)
strat_tab$confounding<- NA
strat_tab$effectmod<-NA
strat_tab$stratifier<-i
outtab[[i]]<-strat_tab
openxlsx::write.xlsx(outtab[[i]],paste("whoisddying_manuscript/New/", i,".xlsx"))
}
## survival times
#just look at median time to death in those that died across the variables used in cox as we cannot use median survival time as 50% cohort not dead
deadonly_timeto<-timetodeath_numbers %>% select(patinfo_sex_binary, patinfo_ageonset, hcw_binary,capital_final_binary,comcond_preexist1_alice_binary, vlfail, perstime) %>% mutate(across(-c(patinfo_ageonset,perstime),as.factor)) %>% filter(vlfail==1)
deadonly_timeto$age_binary60<-ifelse(deadonly_timeto$patinfo_ageonset>=60,1,0)
deadonly_timeto$age_binary60<-as.factor(deadonly_timeto$age_binary60)
kw.sex<-kruskal.test(perstime ~patinfo_sex_binary,deadonly_timeto)
kw.age<-kruskal.test(perstime ~age_binary60,deadonly_timeto)
kw.hcw<-kruskal.test(perstime ~hcw_binary,deadonly_timeto)
kw.captial<-kruskal.test(perstime ~capital_final_binary,deadonly_timeto)
kw.comcond<-kruskal.test(perstime ~comcond_preexist1_alice_binary,deadonly_timeto)
levels(deadonly_timeto$patinfo_sex_binary) <- c('Female', 'Male')
levels(deadonly_timeto$age_binary60) <-c("<60", "≥60")
levels(deadonly_timeto$hcw_binary) <- c('No/Not reported','Yes')
levels(deadonly_timeto$capital_final_binary) <- c('No/Not reported', 'Yes')
levels(deadonly_timeto$comcond_preexist1_alice_binary) <- c('No/Not reported','Yes')
sjlabelled::set_label(deadonly_timeto$age_binary60) <- "Age"
sjlabelled::set_label(deadonly_timeto$patinfo_sex_binary) <- "Sex (Male)"
sjlabelled::set_label(deadonly_timeto$comcond_preexist1_alice_binary) <- "Presence of comorbidity"
sjlabelled::set_label(deadonly_timeto$capital_final_binary) <- "Residence in capital city"
sjlabelled::set_label(deadonly_timeto$hcw_binary) <- "Health Care Worker"
patinfo_sex_binary<- deadonly_timeto %>% filter(!is.na(patinfo_sex_binary)) %>% group_by(patinfo_sex_binary) %>% summarise(N=n(), median=paste0(median(perstime,na.rm = T),", (" ,quantile(perstime,0.25), ",",quantile(perstime,0.75), ")")) %>% mutate(pval="") %>%
add_row(patinfo_sex_binary = "Sex", N=NA, median = NA, pval = as.character(round(as.numeric(kw.sex[3]), digits = 4))) %>% rename(Characteristic=patinfo_sex_binary)
patinfo_sex_binary<- patinfo_sex_binary[c(3,1:2),]
age_binary60<- deadonly_timeto %>% filter(!is.na(age_binary60)) %>% group_by(age_binary60) %>% summarise(N=n(), median=paste0(median(perstime),", (" ,quantile(perstime,0.25), ",",quantile(perstime,0.75), ")")) %>% mutate(pval="") %>%
add_row(age_binary60 = "Age", N=NA,median = NA, pval = as.character(round(as.numeric(kw.age[3]), digits = 4))) %>% rename(Characteristic=age_binary60)
age_binary60<- age_binary60[c(3,1:2),]
hcw_binary<- deadonly_timeto %>% group_by(hcw_binary) %>% summarise(N=n(), median=paste0(median(perstime),", (" ,quantile(perstime,0.25), ";",quantile(perstime,0.75), ")")) %>% mutate(pval="") %>%
add_row(hcw_binary = "Health care Worker", N=NA, median = NA, pval = as.character(round(as.numeric(kw.hcw[3]), digits = 4))) %>% rename(Characteristic=hcw_binary)
hcw_binary<- hcw_binary[c(3,1:2),]
capital_final_binary<- deadonly_timeto %>% group_by(capital_final_binary) %>% summarise(N=n(), median=paste0(median(perstime),", (" ,quantile(perstime,0.25), ",",quantile(perstime,0.75), ")")) %>% mutate(pval="") %>%
add_row(capital_final_binary = "Residence in capital city", N=NA, median = NA, pval = as.character(round(as.numeric(kw.captial[3]), digits = 4))) %>% rename(Characteristic=capital_final_binary)
capital_final_binary<- capital_final_binary[c(3,1:2),]
comcond_preexist1_alice_binary<- deadonly_timeto %>% group_by(comcond_preexist1_alice_binary) %>% summarise(N=n(),median=paste0(median(perstime),", (" ,quantile(perstime,0.25), ",",quantile(perstime,0.75), ")")) %>% mutate(pval="") %>%
add_row(comcond_preexist1_alice_binary = "Presence of comorbidity", N=NA, median = NA, pval = as.character(round(as.numeric(kw.comcond[3]), digits = 4))) %>% rename(Characteristic=comcond_preexist1_alice_binary)
comcond_preexist1_alice_binary<- comcond_preexist1_alice_binary[c(3,1:2),]
deadcount<-as.character(nrow(deadonly_timeto))
deadonly_timeto_summary<-do.call("rbind",list(patinfo_sex_binary,age_binary60,hcw_binary,capital_final_binary,comcond_preexist1_alice_binary)) %>% mutate(N = if_else(is.na(N), "", as.character(N)),median = if_else(is.na(median), "", as.character(median)), pval = if_else(is.na(pval), "", as.character(pval))) %>% flextable() %>%
set_header_labels(values = c(Characterisitc="Characterisitc",N=paste("Dead, N=",deadcount), median="Time to death (days)", pval="p-value")) %>% footnote(i = c(1), j = c(3,4), part="header",value = as_paragraph(c("Statistics presented: Median (IQR)", "Statistical test: Kruskal-Wallis rank sum test ")),ref_symbols = c("*","†")) %>%
italic(i = c(2,3,5,6,8,9,11,12,14,15), j = 1, italic = TRUE, part = "body") %>%
fontsize(size = 14, part = "header") %>% fontsize(size = 13, part = "body") %>% fontsize(size = 12, part = "footer")
deadonly_timeto_summary%>% flextable::save_as_docx(path="whoisddying_manuscript/New/timetodeath_median.docx")
#time to death by region
timeto_country<-timetodeath_numbers %>% filter(vlfail==1) %>% group_by(country_full) %>% summarise(N=n(),median=paste0(median(perstime),", (" ,quantile(perstime,0.25), ",",quantile(perstime,0.75), ")")) %>%
flextable() %>%
set_header_labels(values = c(country_full="Country", N= "Dead (N)", median="Time to death (days)")) %>%
footnote(i = c(1), j = c(3), part="header",value = as_paragraph(c("Statistics presented: Median (IQR)")),ref_symbols = c("*")) %>%
fontsize(size = 14, part = "header") %>% fontsize(size = 13, part = "body") %>% fontsize(size = 12, part = "footer")
timeto_country%>% flextable::save_as_docx(path="whoisddying_manuscript/New/timetodeath_median_bycountry.docx")
#
# gtsummary::tbl_survfit(
# timetodeath_numbers,
# y = SurvObj,
# include = c(patinfo_sex_binary, patinfo_ageonset, hcw_binary,capital_final_binary,comcond_preexist1_alice_binary),
# probs = 0.5,
# label_header = "**Median Survival**"
# )
# tbl_survfit_ex3 <-
# list(survfit(Surv(perstime,vlfail) ~ 1, failure_2),
# survfit(Surv(perstime,vlfail) ~ patinfo_sex_binary, failure_2),
# survfit(Surv(perstime,vlfail) ~ patinfo_age_binary, failure_2),
# survfit(Surv(perstime,vlfail) ~ hcw_binary, failure_2),
# survfit(Surv(perstime,vlfail) ~ capital_final_binary, failure_2),
# survfit(Surv(perstime,vlfail) ~ comcond_preexist1_alice_binary, failure_2)) %>%
# gtsummary::tbl_survfit(probs = c(0.5))
survminer::ggsurvplot(
fit = survfit(Surv(perstime,vlfail) ~ patinfo_sex_binary, deadonly_timeto),
xlab = "time",
ylab = "Overall survival probability")
# ###glm model
# library(rms)
# library(foreign)
# library(pROC)
# library(ResourceSelection)
# library(sjPlot)
# library(sjlabelled)
# library(sjmisc)
# library(ggplot2)
#
#
# #model fit on cases with complete variables
# fit.dat<- fit %>% filter(!is.na(vlfail) & !is.na(patinfo_ageonset) & !is.na(patinfo_sex_binary) & !is.na(comcond_preexist1_alice_binary) & !is.na(capital_final_binary) & !is.na(hcw_binary))
# dd <- datadist(fit.dat)
# options(datadist='dd')
#
# set_label(fit.dat$vlfail) <- "Mortality"
# set_label(fit.dat$patinfo_ageonset) <- "Age"
# set_label(fit.dat$patinfo_sex_binary) <- "Sex (Male)"
# set_label(fit.dat$comcond_preexist1_alice_binary) <- "Presence of comorbidity"
# set_label(fit.dat$capital_final_binary) <- "Residence in capital city"
# set_label(fit.dat$hcw_binary) <- "Health Care Worker"
#
#
# #considering all of these variables, age can be continous
# fit.model<-lrm(vlfail~patinfo_ageonset + patinfo_sex_binary +comcond_preexist1_alice_binary +capital_final_binary+ hcw_binary, data=fit.dat, x=T, y=T)
# summary(fit.model)
#
# #need a glm oject in gtsummary same as above just allows tabulation
# fit.model.2 <- glm(vlfail~patinfo_ageonset + patinfo_sex_binary +comcond_preexist1_alice_binary +capital_final_binary+ hcw_binary, data=fit.dat, family = binomial)
# summary(fit.model.2)
# flextable::save_as_docx(gtsummary::as_flex_table(gtsummary::tbl_regression(fit.model.2,exponentiate = T)),path="whoisddying_manuscript/New/OR_forlrm.docx")
#
#
# # backward selection moethod with a p-value of 0.157
# fastbw(fit.model, type="individual", rule="p", sls=0.157)
# #no factors deleted
#
# # store the predicted probabilities by using the predict function
# predict.fit <- predict(fit.model, type="fitted")
# myROC <- roc(fit.model$y, predict.fit, pl=T, ci=TRUE)
# myROC
# hoslem.test(fit.model$y, predict.fit)
#
# g8<-plot_model(fit.model, colour= mycolours2,value.size = 9,
# value.offset = 0.3,
# dot.size = 5,
# line.size = 3,
# sort.est = T, vline.color = "black", show.values = T) + pub_theme4
# ggsave("whoisddying_manuscript/New/lrm_forrest.png",width=20, height = 15)
###############################################################################
## map of incidence vs CFR
###############################################################################
## load packages
library("rio")
library("epichecks") # github package from https://github.com/R4IDSR/epichecks
library("sf")
library("ggplot2")
library("dplyr")
library("janitor")
library("stringr")
library("forcats")
library("ggspatial")
library("patchwork")
## import alice's count dataset
counts <- rio::import("https://docs.google.com/spreadsheets/d/1hAgFXKzxrCC4O_4nqNlrid1uZYHNQbwf/edit#gid=177225701") %>%
filter(country_iso != "sum") %>%
select(-x2020)
## read in UN population data
population <- rio::import("https://population.un.org/wpp/Download/Files/1_Indicators%20(Standard)/EXCEL_FILES/1_Population/WPP2019_POP_F01_1_TOTAL_POPULATION_BOTH_SEXES.xlsx")
## clean up the population data
## fix names
colnames(population) <- population[12, ]
population <- clean_names(population)
## drop extras at top
population <- population[-c(1:12), ]
## back up original
og_population <- population
## create iso code variable
population$country_iso <- countrycode::countrycode(population$country_code,
origin = "un",
destination = "iso3c")
## only keep countries listed in counts
population <- population %>%
filter(country_iso %in% counts$country_iso) %>%
select(region_subregion_country_or_area, country_iso, x2020)
## merge counts with population
counts <- left_join(counts, population, by = "country_iso")
## correct population as is in thousands
counts <- mutate(counts,
x2020 = as.numeric(x2020) * 1000)
## add in important vars
counts <- mutate(counts,
## calculate incidence per 10k
incidence = confirmed / x2020 * 10000,
## create groups of incidence
incidence_grp = cut(incidence,
breaks = c(0, 0.01, 0.5, 1.5, 2.5, 3.5, Inf),
include.lowest = TRUE,
right = TRUE,
labels = c("0", "<0.5", "0.5-1.4", "1.5-2.4", "2.5-3.4", "3.5+")),
## create CFR groups
cfr_grp = cut(CFR,
breaks = c(0, 0.01, 0.9, 3.5, 5.5, 7.5, Inf),
include.lowest = TRUE,
labels = c("0","<1","1-3.4", "3.5-5.4", "5.5-7.5", "7.5+")),
## create an id for joining colour defs
merger = str_glue("{as.numeric(incidence_grp)-1}-{as.numeric(cfr_grp)-1}")
)
## get file path
shp_path <- system.file("extdata", "AFRO.shp", package = "epichecks")
## read in shapefile
afro_shp <- read_sf(shp_path)
## get list of islands to subset
islands <- c("Cape Verde",
"Comoros",
"Mauritius",
"Seychelles",
"Sao Tome and Principe")
## Prepare data for plotting ---------------------------------------------------
## define your colour palette (need to have one more to include 0?)
cols2 <- make_colours(num1 = 6,
num2 = 6)
## minus 1 from the colour numbering to include 0 counts
cols2 <- cols2 %>%
mutate_at(.vars = c("rws", "cls"),
.funs = funs(. - 1))
## create a unique identifier by combining the row and columnn counts
## this will match the counts of reports after
cols2 <- cols2 %>%
mutate(merger = str_glue("{rws}-{cls}"))
## join the colours to the counts
map_counts <- left_join(
## only keep certain columns from the counts dataset
select(counts,
country_iso,
incidence_grp,
cfr_grp,
merger
),
cols2, by = "merger")
## join counts with colours to the shapefile
afro_shp <- left_join(afro_shp, map_counts, by = c("ISO_3_CODE" = "country_iso"))
## set missings (i.e. non-afro) to grey
afro_shp <- afro_shp %>%
mutate(clrs = fct_explicit_na(clrs, na_level = "Grey90"))
## Plot map --------------------------------------------------------------------
## first plot the map as a choropleth
base_map <- ggplot(data = afro_shp) +
## fill shape with the colours column
geom_sf(aes(fill = clrs),
color = "black",
size = 0.1) +
## fill countries by the colours as they are named clrs column
scale_fill_identity(drop = FALSE,
na.value = "grey90") +
## add a scale bar
annotation_scale(location = "br") +
## get rid of axes and extras
theme_void(base_size = 18)
## plot each of the islands seperately
island_plot <- purrr::map(islands,
function(x) {
ggplot(data = filter(afro_shp, country == x)) +
## fill shape with the colours column
geom_sf(aes(fill = clrs),
color = "black",
size = 0.1) +
## fill countries by the colours as
## they are named clrs column
scale_fill_identity(drop = FALSE,
na.value = "grey90") +
## add a scale bar
# annotation_scale(location = "br") +
## get rid of axes and extras
theme_void(base_size = 12) +
theme(legend.position = "none",
panel.border = element_rect(colour = "black", fill = NA),
plot.margin = margin(10,
10,
10,
10)) +
ggtitle(x)
}
)
## plot the legend seperately
## use the cols2 dataset (colour scheme generated by make_colours function)
## use the counts for rows and columns and fill by the clrs columns
legend <- ggplot(
data = cols2,
mapping = aes(
x = cls,
y = rws,
fill = clrs)) +
## create a tile plot (boxes)
geom_tile() +
## fill tiles by the colour names in clr
scale_fill_identity() +
## label the axis ticks from 0 to the number of groups
scale_x_continuous(breaks = c(0:5),
labels = levels(counts$cfr_grp)) +
scale_y_continuous(breaks = c(0:5),
labels = levels(counts$incidence_grp)) +
## make tiles boxes
coord_fixed() +
## label axes
labs(x = "CFR (%)", y = "Incidence (per 10k)") +
theme_minimal(base_size = 18) +
theme(axis.text.x = element_text(angle = 45, hjust = 0.75, vjust = 1))
## using {patchwork} - set the plot layout area
## The map goes from top left at 1,1 to bottom right at 10,10
## the legend sits within that as a smaller box
## (think of this as upside-down cartesian coordinates)
layout <- c(
area(t = 1, l = 1, b = 10, r = 10),
area(t = 7, l = 2, b = 9, r = 4),
area(t = 11, l = 1, b = 12, r = 10)
)
## combine the map and the legend with the above layout
full_map <- base_map +
legend +
wrap_plots(island_plot, nrow = 1) +
plot_layout(design = layout)
ggsave(plot = full_map,
filename = here::here("included_map.jpg"),
height = 7,
width = 7*1.25,
units = "in")
################################################################################
## comparing death counts with external databases
library("COVID19")
## download data (cleaned so NAs are replaced with zero)
world_counts <- covid19(raw = FALSE)
## get the sources of data for each
world_counts_source <- covid19cite(world_counts)
## only keep countries of interest for counts
world_counts <- world_counts %>%
filter(id %in% counts$country_iso)
## only keep countries of interest for sources
world_counts_source <- world_counts_source %>%
filter(iso_alpha_3 %in% counts$country_iso)
## find first non missing date with death counts
# earliest_deaths <- world_counts %>%
# group_by(id) %>%
# filter(!is.na(deaths)) %>%
# summarise(first = first(date)) %>%
# summarise(first = min(first)) %>%
# pull() %>%
# as.Date()
## filter based on that date
# world_counts <- world_counts %>%
# filter(date >= earliest_deaths)
## define our start and end dates
start_date <- as.Date("2020-03-21")
end_date <- as.Date("2020-10-31")
## filter for reports on the dates of interest (remember cumullative counts)
world_counts <- world_counts %>%
filter(date %in% c(
start_date,
end_date))
## create the amount at end_date by substracting amount at start_date
indicator_counts <- world_counts %>%
group_by(id) %>%
summarise(confirmed = confirmed - lag(confirmed),
deaths = deaths - lag(deaths),
population = population) %>%
## drop start_date row
filter(!is.na(confirmed)) %>%
## calculate inc and cfr
mutate(incidence = confirmed / population * 100000,
cfr = deaths / confirmed * 100)
## label data from johns hopkins
names(indicator_counts) <- paste0(names(indicator_counts), "_jhu")
## combine with our counts data
comparisson <- left_join(
select(
counts,
country_iso,
confirmed,
dead,
population = x2020,
incidence,
cfr = CFR
),
indicator_counts,
by = c("country_iso" = "id_jhu")
)
## sort columns so can compare next to eachother
comparisson <- comparisson %>%
select(country_iso,
contains("confirmed"),
contains("dea"),
contains("population"),
contains("incidence"),
contains("cfr"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.