inst/doc/epiR_descriptive.R

## ----echo = FALSE, message = FALSE--------------------------------------------

# If you want to create a PDF document paste the following after line 9 above:
#   pdf_document:
#     toc: true
#     highlight: tango
#     number_sections: no
#     latex_engine: xelatex    
# header-includes: 
#    - \usepackage{fontspec}

knitr::opts_chunk$set(collapse = T, comment = "#>")
options(tibble.print_min = 4L, tibble.print_max = 4L)

## ----message = FALSE----------------------------------------------------------
library(epiR); library(ggplot2); library(scales); library(zoo)

ncas <- 4; npop <- 200
tmp <- as.matrix(cbind(ncas, npop))
epi.conf(tmp, ctype = "prevalence", method = "exact", N = 1000, design = 1, 
   conf.level = 0.95) * 100

## -----------------------------------------------------------------------------
ncas <- 136; ntar <- 22050
tmp <- as.matrix(cbind(ncas, ntar))
epi.conf(tmp, ctype = "inc.rate", method = "exact", N = 1000, design = 1, 
   conf.level = 0.95) * 1000

## -----------------------------------------------------------------------------
ncas <- c(347,444,145,156,56,618,203,113,10,30,663,447,213,52,256,216,745,97,31,250,430,494,96,544,352)
npop <- c(477,515,1114,625,69,1301,309,840,68,100,1375,1290,1289,95,307,354,1393,307,35,364,494,1097,261,615,508)
rname <- paste("Region ", 1:length(npop), sep = "")
dat.df <- data.frame(rname,ncas,npop)

## -----------------------------------------------------------------------------
tmp <- as.matrix(cbind(dat.df$ncas, dat.df$npop))
tmp <- epi.conf(tmp, ctype = "prevalence", method = "exact", N = 1000, design = 1, 
   conf.level = 0.95) * 100
dat.df <- cbind(dat.df, tmp)
head(dat.df)

## -----------------------------------------------------------------------------
dat.df <- dat.df[sort.list(dat.df$est),]
dat.df$rank <- 1:nrow(dat.df)
dat.df$labels <- dat.df$rname

## ----dfreq02-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:dfreq02}Ranked error bar plot showing the prevalence of disease (and its 95% confidence interval) for 100 population units."----
ggplot(data = dat.df, aes(x = rank, y = est)) +
  theme_bw() +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.1) +
  geom_point() +
  scale_x_continuous(limits = c(0,25), breaks = dat.df$rank, labels = dat.df$labels, name = "Region") +
  scale_y_continuous(limits = c(0,100), name = "Cases per 100 individuals at risk") + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

## -----------------------------------------------------------------------------
ndelete <- function(x, n){
  id <- seq(from = 1, to = length(x), by = n)
  rval <- rep("", times = length(x))
  rval[id] <- x[id]
  rval
}

dat.df$labels <- ndelete(x = dat.df$rname, n = 2)

## ----dfreq03-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:dfreq03}Ranked error bar plot showing the prevalence of disease (and its 95% confidence interval) for 100 population units."----
ggplot(data = dat.df, aes(x = rank, y = est)) +
  theme_bw() +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.1) +
  geom_point() +
  scale_x_continuous(limits = c(0,25), breaks = dat.df$rank, labels = dat.df$labels, name = "Region") +
  scale_y_continuous(limits = c(0,100), name = "Cases per 100 individuals at risk") + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

## -----------------------------------------------------------------------------
n.males <- 100; n.females <- 50
odate <- seq(from = as.Date("2022-07-26"), to = as.Date("2022-12-13"), by = 1)
prob <- c(1:100, 41:1); prob <- prob / sum(prob)
modate <- sample(x = odate, size = n.males, replace = TRUE, p = prob)
fodate <- sample(x = odate, size = n.females, replace = TRUE)

dat.df <- data.frame(sex = c(rep("Male", n.males), rep("Female", n.females)), 
   odate = c(modate, fodate))

# Sort the data in order of odate:
dat.df <- dat.df[sort.list(dat.df$odate),] 

## ----epicurve01-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve01}Frequency histogram showing counts of incident cases of disease as a function of calendar date, 26 July to 13 December 2022."----
ggplot(data = dat.df, aes(x = as.Date(odate))) +
  theme_bw() +
  geom_histogram(binwidth = 7, colour = "gray", fill = "dark blue", linewidth = 0.1) +
  scale_x_date(breaks = date_breaks("7 days"), labels = date_format("%d %b"), 
     name = "Date") +
  scale_y_continuous(breaks = seq(from = 0, to = 30, by = 5), limits = c(0,30), name = "Number of cases") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

## ----epicurve02-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve02}Frequency histogram showing counts of incident cases of disease as a function of calendar date, 26 July to 13 December 2022. Superimposed on this plot is a smoothed estimate of case density."----

ggplot(data = dat.df, aes(x = odate)) +
  theme_bw() +
  geom_histogram(binwidth = 7, colour = "gray", fill = "dark blue", linewidth = 0.1) +
  geom_density(aes(y = after_stat(density) * (nrow(dat.df) * 7)), colour = "red") +
  scale_x_date(breaks = date_breaks("7 days"), labels = date_format("%d %b"), 
     name = "Date") +
  scale_y_continuous(breaks = seq(from = 0, to = 30, by = 5), limits = c(0,30), name = "Number of cases") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))


## ----epicurve03-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve03}Frequency histogram showing counts of incident cases of disease as a function of calendar date, 26 July to 13 December 2022, conditioned by sex."----
ggplot(data = dat.df, aes(x = as.Date(odate))) +
  theme_bw() +
  geom_histogram(binwidth = 7, colour = "gray", fill = "dark blue", linewidth = 0.1) +
  scale_x_date(breaks = date_breaks("1 week"), labels = date_format("%d %b"), 
     name = "Date") +
  scale_y_continuous(breaks = seq(from = 0, to = 30, by = 5), limits = c(0,30), name = "Number of cases") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 
  facet_grid( ~ sex)

## ----epicurve04-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve04}Frequency histogram showing counts of incident cases of disease as a function of calendar date, 26 July to 13 December 2022, conditioned by sex. An event that occurred on 31 October 2022 is indicated by the vertical dashed line."----
ggplot(data = dat.df, aes(x = as.Date(odate))) +
  theme_bw() +
  geom_histogram(binwidth = 7, colour = "gray", fill = "dark blue", linewidth = 0.1) +
  scale_x_date(breaks = date_breaks("1 week"), labels = date_format("%d %b"), 
     name = "Date") +
  scale_y_continuous(breaks = seq(from = 0, to = 30, by = 5), limits = c(0,30), name = "Number of cases") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 
  facet_grid( ~ sex) +
  geom_vline(aes(xintercept = as.numeric(as.Date("31/10/2022", format = "%d/%m/%Y"))), 
   linetype = "dashed")

## ----epicurve05-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve05}Frequency histogram showing counts of incident cases of disease as a function of calendar date, 26 July to 13 December 2022, grouped by sex."----
ggplot(data = dat.df, aes(x = as.Date(odate), group = sex, fill = sex)) +
  theme_bw() +
  geom_histogram(binwidth = 7, colour = "gray", linewidth = 0.1) +
  scale_x_date(breaks = date_breaks("1 week"), labels = date_format("%d %b"), 
     name = "Date") +
  scale_y_continuous(breaks = seq(from = 0, to = 30, by = 5), limits = c(0,30), name = "Number of cases") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 
  geom_vline(aes(xintercept = as.numeric(as.Date("31/10/2022", format = "%d/%m/%Y"))), 
   linetype = "dashed") + 
  scale_fill_manual(values = c("#d46a6a", "#738ca6"), name = "Sex") +
  theme(legend.position = c(0.90, 0.80))

## ----epicurve06-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve06}Frequency histogram showing counts of incident cases of disease as a function of calendar date, 26 July to 13 December 2022, grouped by sex."----
ggplot(data = dat.df, aes(x = as.Date(odate), group = sex, fill = sex)) +
  theme_bw() +
  geom_histogram(binwidth = 7, colour = "gray", linewidth = 0.1, position = "dodge") +
  scale_x_date(breaks = date_breaks("1 week"), labels = date_format("%d %b"), 
     name = "Date") +
  scale_y_continuous(breaks = seq(from = 0, to = 30, by = 5), limits = c(0,30), name = "Number of cases") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 
  geom_vline(aes(xintercept = as.numeric(as.Date("31/10/2022", format = "%d/%m/%Y"))), 
   linetype = "dashed") + 
  scale_fill_manual(values = c("#d46a6a", "#738ca6"), name = "Sex") + 
  theme(legend.position = c(0.90, 0.80))

## -----------------------------------------------------------------------------
edate <- seq(from = as.Date("2020-02-24"), to = as.Date("2020-07-20"), by = 1)
ncas <- c(1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,2,
   0,0,1,0,1,1,2,3,2,5,10,15,5,7,17,37,31,34,42,46,73,58,67,57,54,104,77,52,
   90,59,64,61,21,26,25,32,24,14,11,23,6,8,9,4,5,7,14,14,1,5,1,1,5,3,3,1,3,3,
   7,5,10,11,21,14,16,15,13,13,8,5,16,7,9,19,13,5,6,6,5,5,10,9,2,2,5,8,10,6,
   8,8,4,9,7,8,3,1,4,2,0,4,8,5,8,10,12,8,20,16,11,25,19)  

dat.df <- data.frame(edate, ncas)
dat.df$edate <- as.Date(dat.df$edate, format = "%Y-%m-%d")
head(dat.df)

## ----epicurve07-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve07}Frequency histogram showing counts of incident cases of disease as a function of calendar date, 24 February 2020 to 20 July 2020."----
ggplot() +
  theme_bw() +
  geom_histogram(dat.df, mapping = aes(x = edate, weight = ncas), binwidth = 1, fill = "#738ca6", colour = "grey", linewidth = 0.1) +
  scale_x_date(breaks = date_breaks("2 weeks"), labels = date_format("%b %Y"), 
     name = "Date") +
  scale_y_continuous(limits = c(0,125), name = "Number of cases") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

## -----------------------------------------------------------------------------
max(cumsum(dat.df$ncas))

## ----epicurve08-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve08}Frequency histogram showing counts of incident cases of disease as a function of calendar date, 24 February 2020 to 20 July 2020. Superimposed on this plot is a line showing cumulative case numbers."----

ggplot() +
  theme_bw() +
  geom_histogram(data = dat.df, mapping = aes(x = edate, weight = ncas), binwidth = 1, fill = "#738ca6", colour = "grey", linewidth = 0.1) +
  geom_line(data = dat.df, mapping = aes(x = edate, y = cumsum(ncas) / 15)) + 
  scale_x_date(breaks = date_breaks("2 weeks"), labels = date_format("%b %Y"), 
     name = "Date") +
  scale_y_continuous(limits = c(0,125), name = "Number of cases", 
      sec.axis = sec_axis(~ . * 15, name = "Cumulative number of cases")) +
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

## ----epicurve09-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:epicurve09}Frequency histogram showing counts of incident cases of disease as a function of calendar date, 24 February 2020 to 20 July 2020. Superimposed on this plot is the 5-day rolling mean number of cases per day."----

dat.df$rncas <- rollmean(x = dat.df$ncas, k = 5, fill = NA)

ggplot() +
  theme_bw() +
  geom_histogram(data = dat.df, mapping = aes(x = edate, weight = ncas), binwidth = 1, fill = "#738ca6", colour = "grey", linewidth = 0.1) +
  geom_line(data = dat.df, mapping = aes(x = edate, y = rncas), colour = "red") + 
  scale_x_date(breaks = date_breaks("2 weeks"), labels = date_format("%b %Y"), 
     name = "Date") +
  scale_y_continuous(limits = c(0,125), name = "Number of cases") +
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

## ----message = FALSE, warning = FALSE-----------------------------------------
library(sf); library(spData); library(plyr); library(RColorBrewer); library(sp); library(spatstat)

nyage65utm.sf <- st_read(dsn = system.file("shapes/NY8_bna_utm18.gpkg", package = "spData")[1])
head(nyage65utm.sf)

## ----spatial01-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:spatial01}Map of an area of New York, USA showing for each census tract the percentage of individuals aged greater than 65 years."----
ggplot() + 
   theme_bw() +
   geom_sf(data = nyage65utm.sf, aes(fill = PCTAGE65P), colour = "dark grey") + 
   scale_fill_gradientn(limits = c(0,0.5), colours = brewer.pal(n = 5, "Reds"), guide = "colourbar") +
   scale_x_continuous(name = "Longitude") +
   scale_y_continuous(name = "Latitude") +
   labs(fill = "SIDS 1974")

## ----message = FALSE----------------------------------------------------------
data(chorley)
chorley.df <- data.frame(xcoord = chorley$x * 1000, ycoord = chorley$y * 1000, status = chorley$marks)
chorley.df$status <- factor(chorley.df$status, levels = c("lung","larynx"), labels = c("Lung","Larynx"))

chlarynxbng.sf <- st_as_sf(chorley.df, coords = c("xcoord","ycoord"), remove = FALSE)
st_crs(chlarynxbng.sf) <- 27700

chlarynxbng.ow <- chorley$window

## -----------------------------------------------------------------------------
coords <- matrix(c(chlarynxbng.ow$bdry[[1]]$x * 1000, chlarynxbng.ow$bdry[[1]]$y * 1000), ncol = 2, byrow = FALSE)
pol <- Polygon(coords, hole = FALSE)
pol <- Polygons(list(pol),1)
pol <- SpatialPolygons(list(pol))
chpolbng.spdf <- SpatialPolygonsDataFrame(Sr = pol, data = data.frame(id = 1), match.ID = TRUE)

## -----------------------------------------------------------------------------
chpolbng.sf <- as(chpolbng.spdf, "sf")
st_crs(chpolbng.sf) <- 27700

## -----------------------------------------------------------------------------
mformat <- function(){
   function(x) format(x / 1000, digits = 2)
}

## ----spatial02-fig, warnings = FALSE, echo = TRUE, fig.cap="\\label{fig:spatial02}Point map showing the place of residence of individuals diagnosed with laryngeal cancer (Pos) and lung cancer (Neg), Copull Lancashire, UK, 1972 to 1980."----
ggplot() +
  theme_bw() +
  geom_sf(data = chlarynxbng.sf, aes(colour = status, shape = status)) +
  geom_sf(data = chpolbng.sf, fill = "transparent", colour = "black") +
  coord_sf(datum = st_crs(chpolbng.sf)) +
  scale_colour_manual(name = "Type", values = c("grey","red")) +
  scale_shape_manual(name = "Type", values = c(1,16)) +
  scale_x_continuous(name = "Easting (km)", labels = mformat()) +
  scale_y_continuous(name = "Northing (km)", labels = mformat()) +
  theme(legend.position = c(0.10, 0.12))

Try the epiR package in your browser

Any scripts or data that you put into this service are public.

epiR documentation built on June 22, 2024, 10:57 a.m.