# Set knitr options knitr::opts_chunk$set(echo = FALSE, # echo code so reader can see what is happening warning = FALSE, message = FALSE, fig_caption = TRUE, fig_height = 6, # default, make it bigger to stretch vertical axis fig_width = 8, fig_width = 8, # full width tidy = TRUE) # tidy up code in case echo = TRUE
# Load Packages ---- rmdLibs <- c("ggplot2", "kableExtra", "plotly", "skimr", "viridis") dkUtils::loadLibraries(rmdLibs) # Adjust knitr options if required knitr::opts_chunk$set(echo = TRUE) # Log compile time: myParams$startTime <- proc.time() # Parameters ---- # set xlim for plotly to reduce plot size & load speed myParams$xlimMinDateTime <- lubridate::as_datetime("2018-01-01 00:00:00") myParams$xlimMaxDateTime <- lubridate::as_datetime("2020-06-01 00:00:00") myParams$xlimMinDate <- lubridate::as_date("2018-01-01") myParams$xlimMaxDate <- lubridate::as_date("2020-06-01") myParams$oneYearAgo <- lubridate::as_datetime(now() - (365*24*60*60)) # set values for annotations myParams$lockDownStartDate <- as.Date("2020-03-24") myParams$lockDownStartDateTime <- lubridate::as_datetime("2020-03-24 00:00:00") myParams$lockDownEndDate <- lubridate::today() myParams$lockDownEndDateTime <- lubridate::now() myParams$recentCutDate <- lubridate::as_date("2020-03-01") myParams$gamCap <- "Trend line = Generalized additive model (gam) with integrated smoothness estimation" myParams$lockdownCap <- "\nColoured rectangle = UK covid lockdown to date" myParams$weekendCap <- "\nShaded rectangle = weekends" myParams$myAlpha <- 0.1 myParams$vLineAlpha <- 0.4 myParams$vLineCol <- "red" # http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette myParams$myTextSize <- 4 # Functions ---- makeDotPlot <- function(dt, xVar, yVar, byVar, yLab){ p <- ggplot2::ggplot(dt, aes(x = get(xVar), y = get(yVar), colour = get(byVar), alpha = 1/100 # https://ggplot2.tidyverse.org/reference/geom_point.html ) ) + geom_point(size = 1, # small shape = 4 # small cross shape ) + scale_colour_viridis_d(name = eval(byVar)) + guides(alpha = FALSE) + # remove alpha legend labs(x = "Time", y = eval(yLab)) + theme(legend.position="bottom") + guides(colour = guide_legend(nrow = 2)) # forces 2 rows to legend return(p) } makeTilePlot <- function(dt, xVar, yVar, fillVar, yLab){ p <- ggplot2::ggplot(dt, aes(x = get(xVar), y = get(yVar), fill = get(fillVar) ) ) + geom_tile() + scale_fill_continuous(low = "green", high = "red", name = "Value") + labs(x = "Time", y = yLab) + theme(legend.position="bottom") return(p) } addLockdownDate <- function(p){ # assumes p has x = obsDate # p <- p + annotate("text", x = myParams$lockDownStartDate, # y = yMax * 0.4, angle = 10,size = myParams$myTextSize, # label = "UK covid lockdown to date", hjust = 0.5) p <- p + annotate("rect", xmin = myParams$lockDownStartDate, xmax = myParams$lockDownEndDate, ymin = yMin, ymax = yMax, alpha = myParams$myAlpha, fill = myParams$vLineCol, colour = myParams$vLineCol) return(p) } addLockdownDateTime <- function(p){ # assumes p has x = obsDateTime # p <- p + annotate("text", x = myParams$lockDownStartDateTime, # y = yMax * 0.4, angle = 10,size = myParams$myTextSize, # label = "UK covid lockdown to date", hjust = 0.5) p <- p + annotate("rect", xmin = myParams$lockDownStartDateTime, xmax = myParams$lockDownEndDateTime, ymin = yMin, ymax = yMax, alpha = myParams$myAlpha, fill = myParams$vLineCol, colour = myParams$vLineCol) return(p) } # only makes sense to use these for x axis covering March onwards myParams$weAlpha <- 0.3 myParams$weFill <- "grey50" addWeekendsDate <- function(p){ p <- p + annotate("rect", xmin = as.Date("2020-03-07"), xmax = as.Date("2020-03-08"), ymin = yMin, ymax = yMax, alpha = myParams$weAlpha, fill = myParams$weFill) p <- p + annotate("rect", xmin = as.Date("2020-03-14"), xmax = as.Date("2020-03-15"), ymin = yMin, ymax = yMax, alpha = myParams$weAlpha, fill = myParams$weFill) p <- p + annotate("rect", xmin = as.Date("2020-03-21"), xmax = as.Date("2020-03-22"), ymin = yMin, ymax = yMax, alpha = myParams$weAlpha, fill = myParams$weFill) p <- p + annotate("rect", xmin = as.Date("2020-03-28"), xmax = as.Date("2020-03-29"), ymin = yMin, ymax = yMax, alpha = myParams$weAlpha, fill = myParams$weFill) p <- p + annotate("rect", xmin = as.Date("2020-04-04"), xmax = as.Date("2020-04-05"), ymin = yMin, ymax = yMax, alpha = myParams$weAlpha, fill = myParams$weFill) return(p) } addWeekendsDateTime <- function(p){ p <- p + annotate("rect", xmin = lubridate::as_datetime("2020-03-07 00:00:00"), xmax = lubridate::as_datetime("2020-03-08 23:59:59"), ymin = yMin, ymax = yMax, alpha = myParams$weAlpha, fill = myParams$weFill) p <- p + annotate("rect", xmin = lubridate::as_datetime("2020-03-14 00:00:00"), xmax = lubridate::as_datetime("2020-03-15 23:59:59"), ymin = yMin, ymax = yMax, alpha = myParams$weAlpha, fill = myParams$weFill) p <- p + annotate("rect", xmin = lubridate::as_datetime("2020-03-21 00:00:00"), xmax = lubridate::as_datetime("2020-03-22 23:59:59"), ymin = yMin, ymax = yMax, alpha = myParams$weAlpha, fill = myParams$weFill) p <- p + annotate("rect", xmin = lubridate::as_datetime("2020-03-28 00:00:00"), xmax = lubridate::as_datetime("2020-03-29 23:59:59"), ymin = yMin, ymax = yMax, alpha = myParams$weAlpha, fill = myParams$weFill) p <- p + annotate("rect", xmin = lubridate::as_datetime("2020-04-04 00:00:00"), xmax = lubridate::as_datetime("2020-04-05 23:59:59"), ymin = yMin, ymax = yMax, alpha = myParams$weAlpha, fill = myParams$weFill) return(p) }
Please note that authorship is alphabetical. Contributions are listed below - see github for details and who to blame for what :-).
@dataknut
)If you wish to refer to any of the material from this report please cite as:
r format(Sys.time(), format = "%Y")
) r params$title
: r params$subtitle
, University of Southampton: Southampton, UK.Report circulation:
Report purpose:
This work is (c) r format(Sys.time(), format = "%Y")
the University of Southampton.
Data for Southampton downloaded from :
Southampton City Council collects various forms of air quality data at the sites shown in \@ref(tab:showSites). Some of these sites feed data to AURN. The AURN data then undergoes a manual check and ratification process.
WHO publishes information on the health consequences and "acceptable" exposure levels for each of these.
Data health warning
The southampton.my-air.uk data used is not cleaned or tested for measurement error. The AURN from https://uk-air.defra.gov.uk/ has been ratified if it is mroe than 6 months old.
sotonAirDT[, obsDate := lubridate::date(dateTimeUTC)] sotonAirDT[, site := ifelse(site == "Southampton A33", "Southampton A33 AURN data", site)] sotonAirDT[, site := ifelse(site == "Southampton Centre", "Southampton Centre AURN data", site)] t <- sotonAirDT[!is.na(value),.(from = min(dateTimeUTC), to = max(dateTimeUTC), nObs = .N), keyby = .(site, source, pollutant)] kableExtra::kable(t, caption = "Dates data != NA available by site and measure", digits = 2) %>% kable_styling()
Summarise previously downloaded and processed data... Note that this may not be completely up to date.
t <- sotonAirDT[, .(mean = mean(value, na.rm = TRUE) ), keyby = .(site, pollutant, source)] kableExtra::kable(t, caption = "Mean values per site (NaN indicates not measured)") %>% kable_styling()
Table \@ref(tab:summariseData) gives an indication of the availability of the different measures.
In this section we present graphical analysis of the previoulsy downloaded data. Note this is just a snapshot of the data available.
From WHO: https://www.who.int/news-room/fact-sheets/detail/ambient-(outdoor)-air-quality-and-health
"As an air pollutant, NO2 has several correlated activities. At short-term, concentrations exceeding 200 μg/m3, it is a toxic gas which causes significant inflammation of the airways.
NO2 is the main source of nitrate aerosols, which form an important fraction of PM2.5 and, in the presence of ultraviolet light, of ozone. The major sources of anthropogenic emissions of NO2 are combustion processes (heating, power generation, and engines in vehicles and ships).
Health effects
Epidemiological studies have shown that symptoms of bronchitis in asthmatic children increase in association with long-term exposure to NO2. Reduced lung function growth is also linked to NO2 at concentrations currently measured (or observed) in cities of Europe and North America."
yLab <- "Nitrogen Dioxide (ug/m3)" no2dt <- sotonAirDT[pollutant == "no2"] t <- no2dt[!is.na(value), .(mean = mean(value, na.rm = TRUE), sd = sd(value, na.rm = TRUE), min = min(value, na.rm = TRUE), max = max(value, na.rm = TRUE), minDate = min(obsDate), maxDate = max(obsDate)), keyby = .(site, source)] kableExtra::kable(t, caption = "Summary of NO2 data") %>% kable_styling()
Table \@ref(tab:No2Summary) suggests that there may be a few (r nrow(no2dt[value < 0])
) negative values. These are summarised in \@ref(tab:testNegNo2) while Figure \@ref(fig:testNegNo2) shows the availability and levels of the pollutant data over time.
t <- head(no2dt[value <0], 10) kableExtra::kable(t, caption = "Negative NO2 values (up to first 10)") %>% kable_styling() t <- table(no2dt[value < 0]$site) kableExtra::kable(t, caption = "Negative NO2 values (count by site)") %>% kable_styling() # dt,xvar, yvar,fillVar, yLab p <- makeTilePlot(no2dt, xVar = "dateTimeUTC", yVar = "site", fillVar = "value", yLab = yLab) p
Figure \@ref(fig:plotNo2Daily) shows daily mean values for all sites over time and includes smoother trend lines for each site.
plotDT <- no2dt[!is.na(value), .(mean = mean(value, na.rm = TRUE)), keyby = .(obsDate, site)] p <- makeDotPlot(plotDT, xVar = "obsDate", yVar = "mean", byVar = "site", yLab = paste0("Mean daily ", yLab) ) p <- p + geom_smooth() + # add smoothed line guides(colour = guide_legend(ncol=2)) + labs(caption = paste0(myParams$gamCap, myParams$lockdownCap)) yMax <- max(plotDT$mean) yMin <- min(plotDT$mean) addLockdownDate(p)
plotDT <- no2dt[!is.na(value) & obsDate > myParams$recentCutDate, .(mean = mean(value, na.rm = TRUE)), keyby = .(obsDate, site)] p <- makeDotPlot(plotDT, xVar = "obsDate", yVar = "mean", byVar = "site", yLab = paste0("Mean daily ", yLab) ) p <- p + #geom_smooth(se = FALSE) + # add smoothed line guides(colour = guide_legend(ncol=2)) + scale_x_date(date_breaks = "2 day", date_labels = "%a %d %b") + theme(axis.text.x=element_text(angle=90, hjust=1)) + labs(caption = paste0(myParams$gamCap, myParams$lockdownCap, myParams$weekendCap)) yMax <- max(plotDT$mean) yMin <- min(plotDT$mean) p <- addLockdownDate(p) addWeekendsDate(p)
dt <- no2dt[!is.na(value) & dateTimeUTC > myParams$oneYearAgo] t <- dt[value > myParams$hourlyNo2Threshold_WHO][order(-value)] kableExtra::kable(caption = paste0("Values greater than WHO threshold (NO2 > ", myParams$hourlyNo2Threshold_WHO , ", last 12 months)"), head(t, 10)) %>% kable_styling() p <- makeDotPlot(dt, xVar = "dateTimeUTC", yVar = "value", byVar = "site", yLab = yLab) p <- p + geom_hline(yintercept = myParams$hourlyNo2Threshold_WHO) + labs(caption = "Reference line = WHO hourly threshold") p <- p + geom_smooth() + # add smoothed line guides(colour = guide_legend(ncol=2)) + labs(caption = paste0(myParams$gamCap, myParams$lockdownCap)) yMax <- max(dt$value) yMin <- min(dt$value) addLockdownDateTime(p)
Figure \@ref(fig:plotNo2Hourly) shows hourly values for all sites for the last 12 months. In this period there were r nrow(t)
hours when the hourly Nitrogen Dioxide level breached the relevant WHO hourly threshold (r myParams$hourlyNo2Threshold_WHO
). The worst 10 cases (if any) are shown in Table \@ref(tab:plotNo2Hourly).
Clearly there are spikes, the mean daily values show less variance (and less extremes) than the hourly data and there has also appears to have been a decreasing trend over time since early 2019.
Figure \@ref(fig:no2recent) shows the most recent hourly data.
recentDT <- no2dt[!is.na(value) & obsDate > myParams$recentCutDate] p <- makeDotPlot(recentDT, xVar = "dateTimeUTC", yVar = "value", byVar = "site", yLab = yLab) p <- p + scale_x_datetime(date_breaks = "2 day", date_labels = "%a %d %b") + theme(axis.text.x=element_text(angle=90, hjust=1)) + labs(caption = paste0(myParams$gamCap, myParams$lockdownCap, myParams$weekendCap)) # final plot - adds annotations yMin <- min(recentDT$value) yMax <- max(recentDT$value) p <- addLockdownDateTime(p) addWeekendsDateTime(p)
Beware seasonal trends and weather effects
yLab <- "Oxides of Nitrogen (ug/m3)" noxdt <- sotonAirDT[pollutant == "nox"] t <- noxdt[!is.na(value), .(mean = mean(value, na.rm = TRUE), sd = sd(value, na.rm = TRUE), min = min(value, na.rm = TRUE), max = max(value, na.rm = TRUE), minDate = min(obsDate), maxDate = max(obsDate)), keyby = .(site, source)] kableExtra::kable(t, caption = "Summary of NOx data") %>% kable_styling()
Table \@ref(tab:NoxSummary) suggests that there may be a few (r nrow(noxdt[value < 0])
) negative values. These are summarised in \@ref(tab:testNegNox) while Figure \@ref(fig:testNegNox) shows the availability and levels of the pollutant data over time.
t <- head(noxdt[value <0], 10) kableExtra::kable(t, caption = "Negative NOx values (up to first 10)") %>% kable_styling() t <- table(noxdt[value < 0]$site) kableExtra::kable(t, caption = "Negative NOx values (count by site)") %>% kable_styling() # dt,xvar, yvar,fillVar, yLab p <- makeTilePlot(noxdt, xVar = "dateTimeUTC", yVar = "site", fillVar = "value", yLab = yLab) p
Figure \@ref(fig:plotNoxDaily) shows daily mean values for all sites over time and includes smoother trend lines for each site.
plotDT <- noxdt[!is.na(value), .(mean = mean(value, na.rm = TRUE)), keyby = .(obsDate, site)] p <- makeDotPlot(plotDT, xVar = "obsDate", yVar = "mean", byVar = "site", yLab = paste0("Mean daily ", yLab) ) p <- p + geom_smooth() + # add smoothed line guides(colour = guide_legend(ncol=2)) + labs(caption = paste0(myParams$gamCap, myParams$lockdownCap)) yMax <- max(plotDT$mean) yMin <- min(plotDT$mean) addLockdownDate(p)
plotDT <- noxdt[!is.na(value) & obsDate > myParams$recentCutDate, .(mean = mean(value, na.rm = TRUE)), keyby = .(obsDate, site)] p <- makeDotPlot(plotDT, xVar = "obsDate", yVar = "mean", byVar = "site", yLab = paste0("Mean daily ", yLab) ) p <- p + #geom_smooth(se = FALSE) + # add smoothed line guides(colour = guide_legend(ncol=2)) + scale_x_date(date_breaks = "2 day", date_labels = "%a %d %b") + theme(axis.text.x=element_text(angle=90, hjust=1)) + labs(caption = paste0(myParams$gamCap, myParams$lockdownCap, myParams$weekendCap)) yMax <- max(plotDT$mean) yMin <- min(plotDT$mean) p <- addLockdownDate(p) addWeekendsDate(p)
dt <- noxdt[!is.na(value) & dateTimeUTC > myParams$oneYearAgo] p <- makeDotPlot(dt, xVar = "dateTimeUTC", yVar = "value", byVar = "site", yLab = yLab) p <- p + geom_smooth() + # add smoothed line guides(colour = guide_legend(ncol=2)) + labs(caption = paste0(myParams$gamCap, myParams$lockdownCap)) yMax <- max(dt$value) yMin <- min(dt$value) addLockdownDateTime(p)
Figure \@ref(fig:plotNo2Hourly) shows hourly values for all sites for the last 12 months.
Clearly there are peaks, the mean daily values show less variance (and less extremes) than the hourly data but it is difficult to discern any particular trend.
Figure \@ref(fig:noxrecent) shows the most recent hourly data.
recentDT <- noxdt[!is.na(value) & obsDate > myParams$recentCutDate] p <- makeDotPlot(recentDT, xVar = "dateTimeUTC", yVar = "value", byVar = "site", yLab = yLab) p <- p + scale_x_datetime(date_breaks = "2 day", date_labels = "%a %d %b") + theme(axis.text.x=element_text(angle=90, hjust=1)) + labs(caption = paste0(myParams$gamCap, myParams$lockdownCap, myParams$weekendCap)) # final plot - adds annotations yMin <- min(recentDT$value) yMax <- max(recentDT$value) p <- addLockdownDateTime(p) addWeekendsDateTime(p)
Beware seasonal trends and weather effects
From WHO: https://www.who.int/news-room/fact-sheets/detail/ambient-(outdoor)-air-quality-and-health
"SO2 is a colourless gas with a sharp odour. It is produced from the burning of fossil fuels (coal and oil) and the smelting of mineral ores that contain sulfur. The main anthropogenic source of SO2 is the burning of sulfur-containing fossil fuels for domestic heating, power generation and motor vehicles.
Health effects
SO2 can affect the respiratory system and the functions of the lungs, and causes irritation of the eyes. Inflammation of the respiratory tract causes coughing, mucus secretion, aggravation of asthma and chronic bronchitis and makes people more prone to infections of the respiratory tract. Hospital admissions for cardiac disease and mortality increase on days with higher SO2 levels. When SO2 combines with water, it forms sulfuric acid; this is the main component of acid rain which is a cause of deforestation."
yLab <- "Sulphour Dioxide (ug/m3)" so2dt <- sotonAirDT[pollutant == "so2"] t <- so2dt[, .(mean = mean(value, na.rm = TRUE), sd = sd(value, na.rm = TRUE), min = min(value, na.rm = TRUE), max = max(value, na.rm = TRUE)), keyby = .(site)] kableExtra::kable(t, caption = "Summary of SO2 data") %>% kable_styling()
Figure \@ref(fig:testNegSo2) shows the availability and levels of the pollutant data over time.
t <- head(so2dt[value <0], 10) kableExtra::kable(t, caption = "Negative SO2 values (up to first 10)") %>% kable_styling() t <- table(so2dt[value < 0]$site) kableExtra::kable(t, caption = "Negative SO2 values (count by site)") %>% kable_styling() # dt,xvar, yvar,fillVar, yLab p <- makeTilePlot(so2dt, xVar = "dateTimeUTC", yVar = "site", fillVar = "value", yLab = yLab) p
Figure \@ref(fig:plotSo2Daily) shows daily mean values for all sites over time and includes smoother trend lines for each site. The plot looks a bit odd... sensor issues?
plotDT <- so2dt[!is.na(value), .(mean = mean(value, na.rm = TRUE)), keyby = .(obsDate, site)] p <- makeDotPlot(plotDT, xVar = "obsDate", yVar = "mean", byVar = "site", yLab = paste0("Mean daily ", yLab) ) # dailySo2Threshold_WHO p <- p + geom_hline(yintercept = myParams$dailySo2Threshold_WHO) + geom_smooth() + # add smoothed line labs(caption = paste0(myParams$gamCap, myParams$lockdownCap)) yMax <- max(plotDT$mean) yMin <- min(plotDT$mean) addLockdownDate(p)
plotDT <- so2dt[!is.na(value) & obsDate > myParams$recentCutDate, .(mean = mean(value, na.rm = TRUE)), keyby = .(obsDate, site)] p <- makeDotPlot(plotDT, xVar = "obsDate", yVar = "mean", byVar = "site", yLab = paste0("Mean daily ", yLab) ) # dailySo2Threshold_WHO p <- p + geom_hline(yintercept = myParams$dailySo2Threshold_WHO) + scale_x_date(date_breaks = "2 day", date_labels = "%a %d %b") + theme(axis.text.x=element_text(angle=90, hjust=1)) + labs(caption = paste0(myParams$gamCap, myParams$lockdownCap, myParams$weekendCap)) yMax <- max(plotDT$mean) yMin <- min(plotDT$mean) p <- addLockdownDate(p) addWeekendsDate(p)
There is no hourly SO2 threshold - it is 10 minutes (r myParams$tenMSo2Threshold_WHO
).
so2dt <- so2dt[dateTimeUTC > myParams$oneYearAgo] # t <- so2dt[value > myParams$myParams$tenMSo2Threshold_WHO][order(-value)] # # kableExtra::kable(caption = paste0("Values greater than WHO 10 minute threshold (SO2 > ", # myParams$tenMSo2Threshold_WHO , ", last 12 months)"), # head(t, 10)) %>% # kable_styling() p <- makeDotPlot(so2dt[!is.na(value)], xVar = "dateTimeUTC", yVar = "value", byVar = "site", yLab = yLab) p <- p + geom_smooth() + # add smoothed line labs(caption = paste0(myParams$gamCap, myParams$lockdownCap)) yMax <- max(plotDT$value) yMin <- min(plotDT$value) p <- addLockdownDateTime(p) p
Figure \@ref(fig:so2recent) shows the most recent hourly data.
recentDT <- so2dt[!is.na(value) & obsDate > myParams$recentCutDate] p <- makeDotPlot(recentDT, xVar = "dateTimeUTC", yVar = "value", byVar = "site", yLab = yLab) p <- p + scale_x_datetime(date_breaks = "2 day", date_labels = "%a %d %b") + theme(axis.text.x=element_text(angle=90, hjust=1)) + labs(caption = paste0(myParams$gamCap, myParams$lockdownCap, myParams$weekendCap)) yMax <- max(recentDT$value) yMin <- min(recentDT$value) p <- addLockdownDateTime(p) addWeekendsDateTime(p)
Beware seasonal trends and weather effects
From WHO: https://www.who.int/news-room/fact-sheets/detail/ambient-(outdoor)-air-quality-and-health
"Ozone at ground level – not to be confused with the ozone layer in the upper atmosphere – is one of the major constituents of photochemical smog. It is formed by the reaction with sunlight (photochemical reaction) of pollutants such as nitrogen oxides (NOx) from vehicle and industry emissions and volatile organic compounds (VOCs) emitted by vehicles, solvents and industry. As a result, the highest levels of ozone pollution occur during periods of sunny weather. Health effects
Excessive ozone in the air can have a marked effect on human health. It can cause breathing problems, trigger asthma, reduce lung function and cause lung diseases. "
yLab <- "Ozone (ug/m3)" o3dt <- sotonAirDT[pollutant == "o3"] t <- o3dt[, .(mean = mean(value, na.rm = TRUE), sd = sd(value, na.rm = TRUE), min = min(value, na.rm = TRUE), max = max(value, na.rm = TRUE)), keyby = .(site, source)] kableExtra::kable(t, caption = "Summary of o3 data") %>% kable_styling()
Table \@ref(tab:pm10Summary) suggests that there may be a few (r nrow(o3dt[value < 0])
) negative values. These are shown in \@ref(tab:testNegPM10) while \@ref(fig:testNegPM10) shows data availability and PM 10 levels over time at each site.
t <- head(o3dt[value <0], nrow(o3dt[value < 0])) kableExtra::kable(head(t), caption = "Negative 03 values - first 6") %>% kable_styling() p <- makeTilePlot(o3dt, xVar = "dateTimeUTC", yVar = "site", fillVar = "value", yLab = yLab) p
plotDT <- o3dt[!is.na(value), .(mean = mean(value, na.rm = TRUE)), keyby = .(obsDate, site)] extreme03Daily <- plotDT[mean > myParams$dailyO3Threshold_WHO][order(-mean)] kableExtra::kable(caption = paste0("10 highest values greater than WHO threshold (PM 10 > ", myParams$dailyO3Threshold_WHO , ")"), digits = 2, head(extreme03Daily, 10)) %>% kable_styling() p <- makeDotPlot(plotDT, xVar = "obsDate", yVar = "mean", byVar = "site", yLab = paste0("Mean daily ", yLab) ) p <- p + geom_hline(yintercept = myParams$dailyO3Threshold_WHO) + geom_smooth() + # add smoothed line labs(caption = paste0(myParams$gamCap, myParams$lockdownCap)) yMax <- max(plotDT$value) yMin <- min(plotDT$value) addLockdownDate(p) nDaysOverThreshold <- uniqueN(extreme03Daily$obsDate) nDays <- uniqueN(plotDT$obsDate) # need to count days not site-days
Figure \@ref(fig:plotPM10Daily) shows daily values for all sites across the entire dataset and indicates the r nDaysOverThreshold
days (r round(100*nDaysOverThreshold/nDays,1)
%) that breached the WHO PM10 daily mean exposure threshold (r myParams$dailyPm10Threshold_WHO
) - see Table \@ref(tab:plotPM10Daily).
plotDT <- o3dt[!is.na(value) & obsDate > myParams$recentCutDate, .(mean = mean(value, na.rm = TRUE)), keyby = .(obsDate, site)] extreme03Daily <- plotDT[mean > myParams$dailyO3Threshold_WHO][order(-mean)] kableExtra::kable(caption = paste0("10 highest values greater than WHO threshold (PM 10 > ", myParams$dailyO3Threshold_WHO , ")"), digits = 2, head(extreme03Daily, 10)) %>% kable_styling() p <- makeDotPlot(plotDT, xVar = "obsDate", yVar = "mean", byVar = "site", yLab = paste0("Mean daily ", yLab) ) p <- p + scale_x_date(date_breaks = "2 day", date_labels = "%a %d %b") + theme(axis.text.x=element_text(angle=90, hjust=1)) + labs(caption = paste0(myParams$gamCap, myParams$lockdownCap, myParams$weekendCap)) yMax <- max(plotDT$mean) yMin <- min(plotDT$mean) p <- addLockdownDate(p) addWeekendsDate(p) nDaysOverThreshold <- uniqueN(extreme03Daily$obsDate) nDays <- uniqueN(plotDT$obsDate) # need to count days not site-days
o31yeardt <- o3dt[dateTimeUTC > myParams$oneYearAgo] t <- o31yeardt[value > 100][order(-value)] kableExtra::kable(caption = "10 highest hourly values (o3 > 100)", head(t)) %>% kable_styling() p <- makeDotPlot(o31yeardt[!is.na(value)], xVar = "dateTimeUTC", yVar = "value", byVar = "site", yLab = yLab) p <- p + labs(caption = "NB: There is no WHO o3 hourly threshold") p <- p + geom_smooth() + # add smoothed line labs(caption = paste0(myParams$gamCap, myParams$lockdownCap)) yMax <- max(o31yeardt$value, na.rm = TRUE) yMin <- min(o31yeardt$value, na.rm = TRUE) addLockdownDateTime(p)
Figure \@ref(fig:plotPM10Hourly) shows hourly PM 10 values for all sites over the last 12 months and suggests there may be outliers (see Table \@ref(tab:plotPM10Hourly)).
Figure \@ref(fig:03recent) shows the most recent hourly data.
recentDT <- o3dt[!is.na(value) & obsDate > myParams$recentCutDate] p <- makeDotPlot(recentDT, xVar = "dateTimeUTC", yVar = "value", byVar = "site", yLab = yLab) p <- p + scale_x_datetime(date_breaks = "2 day", date_labels = "%a %d %b") + theme(axis.text.x=element_text(angle=90, hjust=1)) + labs(caption = paste0(myParams$gamCap, myParams$lockdownCap, myParams$weekendCap)) yMax <- max(recentDT$value) yMin <- min(recentDT$value) p <- addLockdownDateTime(p) addWeekendsDateTime(p)
Beware seasonal trends and weather effects
From WHO: https://www.who.int/news-room/fact-sheets/detail/ambient-(outdoor)-air-quality-and-health
"PM is a common proxy indicator for air pollution. It affects more people than any other pollutant. The major components of PM are sulfate, nitrates, ammonia, sodium chloride, black carbon, mineral dust and water. It consists of a complex mixture of solid and liquid particles of organic and inorganic substances suspended in the air. While particles with a diameter of 10 microns or less, (≤ PM10) can penetrate and lodge deep inside the lungs, the even more health-damaging particles are those with a diameter of 2.5 microns or less, (≤ PM2.5). PM2.5 can penetrate the lung barrier and enter the blood system. Chronic exposure to particles contributes to the risk of developing cardiovascular and respiratory diseases, as well as of lung cancer.
There is a close, quantitative relationship between exposure to high concentrations of small particulates (PM10 and PM2.5) and increased mortality or morbidity, both daily and over time. Conversely, when concentrations of small and fine particulates are reduced, related mortality will also go down – presuming other factors remain the same. This allows policy-makers to project the population health improvements that could be expected if particulate air pollution is reduced."
PM 10 data: has more sensors and wider coverage than PM2.5
yLab <- "PM 10 (ug/m3)" pm10dt <- sotonAirDT[pollutant == "pm10"] t <- pm10dt[, .(mean = mean(value, na.rm = TRUE), sd = sd(value, na.rm = TRUE), min = min(value, na.rm = TRUE), max = max(value, na.rm = TRUE)), keyby = .(site, source)] kableExtra::kable(t, caption = "Summary of pm10 data") %>% kable_styling()
Table \@ref(tab:pm10Summary) suggests that there may be a few (r nrow(pm10dt[value < 0])
) negative values. These are shown in \@ref(tab:testNegPM10) while \@ref(fig:testNegPM10) shows data availability and PM 10 levels over time at each site.
t <- head(pm10dt[value <0], nrow(pm10dt[value < 0])) kableExtra::kable(head(t), caption = "Negative PM10 values - first 6") %>% kable_styling() p <- makeTilePlot(pm10dt, xVar = "dateTimeUTC", yVar = "site", fillVar = "value", yLab = yLab) p
plotDT <- pm10dt[!is.na(value), .(mean = mean(value, na.rm = TRUE)), keyby = .(obsDate, site)] extremePm10Daily <- plotDT[mean > myParams$dailyPm10Threshold_WHO][order(-mean)] kableExtra::kable(caption = paste0("10 highest values greater than WHO threshold (PM 10 > ", myParams$dailyPm10Threshold_WHO , ")"), digits = 2, head(extremePm10Daily, 10)) %>% kable_styling() p <- makeDotPlot(plotDT, xVar = "obsDate", yVar = "mean", byVar = "site", yLab = paste0("Mean daily ", yLab) ) p <- p + geom_hline(yintercept = myParams$dailyPm10Threshold_WHO) + geom_smooth() + # add smoothed line labs(caption = paste0(myParams$gamCap, myParams$lockdownCap)) yMin <- min(plotDT$value) yMax <- max(plotDT$value) addLockdownDate(p) nDaysOverThreshold <- uniqueN(extremePm10Daily$obsDate) nDays <- uniqueN(plotDT$obsDate) # need to count days not site-days
Figure \@ref(fig:plotPM10Daily) shows daily values for all sites across the entire dataset and indicates the r nDaysOverThreshold
days (r round(100*nDaysOverThreshold/nDays,1)
%) that breached the WHO PM10 daily mean exposure threshold (r myParams$dailyPm10Threshold_WHO
) - see Table \@ref(tab:plotPM10Daily).
plotDT <- pm10dt[!is.na(value) & obsDate > myParams$recentCutDate, .(mean = mean(value, na.rm = TRUE)), keyby = .(obsDate, site)] extremePm10Daily <- plotDT[mean > myParams$dailyPm10Threshold_WHO][order(-mean)] kableExtra::kable(caption = paste0("10 highest values greater than WHO threshold (PM 10 > ", myParams$dailyPm10Threshold_WHO , ")"), digits = 2, head(extremePm10Daily, 10)) %>% kable_styling() p <- makeDotPlot(plotDT, xVar = "obsDate", yVar = "mean", byVar = "site", yLab = paste0("Mean daily ", yLab) ) p <- p + geom_hline(yintercept = myParams$dailyPm10Threshold_WHO) + scale_x_date(date_breaks = "2 day", date_labels = "%a %d %b") + theme(axis.text.x=element_text(angle=90, hjust=1)) + labs(caption = paste0(myParams$lockdownCap, "\nReference line = WHO PM10 mean daily threshold")) yMax <- max(plotDT$mean) yMin <- min(plotDT$mean) p <- addLockdownDate(p) addWeekendsDate(p) nDaysOverThreshold <- uniqueN(extremePm10Daily$obsDate) nDays <- uniqueN(plotDT$obsDate) # need to count days not site-days
pm101yeardt <- pm10dt[!is.na(value) & dateTimeUTC > myParams$oneYearAgo] t <- pm101yeardt[value > 100][order(-value)] kableExtra::kable(caption = "10 highest hourly values (PM 10 > 100)", head(t)) %>% kable_styling() p <- makeDotPlot(pm101yeardt[!is.na(value)], xVar = "dateTimeUTC", yVar = "value", byVar = "site", yLab = yLab) p <- p + labs(caption = "") p <- p + geom_smooth() + # add smoothed line labs(caption = paste0(myParams$gamCap,myParams$lockdownCap, "\nNB: There is no WHO PM10 hourly threshold")) yMax <- max(pm101yeardt$value, na.rm = TRUE) yMin <- min(pm101yeardt$value, na.rm = TRUE) addLockdownDateTime(p)
Figure \@ref(fig:plotPM10Hourly) shows hourly PM 10 values for all sites over the last 12 months and suggests there may be outliers (see Table \@ref(tab:plotPM10Hourly)).
Figure \@ref(fig:pm10recent) shows the most recent hourly data.
recentDT <- pm10dt[!is.na(value) & obsDate > myParams$recentCutDate] p <- makeDotPlot(recentDT, xVar = "dateTimeUTC", yVar = "value", byVar = "site", yLab = yLab) p <- p + scale_x_datetime(date_breaks = "2 day", date_labels = "%a %d %b") + theme(axis.text.x=element_text(angle=90, hjust=1)) + labs(caption = paste0(myParams$lockdownCap, myParams$weekendCap)) yMax <- max(recentDT$value) yMin <- min(recentDT$value) p <- addLockdownDateTime(p) addWeekendsDateTime(p)
Beware seasonal trends and weather effects
From WHO: https://www.who.int/news-room/fact-sheets/detail/ambient-(outdoor)-air-quality-and-health
"Small particulate pollution has health impacts even at very low concentrations – indeed no threshold has been identified below which no damage to health is observed. Therefore, the WHO 2005 guideline limits aimed to achieve the lowest concentrations of PM possible."
yLab <- "PM 2.5 (ug/m3)" pm25dt <- sotonAirDT[pollutant == "pm2.5"] t <- dt[!is.na(value), .(mean = mean(value, na.rm = TRUE), sd = sd(value, na.rm = TRUE), min = min(value, na.rm = TRUE), max = max(value, na.rm = TRUE)), keyby = .(site, source)] kableExtra::kable(t, caption = "Summary of pm2.5 data") %>% kable_styling()
Table \@ref(tab:pm25Summary) suggests that there may be a few (r nrow(pm25dt[value < 0])
) negative values. These are shown in Table \@ref(tab:testNegPM25) while Figure \@ref(fig:testNegPM25) shows data availability and PM 2.5 levels over time at each site.
t <- head(pm25dt[value <0], nrow(pm25dt)) kableExtra::kable(head(t), caption = "Negative pm2.5 values - first 6") %>% kable_styling() p <- makeTilePlot(pm25dt, xVar = "dateTimeUTC", yVar = "site", fillVar = "value", yLab = yLab) p
plotDT <- pm25dt[!is.na(value), .(mean = mean(value, na.rm = TRUE)), keyby = .(obsDate, site)] extremePm25Daily <- plotDT[mean > myParams$dailyPm2.5Threshold_WHO][order(-mean)] kableExtra::kable(caption = paste0("6 highest values greater than WHO threshold (PM 2.5 > ", myParams$dailyPm2.5Threshold_WHO , ")"), digits = 2, head(extremePm25Daily)) %>% kable_styling() p <- makeDotPlot(plotDT, xVar = "obsDate", yVar = "mean", byVar = "site", yLab = paste0("Mean daily ", yLab) ) p <- p + geom_hline(yintercept = myParams$dailyPm2.5Threshold_WHO) + geom_smooth() + #add smoothed line labs(caption = paste0(myParams$gamCap, myParams$lockdownCap, "\nReference line = WHO PM2.5 mean daily threshold")) yMax <- max(plotDT$mean) yMin <- min(plotDT$mean) addLockdownDate(p) nDaysOverThreshold <- uniqueN(extremePm25Daily$obsDate) nDays <- uniqueN(plotDT$obsDate) # need to count days not site-days
Figure \@ref(fig:plotPM25Daily) shows daily values for all sites across the dataset and indicates that the WHO PM2_5 daily mean exposure threshold (r myParams$dailyPm2.5Threshold_WHO
) was breached on r nDaysOverThreshold
days (r round(100*nDaysOverThreshold/nDays,1)
%). The 6 worst cases are shown in Table \@ref(tab:plotPM25Daily).
plotDT <- pm25dt[!is.na(value) & obsDate > myParams$recentCutDate, .(mean = mean(value, na.rm = TRUE)), keyby = .(obsDate, site)] extremePm25Daily <- plotDT[mean > myParams$dailyPm2.5Threshold_WHO][order(-mean)] kableExtra::kable(caption = paste0("6 highest values greater than WHO threshold (PM 2.5 > ", myParams$dailyPm2.5Threshold_WHO , ")"), digits = 2, head(extremePm25Daily)) %>% kable_styling() p <- makeDotPlot(plotDT, xVar = "obsDate", yVar = "mean", byVar = "site", yLab = paste0("Mean daily ", yLab) ) p <- p + geom_hline(yintercept = myParams$dailyPm2.5Threshold_WHO) + scale_x_date(date_breaks = "2 day", date_labels = "%a %d %b") + theme(axis.text.x=element_text(angle=90, hjust=1)) + labs(caption = paste0(myParams$lockdownCap, myParams$weekendCap, "\nReference line = WHO daily PM2.5 threshold")) yMax <- max(plotDT$mean) yMin <- min(plotDT$mean) p <- addLockdownDate(p) addWeekendsDate(p) nDaysOverThreshold <- uniqueN(extremePm25Daily$obsDate) nDays <- uniqueN(plotDT$obsDate) # need to count days not site-days
dt <- pm25dt[!is.na(value) & dateTimeUTC > myParams$oneYearAgo] t <- pm25dt[value > 50][order(-value)] kableExtra::kable(caption = "Extreme hourly values (PM 2.5 > 50, last 12 months, worst 6)", head(t)) %>% kable_styling() p <- makeDotPlot(dt[!is.na(value)], xVar = "dateTimeUTC", yVar = "value", byVar = "site", yLab = yLab) p <- p + geom_smooth() + # add smoothed line labs(caption = paste0(myParams$gamCap, "\nNB: There is no WHO PM2.5 hourly threshold")) yMax <- max(dt$value) yMin <- min(dt$value) addLockdownDateTime(p)
Figure \@ref(fig:plotPM25Hourly) shows hourly values for all sites for the last 12 months while Table \@ref(tab:plotPM25Hourly) reports the 6 worst hours.
Figure \@ref(fig:pm25recent) shows the most recent hourly data.
recentDT <- pm25dt[!is.na(value) & obsDate > myParams$recentCutDate] p <- makeDotPlot(recentDT, xVar = "dateTimeUTC", yVar = "value", byVar = "site", yLab = yLab) p <- p + scale_x_datetime(date_breaks = "2 day", date_labels = "%a %d %b") + theme(axis.text.x=element_text(angle=90, hjust=1)) + labs(caption = paste0(myParams$gamCap, myParams$weekendCap, "\nNB: There is no WHO PM2.5 hourly threshold")) yMax <- max(recentDT$value) yMin <- min(recentDT$value) p <- addLockdownDateTime(p) addWeekendsDateTime(p)
Beware seasonal trends and weather effects
skimr::skim(lDT)
skimr::skim(aurnDT)
t <- sotonAirDT[pollutant == "no2"] skimr::skim(t)
t <- sotonAirDT[pollutant == "no2" & value > myParams$hourlyNo2Threshold_WHO][order(-value)] kableExtra::kable(caption = paste0("Values greater than WHO threshold (NO2 > ", myParams$hourlyNo2Threshold_WHO , ")"), t) %>% kable_styling()
t <- sotonAirDT[pollutant == "pm10"] skimr::skim(t)
kableExtra::kable(caption = paste0("PM 10 values greater than WHO threshold (NO2 > ", myParams$dailyPm10Threshold_WHO , ")"), extremePm10Daily) %>% kable_styling()
t <- sotonAirDT[pollutant == "pm2.5"] skimr::skim(t)
kableExtra::kable(caption = paste0("PM 2.5 values greater than WHO threshold (NO2 > ", myParams$dailyPm2.5Threshold_WHO , ")"), extremePm25Daily) %>% kable_styling()
Basic pairs analysis
wideDT <- data.table::dcast(sotonAirDT[!is.na(value)], # drop data cells with value = NA dateTimeUTC + site ~ pollutant, value.var = "value") #pairs(wideDT[, .(no,no2,nox,so2,o3,pm10,pm2.5)]) library("GGally") # https://ggobi.github.io/ggally/#columns_and_mapping GGally::ggpairs(wideDT, columns = c("no","no2","nox","so2","o3","pm10","pm2.5"), aes(color = site) ) # pm10DT <- sotonAirDT[pollutant == "pm10"] # pm10DT[, pm10 := value] # setkey(pm10DT, dateTimeUTC, site, obsDate) # pm25DT <- sotonAirDT[pollutant == "pm2.5"] # pm25DT[, pm25 := value] # setkey(pm25DT, dateTimeUTC, site, obsDate) # # dt <- pm10DT[pm25DT] # # ggplot2::ggplot(dt, aes(x = pm10, y = pm25, colour = site)) + # geom_point() + # theme(legend.position="bottom") + # scale_color_viridis_d(name = "Site")
There are only two data streams which match:
matchedDT <- sotonAirDT[site %like% "AURN"] table(matchedDT$site) # break up and re-join as wide sccA33DT <- matchedDT[site %like% "near docks"] setkey(sccA33DT, dateTimeUTC, pollutant) sccCenDT <- matchedDT[site %like% "near city"] setkey(sccCenDT, dateTimeUTC, pollutant) aurnA33DT <- matchedDT[site %like% "A33 AURN data"] aurnA33DT[, aurnValue := value] setkey(aurnA33DT, dateTimeUTC, pollutant) aurnCenDT <- matchedDT[site %like% "Centre AURN data"] aurnCenDT[, aurnValue := value] setkey(aurnCenDT, dateTimeUTC, pollutant) a33dt <- sccA33DT[aurnA33DT] centredt <- sccCenDT[aurnCenDT] a33dt <- a33dt[!is.na(site)] # remove non-matches a33dt <- a33dt[!is.na(value)] a33dt <- a33dt[!is.na(aurnValue)] centredt <- centredt[!is.na(site)] # remove non-matches centredt <- centredt[!is.na(value)] centredt <- centredt[!is.na(aurnValue)]
Any differences should be to do with the ratification process on data older than 6 months.
Test A33 site
a33dt[, Year := lubridate::year(dateTimeUTC)] ggplot2::ggplot(a33dt, aes(x = value, y = aurnValue, colour = as.factor(Year))) + geom_point() + theme(legend.position="bottom") + scale_colour_discrete(name = "Year") + facet_grid(. ~ pollutant, scales = "free")
Test City centre site
centredt[, Year := lubridate::year(dateTimeUTC)] ggplot2::ggplot(centredt, aes(x = value, y = aurnValue, colour = as.factor(Year))) + geom_point() + theme(legend.position="bottom") + scale_colour_discrete(name = "Year") + facet_grid(. ~ pollutant, scales = "free")
Report generated using knitr in RStudio with r R.version.string
running on r R.version$platform
(r Sys.info()[3]
).
t <- proc.time() - myParams$startTime elapsed <- t[[3]]
Analysis completed in r elapsed
seconds ( r round(elapsed/60,2)
minutes).
R packages used:
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.