# Set knitr options knitr::opts_chunk$set(echo = FALSE, # echo code so reader can see what is happening warning = FALSE, message = FALSE, fig_caption = TRUE, fig_height = 6, # default, make it bigger to stretch vertical axis fig_width = 8, fig_width = 8, # full width tidy = TRUE) # tidy up code in case echo = TRUE
# Load Packages ---- rmdLibs <- c("data.table", "ggplot2", "kableExtra", "plotly", "skimr") dkUtils::loadLibraries(rmdLibs) # load last in case of clashes require(openair) # Adjust knitr options if required knitr::opts_chunk$set(echo = TRUE) # Log compile time: startTime <- proc.time() # Parameters ---- # set xlim for plotly to reduce plot size & load speed xlimMinDateTime <- lubridate::as_datetime("2019-01-01 00:00:00") xlimMaxDateTime <- lubridate::as_datetime("2020-01-01 00:00:00") xlimMinDate <- lubridate::as_date("2019-01-01") xlimMaxDate <- lubridate::as_date("2020-01-01") # https://www.who.int/news-room/fact-sheets/detail/ambient-(outdoor)-air-quality-and-health annualPm10Threshold_WHO <- 20 dailyPm10Threshold_WHO <- 50 annualPm2.5Threshold_WHO <- 10 dailyPm2.5Threshold_WHO <- 25 annualno2Threshold_WHO <- 40 hourlyno2Threshold_WHO <- 200 makePlotly <- "_plotly" # '_plotly' -> yes - for plotly versions of charts # set doPlotly if(makePlotly == "_plotly"){ doPlotly <- 1 # for easier if statements } else { doPlotly <- 0 } # Functions ---- makeDotPlot <- function(dt, xVar, yVar, byVar, yLab){ p <- ggplot2::ggplot(dt, aes(x = get(xVar), y = get(yVar), colour = get(byVar), alpha = 1/100 # https://ggplot2.tidyverse.org/reference/geom_point.html ) ) + geom_point(size = 1, # small shape = 4 # small cross shape ) + scale_color_discrete(name = eval(byVar)) + guides(alpha = FALSE) + # remove alpha legend labs(x = "Time", y = eval(yLab)) + theme(legend.position="bottom") + guides(colour = guide_legend(nrow = 2)) # forces 2 rows to legend return(p) } makeTilePlot <- function(dt, xVar, yVar, fillVar, yLab){ p <- ggplot2::ggplot(dt, aes(x = get(xVar), y = get(yVar), fill = get(fillVar) ) ) + geom_tile() + scale_fill_continuous(low = "green", high = "red", name = "Value") + labs(x = "Time", y = yLab) + theme(legend.position="bottom") return(p) }
Please note that authorship is alphabetical. Contributions are listed below - see github for details and who to blame for what :-).
@dataknut
)If you wish to refer to any of the material from this report please cite as:
r format(Sys.time(), format = "%Y")
) r params$title
: r params$subtitle
, University of Southampton: Southampton, UK.Report circulation:
Report purpose:
This work is (c) r format(Sys.time(), format = "%Y")
the University of Southampton.
dfW <- openair::importAURN( site = "SA33", year = 2019, pollutant = "all", hc = FALSE, meta = TRUE, to_narrow = FALSE, # produces long form data yay! verbose = TRUE # for now ) # fails. it worked before # dfL <- openair::importAURN( # site = "SA33", # year = 2019, # pollutant = "all", # hc = FALSE, # meta = TRUE, # to_narrow = TRUE, # produces long form data yay! # verbose = TRUE # ) dtW <- data.table::as.data.table(dfW) # we like data.tables
Data downloaded from http://uk-air.defra.gov.uk/openair/R_data/ using Ĺpenair::importAURN()
.
Southampton City Council collects various forms of air quality data at the sites shown in \@ref(tab:showSites). WHO publishes information on the health consequences and "acceptable" exposure levels for each of these.
lDT <- data.table::melt(dtW, id.vars=c("site","date","code","latitude", "longitude", "site_type"), measure.vars = c("no","no2","nox","pm10","nv10","v10", "ws","wd"), value.name = "value" # varies ) # remove NA lDT <- lDT[!is.na(value)] t <- lDT[,.(from = min(date), to = max(date), nObs = .N), keyby = .(site, variable)] kableExtra::kable(t, caption = "Dates data available by site and measure", digits = 2) %>% kable_styling()
Summarise previously downloaded and processed data... Note that this may not be completely up to date.
skimr::skim(dfW)
Table \@ref(tab:summariseData) gives an indication of the availability of the different measures.
In this section we present graphical analysis of the previoulsy downloaded data. Note this is just a snapshot of the data available.
yLab <- "Nitrogen Dioxide (ug/m3)" t <- lDT[variable == "no2", .(mean = mean(value, na.rm = TRUE), sd = sd(value, na.rm = TRUE), min = min(value, na.rm = TRUE), max = max(value, na.rm = TRUE)), keyby = .(site)] kableExtra::kable(t, caption = "Summary of no2 data") %>% kable_styling()
Table \@ref(tab:no2Summary) suggests that there may be a few (r nrow(lDT[variable == "no2" & value < 0])
) negative values. These are summarised in \@ref(tab:testNegNo2) while Figure \@ref(fig:testNegNo2) shows the availability and levels of the pollutant data over time.
t <- head(lDT[variable == "no2" & value < 0], 10) kableExtra::kable(t, caption = "Negative no2 values (up to first 6)") %>% kable_styling() t <- table(lDT[variable == "no2" & value < 0, .(site)]) kableExtra::kable(t, caption = "Negative no2 values (count by site)") %>% kable_styling() # dt,xvar, yvar,fillVar, yLab p <- makeTilePlot(lDT[variable == "no2"], xVar = "date", yVar = "site", fillVar = "value", yLab = yLab) p
# p <- ggplot2::ggplot(dt, aes(x = obsDateTime, # y = nox2, # colour = site, # alpha = 0.1)) + # geom_point(shape=4, size = 1) t <- lDT[variable == "no2" & value > 200][order(-value)] kableExtra::kable(caption = paste0("Values greater than WHO threshold (NO2 > ", hourlyno2Threshold_WHO , ")"), head(t, 10)) %>% kable_styling() p <- makeDotPlot(lDT[variable == "no2"], xVar = "date", yVar = "value", byVar = "site", yLab = yLab) p <- p + geom_hline(yintercept = hourlyno2Threshold_WHO) + labs(caption = "Reference line = WHO hourly guideline threshold") if(doPlotly){ p plotly::ggplotly(p + xlim(xlimMinDateTime, xlimMaxDateTime)) # interactive, xlimited } else { p }
Figure \@ref(fig:plotNo2Hourly) shows hourly values for all sites. In the study period there were r nrow(t)
hours when the hourly Nitrogen Dioxide level breached WHO guidelines. The worst 10 cases are shown in Table \@ref(tab:plotNo2Hourly).
lDT[, obsDate := lubridate::date(date)] plotDT <- lDT[variable == "no2", .(mean = mean(value, na.rm = TRUE)), keyby = .(obsDate, site)] p <- makeDotPlot(plotDT, xVar = "obsDate", yVar = "mean", byVar = "site", yLab = yLab) p <- p + geom_smooth() + # add smoothed line labs(caption = "Trend line = Generalized additive model (gam) with integrated smoothness estimation") if(doPlotly){ p plotly::ggplotly(p + xlim(xlimMinDate, xlimMaxDate)) # interactive, xlimited # interactive } else { p }
Figure \@ref(fig:plotNo2Daily) shows daily mean values for all sites over time and includes smoother trend lines for each site.
Clearly the mean daily values show less variance (and less extremes) than the hourly data and there has also been a decreasing trend over time.
Wind rose
openair::windRose(dfW)
Pollution rose
openair::pollutionRose(dfW, pollutant = "no2")
We get a slightly higher % of high measures when the wind is from the SE?
Report generated using knitr in RStudio with r R.version.string
running on r R.version$platform
(r Sys.info()[3]
).
t <- proc.time() - startTime elapsed <- t[[3]]
Analysis completed in r elapsed
seconds ( r round(elapsed/60,2)
minutes).
R packages used:
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.