In CfSOtago/airQual: Code to analyse Air Quality data in the UK & NZ over time

# Set knitr chunk defaults for the whole report
# NOTE(review): knitr's own chunk options are spelled with dots (fig.height,
# fig.width); the underscore spellings below look like rmarkdown YAML output
# options and are presumably not picked up as chunk options - confirm before
# renaming them, as that would change the rendered figure sizes.
knitr::opts_chunk$set(echo = TRUE,          # echo code so reader can see what is happening; hide using yaml
                      warning = FALSE,
                      message = FALSE,
                      fig_caption = TRUE,
                      fig_height = 6,        # default, make it bigger to stretch vertical axis
                      fig_width = 8,         # full width (was passed twice)
                      tidy = TRUE)           # tidy up code in case echo = TRUE

# Load Packages ----
# Names of the packages this report uses; handed to dkUtils::loadLibraries()
rmdLibs <- c(
  "airQual",
  "ggplot2",
  "kableExtra",
  "openair", # for fancy air qual stuff
  "skimr",
  "viridis"
)
library(dkUtils)
dkUtils::loadLibraries(rmdLibs)

# Log compile time:
# (myParams is not created in this section - it is expected to exist already)
myParams$startTime <- proc.time()

# Parameters ----
# set xlim for plotly to reduce plot size & load speed
myParams$xlimMinDateTime <- lubridate::as_datetime("2018-01-01 00:00:00")
myParams$xlimMaxDateTime <- lubridate::as_datetime("2020-06-01 00:00:00")
myParams$xlimMinDate <- lubridate::as_date("2018-01-01")
myParams$xlimMaxDate <- lubridate::as_date("2020-06-01")

# 365 days (in seconds) before the time the report is compiled.
# now() is namespace-qualified like every other lubridate call here, so this
# line does not depend on lubridate being attached.
myParams$oneYearAgo <- lubridate::as_datetime(lubridate::now() - (365*24*60*60))

# set values for annotations
# the lockdown "end" is the compile date/time, i.e. the rectangle runs "to date"
myParams$lockDownStartDate <- as.Date("2020-03-24")
myParams$lockDownStartDateTime <- lubridate::as_datetime("2020-03-24 00:00:00")
myParams$lockDownEndDate <- lubridate::today()
myParams$lockDownEndDateTime <- lubridate::now()

# first date shown in the "recent" plots
myParams$recentCutDate <- as.Date("2020-03-01")

# first date shown in the year-on-year comparison plots
myParams$comparePlotCutDate <- as.Date("2020-02-01")

# caption fragments, pasted together per plot
myParams$gamCap <- "Trend line = Generalized additive model (gam) with integrated smoothness estimation"
myParams$lockdownCap <- "\nColoured rectangle = UK covid lockdown to date"
myParams$weekendCap <- "\nShaded rectangle = weekends & public holidays"
myParams$noThresh <- "\nNo specified WHO threshold"

# look & feel of the lockdown annotation
myParams$myAlpha <- 0.1
myParams$vLineAlpha <- 0.4
myParams$vLineCol <- "red" # http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette
myParams$myTextSize <- 4

# Functions ----
# these may be useful elsewhere but...
# Build the daily 2020-vs-reference-period comparison table.
#
# dt: a data.table with (at least) the columns compareYear, value, fixedDate,
#     fixedDoW and site.
# Returns a data.table with one row per fixedDate/fixedDoW/site of the 2020
# data, the 2017-2019 mean/median joined on, and the derived columns
# pcDiffMean, pcDiffMedian (% difference, -ve = 2020 is lower) and pos.
makeDailyComparisonDT <- function(dt){
  # reference period: not split by site
  baseDT <- dt[compareYear == "2017-2019",
               .(baseMean = mean(value),
                 baseMedian = median(value)
               ),
               keyby = list(fixedDate, fixedDoW, compareYear)
               ]

  # test period: one row per site
  testDT <- dt[compareYear == "2020",
               .(testMean = mean(value),
                 testMedian = median(value)
               ),
               keyby = list(fixedDate, fixedDoW, compareYear, site)
               ]

  # join on date & day of week only (baseDT was previously keyed twice and
  # testDT not at all)
  setkey(baseDT, fixedDate, fixedDoW)
  setkey(testDT, fixedDate, fixedDoW)

  # keeps every 2020 row; base* values are NA where no reference row matches
  plotDT <- baseDT[testDT]
  plotDT[, pcDiffMean := 100*(testMean - baseMean)/baseMean] # -ve value indicates lower
  plotDT[, pcDiffMedian:= 100*(testMedian - baseMedian)/baseMedian] # -ve value indicates lower
  plotDT[, pos := ifelse(pcDiffMean > 0 , "Pos", "Neg")] # want to colour the line sections - how?
  return(plotDT)
}

# Build the weekly 2020-vs-reference-period comparison table.
#
# dt: a data.table with (at least) the columns compareYear, value, weekNo
#     and site.
# Returns a data.table with one row per weekNo/site of the 2020 data, the
# 2017-2019 mean/median joined on, and the derived columns pcDiffMean,
# pcDiffMedian (% difference, -ve = 2020 is lower) and pos.
makeWeeklyComparisonDT <- function(dt){
  # reference period: not split by site
  baseDT <- dt[compareYear == "2017-2019",
               .(baseMean = mean(value),
                 baseMedian = median(value)
               ),
               keyby = list(weekNo, compareYear)
               ]

  # test period: one row per site
  testDT <- dt[compareYear == "2020",
               .(testMean = mean(value),
                 testMedian = median(value)
               ),
               keyby = list(weekNo, compareYear, site)
               ]

  # join on week number only (baseDT was previously keyed twice and testDT
  # not at all)
  setkey(baseDT, weekNo)
  setkey(testDT, weekNo)

  # keeps every 2020 row; base* values are NA where no reference row matches
  plotDT <- baseDT[testDT]
  plotDT[, pcDiffMean := 100*(testMean - baseMean)/baseMean] # -ve value indicates lower
  plotDT[, pcDiffMedian:= 100*(testMedian - baseMedian)/baseMedian] # -ve value indicates lower
  plotDT[, pos := ifelse(pcDiffMean > 0 , "Pos", "Neg")] # want to colour the line sections - how?
  return(plotDT)
}


# why do these only work here?
# Shade the weekends & public holidays of March - May 2020 on a plot whose
# x axis is a Date, and label Easter and VE Day.
#
# p:    the ggplot to annotate
# yMin: lower edge of the shaded rectangles
# yMax: upper edge of the shaded rectangles (labels sit at yMax * myParams$labelPos)
# Returns p with the annotation layers added.
addWeekendsDate <- function(p, yMin, yMax){
  # one shaded rectangle from `from` to `to`
  shadeSpan <- function(from, to){
    annotate("rect", xmin = as.Date(from),
             xmax = as.Date(to),
             ymin = yMin, ymax = yMax,
             alpha = myParams$weAlpha, fill = myParams$weFill)
  }
  # one text label starting at `at`
  labelAt <- function(at, txt){
    annotate("text", x = as.Date(at),
             y = yMax*myParams$labelPos,
             label = txt)
  }

  # layers in the order they are added to the plot
  layers <- list(
    shadeSpan("2020-03-07", "2020-03-09"),
    shadeSpan("2020-03-14", "2020-03-16"),
    shadeSpan("2020-03-21", "2020-03-23"),
    shadeSpan("2020-03-28", "2020-03-30"),
    shadeSpan("2020-04-04", "2020-04-06"),
    shadeSpan("2020-04-10", "2020-04-14"), # Easter
    labelAt("2020-04-10", "Easter 2020"),
    shadeSpan("2020-04-18", "2020-04-20"),
    shadeSpan("2020-04-25", "2020-04-27"),
    shadeSpan("2020-05-02", "2020-05-04"),
    shadeSpan("2020-05-08", "2020-05-11"), # VE Day
    labelAt("2020-05-08", "VE Day 2020"),
    shadeSpan("2020-05-16", "2020-05-18"),
    shadeSpan("2020-05-23", "2020-05-25"),
    shadeSpan("2020-05-30", "2020-06-01")
  )
  for (layer in layers) {
    p <- p + layer
  }
  return(p)
}

# Shade the weekends & public holidays of March - May 2020 on a plot whose
# x axis is a date-time, and label Easter and VE Day.
#
# p:    the ggplot to annotate
# yMin: lower edge of the shaded rectangles
# yMax: upper edge of the shaded rectangles (labels sit at yMax * myParams$labelPos)
# Returns p with the annotation layers added.
addWeekendsDateTime <- function(p, yMin, yMax){
  # one shaded rectangle from `from` to `to`
  shadeSpan <- function(from, to){
    annotate("rect", xmin = lubridate::as_datetime(from),
             xmax = lubridate::as_datetime(to),
             ymin = yMin, ymax = yMax,
             alpha = myParams$weAlpha, fill = myParams$weFill)
  }
  # one text label starting at `at`
  labelAt <- function(at, txt){
    annotate("text", x = lubridate::as_datetime(at),
             y = yMax*myParams$labelPos,
             label = txt)
  }

  # layers in the order they are added to the plot
  layers <- list(
    shadeSpan("2020-03-07 00:00:00", "2020-03-08 23:59:59"),
    shadeSpan("2020-03-14 00:00:00", "2020-03-15 23:59:59"),
    shadeSpan("2020-03-21 00:00:00", "2020-03-22 23:59:59"),
    shadeSpan("2020-03-28 00:00:00", "2020-03-29 23:59:59"),
    shadeSpan("2020-04-04 00:00:00", "2020-04-05 23:59:59"),
    shadeSpan("2020-04-10 00:00:00", "2020-04-13 23:59:59"), # Easter
    labelAt("2020-04-10 00:00:00", "Easter"),
    shadeSpan("2020-04-18 00:00:00", "2020-04-19 23:59:59"),
    shadeSpan("2020-04-25 00:00:00", "2020-04-26 23:59:59"),
    shadeSpan("2020-05-02 00:00:00", "2020-05-03 23:59:59"),
    shadeSpan("2020-05-08 00:00:00", "2020-05-10 23:59:59"), # VE Day
    labelAt("2020-05-08 00:00:00", "VE Day"),
    shadeSpan("2020-05-16 00:00:00", "2020-05-17 23:59:59"),
    shadeSpan("2020-05-23 00:00:00", "2020-05-24 23:59:59"),
    shadeSpan("2020-05-30 00:00:00", "2020-05-31 23:59:59")
  )
  for (layer in layers) {
    p <- p + layer
  }
  return(p)
}

# Draw the lockdown rectangle on a plot with a date-time x axis
# (assumes p has x = obsDateTime).
#
# p:          the ggplot to annotate
# yMin, yMax: data range; the rectangle extends 1 unit beyond each
# Returns p with the rectangle layer added.
addLockdownRectDateTime <- function(p, yMin, yMax){
  # a text label was tried here previously and left disabled:
  #   annotate("text", x = myParams$lockDownStartDateTime,
  #            y = yMax * 0.4, angle = 10, size = myParams$myTextSize,
  #            label = "UK covid lockdown to date", hjust = 0.5)
  lockdownBox <- annotate("rect",
                          xmin = myParams$lockDownStartDateTime,
                          xmax = myParams$lockDownEndDateTime,
                          ymin = yMin-1,
                          ymax = yMax+1,
                          alpha = myParams$myAlpha,
                          fill = myParams$vLineCol,
                          colour = myParams$vLineCol)
  p + lockdownBox
}

# Draw the lockdown rectangle on a plot with a Date x axis
# (assumes p has x = obsDate).
#
# p:          the ggplot to annotate
# yMin, yMax: data range; the rectangle extends 1 unit beyond each
# Returns p with the rectangle layer added.
addLockdownRectDate <- function(p, yMin, yMax){
  # a text label was tried here previously and left disabled:
  #   annotate("text", x = myParams$lockDownStartDate,
  #            y = yMax * 0.4, angle = 10, size = myParams$myTextSize,
  #            label = "UK covid lockdown to date", hjust = 0.5)
  lockdownBox <- annotate("rect",
                          xmin = myParams$lockDownStartDate,
                          xmax = myParams$lockDownEndDate,
                          ymin = yMin-1,
                          ymax = yMax+1,
                          alpha = myParams$myAlpha,
                          fill = myParams$vLineCol,
                          colour = myParams$vLineCol)
  p + lockdownBox
}

# Draw the lockdown rectangle on a plot whose x axis is a week number.
#
# p:          the ggplot to annotate
# yMin, yMax: data range; the rectangle extends 1 unit beyond each
# Returns p with the rectangle layer added.
addLockdownRectWeek <- function(p, yMin, yMax){
  # widen by 0.1 of a week either side of the lockdown start/end weeks
  firstWeek <- lubridate::week(myParams$lockDownStartDate) - 0.1
  lastWeek <- lubridate::week(myParams$lockDownEndDate) + 0.1
  p + annotate("rect",
               xmin = firstWeek,
               xmax = lastWeek,
               ymin = yMin - 1, ymax = yMax + 1,
               alpha = myParams$myAlpha,
               fill = myParams$vLineCol,
               colour = myParams$vLineCol)
}

# Settings read by addWeekendsDate() / addWeekendsDateTime()
# only makes sense to use these for x axis covering March onwards
myParams$weFill <- "grey50"  # fill colour of the shaded rectangles
myParams$weAlpha <- 0.3      # transparency of the shaded rectangles
myParams$labelPos <- 0.9     # label height as a fraction of yMax

Introduction

This report describes exploratory analysis of changes in air quality in the City of Southampton, UK in Spring 2020.

# Latest time stamp per data source, and how long ago that was at compile time
lastHA <- fixedDT[source == "hantsAir", max(dateTimeUTC)]
diffHA <- lubridate::now() - lastHA
lastAURN <- fixedDT[source == "AURN", max(dateTimeUTC)]
diffAURN <- lubridate::now() - lastAURN

Data for Southampton downloaded from :

http://www.hantsair.org.uk/hampshire/asp/Bulletin.asp?la=Southampton (see also https://www.southampton.gov.uk/environmental-issues/pollution/air-quality/);
https://uk-air.defra.gov.uk/networks/network-info?view=aurn

Southampton City Council collects various forms of air quality data at the sites shown in Table \@ref(tab:showSites). The data is available in raw form from http://www.hantsair.org.uk/hampshire/asp/Bulletin.asp?la=Southampton&bulletin=daily&site=SH5.

Some of these sites feed data to AURN. The data that goes via AURN is ratified to check for outliers and instrument/measurement error. AURN data less than six months old has not undergone this process. AURN data is (c) Crown 2020 copyright Defra and available for re-use via https://uk-air.defra.gov.uk, licenced under the Open Government Licence (OGL).

Data

In this report we use data from the following sources:

http://www.hantsair.org.uk/hampshire/asp/Bulletin.asp?la=Southampton last updated at `r lastHA`;
https://uk-air.defra.gov.uk/networks/network-info?view=aurn last updated at `r lastAURN`.

Table \@ref(tab:showSites) shows the available sites and sources. Note that some of the non-AURN sites appear to have stopped updating recently. For a detailed analysis of recent missing data see Section \@ref(annexMissing).

# Per site & source: number of non-missing observations, first and latest
# time stamp, and number of distinct measures
t <- fixedDT[!is.na(value),
             list(nObs = .N,
                  firstData = min(dateTimeUTC),
                  latestData = max(dateTimeUTC),
                  nMeasures = uniqueN(pollutant)),
             keyby = list(site, source)]

kable_styling(
  kableExtra::kable(t,
                    caption = "Sites, data source and number of valid observations. note that measures includes wind speed and direction in the AURN sourced data",
                    digits = 2)
)

Table \@ref(tab:showPollutants) shows the pollutants recorded at each site.

# Cross-tabulate pollutant by site, counting non-missing observations only
t <- with(fixedDT[!is.na(value)],
          table(pollutant, site))

kable_styling(
  kableExtra::kable(t,
                    caption = "Sites, pollutant and number of valid observations",
                    digits = 2)
)

To avoid confusion and 'double counting', in the remainder of the analysis we replace the Southampton AURN site data with the data for the same site sourced via AURN as shown in Table \@ref(tab:selectFinalSites). This has the disadvantage that the data is slightly less up to date (see Table \@ref(tab:showSites)). As will be explained below in the comparative analysis we will use only the AURN data to avoid missing data issues.

# Drop every site whose name matches "AURN site", then re-tabulate what is left
fixedDT <- fixedDT[!(site %like% "AURN site")]

t <- fixedDT[!is.na(value),
             list(nObs = .N,
                  nPollutants = uniqueN(pollutant),
                  lastDate = max(dateTimeUTC)),
             keyby = list(site, source)]

kable_styling(
  kableExtra::kable(t,
                    caption = "Sites, data source and number of valid observations",
                    digits = 2)
)

We use this data to compare:

pre and during-lockdown air quality measures
air quality measures during lockdown 2020 with average measures for the same time periods in the preceding 3 years (2017-2019)

It should be noted that air pollution levels in any given period of time are highly dependent on the prevailing meteorological conditions. As a result it can be very difficult to disentangle the effects of a reduction in source strength from the effects of local surface conditions. This is abundantly clear in the analysis which follows given that the Easter weekend was forecast to have very high import of pollution from Europe and that the wind direction and speed was highly variable across the lockdown period (see Figure \@ref(fig:recentWind)).

Further, air quality is not wholly driven by sources that lockdown might suppress and indeed that suppression may lead to rebound effects. For example we might expect more emissions due to increased domestic heating during cooler lockdown periods. As a result the analysis presented below must be considered a preliminary ‘before meteorological adjustment’ and ‘before controlling for other sources’ analysis of the effect of lockdown on air quality in Southampton.

For much more detailed analysis see a longer and very messy data report.

WHO air quality thresholds

A number of the following plots show the relevant WHO air quality thresholds and limits. These are taken from:

https://www.who.int/news-room/fact-sheets/detail/ambient-(outdoor)-air-quality-and-health

Nitrogen Dioxide (no2)

# NO2 subset and the y axis label used by the NO2 plots
no2dt <- fixedDT[pollutant == "no2"]
yLab <- "Nitrogen Dioxide (ug/m3)"

Figure \@ref(fig:theilSenNO2) shows the NO2 trend over time. Is lockdown below trend?

# openair::TheilSen is given a `date` column, so derive one from the time stamp
no2dt[, date := as.Date(dateTimeUTC)]

# trend estimate on everything before 1 June 2020
oaNO2 <- openair::TheilSen(
  no2dt[date < as.Date("2020-06-01")],
  "value",
  ylab = "NO2",
  deseason = TRUE,
  xlab = "Year",
  date.format = "%Y",
  date.breaks = 4
)
p <- oaNO2$plot

# Compare observed values with the Theil-Sen trend line.
#
# oa:    an openair object created by openair::TheilSen; oa$data$main.data must
#        hold the columns date, conc, a, b and slope
# fname: label used in the check plot's caption and file name
# Returns a data.table with date, conc, a, b, slope, expectedVal (value on the
# trend line), diff (conc - expectedVal) and pcDiff (diff as % of expectedVal).
# Side effect: saves a check plot to docs/plots/SSC_trendModelTestPlot_<fname>.png
getModelTrendTable <- function(oa, fname){
  oaData <- data.table::as.data.table(oa$data$main.data)
  rDT <- oaData[, .(date, conc, a,b,slope)]
  # https://github.com/davidcarslaw/openair/blob/master/R/TheilSen.R#L192
  # and
  # https://github.com/davidcarslaw/openair/blob/master/R/TheilSen.R#L625
  # namespace-qualified so this does not depend on lubridate being attached
  rDT[, x := lubridate::time_length(date - as.Date("1970-01-01"), unit="days")] # n days since x = 0
  rDT[, expectedVal := a + (b * x)] # b = slope / 365

  # checks: observed (solid) against expected (dashed)
  p <- ggplot2::ggplot(rDT, ggplot2::aes(x = date)) +
    ggplot2::geom_line(ggplot2::aes(y = conc)) +
    ggplot2::labs(y = "Value",
                  caption = fname)
  p <- p + ggplot2::geom_line(ggplot2::aes(y = expectedVal), linetype = "dashed")
  # pass the plot explicitly instead of relying on ggsave's last_plot() default
  ggplot2::ggsave(here::here("docs", "plots", paste0("SSC_trendModelTestPlot_", fname, ".png")),
                  plot = p)
  rDT[, diff := conc - expectedVal]
  rDT[, pcDiff := (diff/expectedVal)*100]

  t <- rDT[,.(date, conc, a,b,slope,expectedVal, diff, pcDiff)]
  return(t)
}

# Observed vs trend for NO2, restricted to January - May 2020
t <- getModelTrendTable(oaNO2, fname = "NO2")

ft <- dcast(
  t[date >= as.Date("2020-01-01") & date < as.Date("2020-06-01")],
  date ~ .,
  value.var = c("diff","pcDiff")
)
ft[, date := format.Date(date, format = "%b %Y")]  # show as e.g. "Jan 2020"
kable_styling(
  kableExtra::kable(ft, caption = "Units and % above/below expected", digits = 2)
)

Figure \@ref(fig:no2recent) shows the most recent hourly data.

# Hourly NO2 since the recent cut-off date, coloured by site.
# Missing values are dropped first, as the NOx, SO2 and O3 sections do, so that
# min()/max() below cannot return NA for the annotation limits.
recentDT <- no2dt[!is.na(value) & obsDate > myParams$recentCutDate]
p <- makeDotPlot(recentDT, 
                 xVar = "dateTimeUTC", 
                 xLab = "Date & Time",
                 yVar = "value", 
                 byVar = "site", 
                 yLab = yLab)

p <- p +
  scale_x_datetime(date_breaks = "2 day", date_labels =  "%a %d %b")  +
  theme(axis.text.x=element_text(angle=90, hjust=1))

# reference line at the WHO hourly threshold, explained in the caption
p <- p + geom_hline(yintercept = myParams$hourlyNo2Threshold_WHO) +
  labs(caption = paste0(myParams$lockdownCap, myParams$weekendCap,
                        "\nReference line = WHO hourly threshold (", myParams$hourlyNo2Threshold_WHO, ")")
       )

# final plot - adds annotations
yMin <- min(recentDT$value)
yMax <- max(recentDT$value)

p <- addLockdownRectDateTime(p, yMin, yMax)
addWeekendsDateTime(p,yMin, yMax) +
  guides(colour=guide_legend(ncol=2))

Figure \@ref(fig:no2recentProfile) shows the most recent hourly data by date and time of day.

# Tile plot of recent NO2 by date and time of day; the y axis is time of day,
# so the annotation limits are taken from the time column
recentDT[, time := hms::as_hms(dateTimeUTC)]
yMax <- max(recentDT$time)
yMin <- min(recentDT$time)
p <- addLockdownRectDate(profileTilePlot(recentDT, yLab), yMin, yMax)
addWeekendsDate(p, yMin, yMax) +
  labs(caption = paste0(myParams$lockdownCap, myParams$weekendCap))

Figure \@ref(fig:no2compare) shows the most recent mean daily values compared to previous years for the two AURN sites which do not have missing data. We have shifted the dates for the comparison years to ensure that weekdays and weekends line up. Note that this plot shows daily means with no indications of variance. Visible differences are therefore purely indicative at this stage.

# Daily mean/median NO2 per AURN site and comparison year, from the comparison
# cut-off date up to today.
# The cut-off is read by its full name, comparePlotCutDate, as set in the
# parameter section; the shorter `comparePlotCut` presumably only resolved
# through `$` partial matching.
plotDT <- no2dt[site %like% "via AURN" & fixedDate <= lubridate::today() & 
                  fixedDate >= myParams$comparePlotCutDate, 
                .(meanVal = mean(value),
                  medianVal = median(value),
                  nSites = uniqueN(site)), 
                keyby = .(fixedDate, compareYear, site)]

# final plot - adds annotations
yMin <- min(plotDT$meanVal)
yMax <- max(plotDT$meanVal)

p <- compareYearsPlot(plotDT, xVar = "fixedDate", 
                      yVar = "meanVal",
                      colVar = "compareYear")

p <- addLockdownRectDate(p, yMin, yMax) +
  labs(x = "Date",
       y = "Daily mean",
       caption = paste0(myParams$lockdownCap, myParams$weekendCap, myParams$noThresh))

p <- addWeekendsDate(p, yMin, yMax) + scale_x_date(date_breaks = "7 day",
                                  date_labels =  "%a %d %b",
                                  date_minor_breaks = "1 day")

# one panel per site
p + facet_grid(site ~ .) +
  theme(strip.text.y.right = element_text(angle = 90))

Figure \@ref(fig:no2pcDiffcompareDay) and \@ref(fig:no2pcDiffcompareWeek) show the % difference between the daily means for 2020 vs 2017-2019 (reference period). In both cases we can see that NO2 levels in 2020 were generally already lower than the reference period yet are not consistently lower even during the lockdown period. The effects of covid lockdown are not clear cut...

# Daily % difference between 2020 and the reference period, AURN sites only.
# The cut-off is read by its full name, comparePlotCutDate, as set in the
# parameter section; the shorter `comparePlotCut` presumably only resolved
# through `$` partial matching.
dailyDT <- makeDailyComparisonDT(no2dt[site %like% "via AURN" & 
                                         fixedDate >= myParams$comparePlotCutDate])

p <- compareYearsDiffPlotDaily(dailyDT) +
  labs(caption = paste0(myParams$lockdownCap, myParams$weekendCap))

# range of the % difference, reported and reused as annotation limits
yMin <- min(dailyDT$pcDiffMean)
yMax <- max(dailyDT$pcDiffMean)
print(paste0("Max drop %:", round(yMin)))
print(paste0("Max increase %:", round(yMax)))

p <- addLockdownRectDate(p, yMin, yMax)
addWeekendsDate(p, yMin, yMax)

# Weekly % difference between 2020 and the reference period, AURN sites only.
# The cut-off is read by its full name, comparePlotCutDate, as set in the
# parameter section; the shorter `comparePlotCut` presumably only resolved
# through `$` partial matching.
weeklyDT <- makeWeeklyComparisonDT(no2dt[site %like% "via AURN" & 
                                         fixedDate >= myParams$comparePlotCutDate])

p <- compareYearsDiffPlotWeekly(weeklyDT, ldStart = myParams$lockDownStartDate,
                           ldEnd = myParams$lockDownEndDate) +
  labs(caption = paste0(myParams$lockdownCap))

# range of the % difference, reported and reused as annotation limits
yMin <- min(weeklyDT$pcDiffMean)
yMax <- max(weeklyDT$pcDiffMean)
print(paste0("Max drop %:", round(yMin)))
print(paste0("Max increase %:", round(yMax)))

addLockdownRectWeek(p, yMin, yMax)

Beware seasonal trends and meteorological effects

Oxides of Nitrogen (nox)

# NOx subset and the y axis label used by the NOx plots
noxdt <- fixedDT[pollutant == "nox"]
yLab <- "Oxides of Nitrogen (ug/m3)"

Figure \@ref(fig:theilSenNOx) shows the NOx trend over time. Is lockdown below trend?

# openair::TheilSen is given a `date` column, so derive one from the time stamp
noxdt[, date := as.Date(dateTimeUTC)]

# trend estimate on everything before 1 June 2020
oaNOx <- openair::TheilSen(
  noxdt[date < as.Date("2020-06-01")],
  "value",
  ylab = "NOx",
  deseason = TRUE,
  xlab = "Year",
  date.format = "%Y",
  date.breaks = 4
)
p <- oaNOx$plot

# observed vs trend, restricted to January - May 2020
t <- getModelTrendTable(oaNOx, fname = "NOx")

ft <- dcast(
  t[date >= as.Date("2020-01-01") & date < as.Date("2020-06-01")],
  date ~ .,
  value.var = c("diff","pcDiff")
)
ft[, date := format.Date(date, format = "%b %Y")]  # show as e.g. "Jan 2020"
kable_styling(
  kableExtra::kable(ft, caption = "Units and % above/below expected", digits = 2)
)

Figure \@ref(fig:noxrecent) shows the most recent hourly data.

# Hourly NOx (non-missing only) since the recent cut-off date, coloured by site
recentDT <- noxdt[!is.na(value) & obsDate > myParams$recentCutDate]

p <- makeDotPlot(recentDT,
                 xVar = "dateTimeUTC",
                 xLab = "Date & Time",
                 yVar = "value",
                 byVar = "site",
                 yLab = yLab) +
  scale_x_datetime(date_breaks = "2 day", date_labels =  "%a %d %b")  +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(caption = paste0(myParams$lockdownCap, myParams$weekendCap,myParams$noThresh))

# final plot - adds annotations spanning the observed value range
yMax <- max(recentDT$value)
yMin <- min(recentDT$value)

addWeekendsDateTime(addLockdownRectDateTime(p, yMin, yMax), yMin, yMax)

Figure \@ref(fig:noxrecentProfile) shows the most recent hourly data by date and time of day.

# Tile plot of recent NOx by date and time of day; the y axis is time of day,
# so the annotation limits are taken from the time column
recentDT[, time := hms::as_hms(dateTimeUTC)]
yMax <- max(recentDT$time)
yMin <- min(recentDT$time)
addWeekendsDate(profileTilePlot(recentDT, yLab), yMin, yMax) +
  labs(caption = paste0(myParams$lockdownCap, myParams$weekendCap))

Figure \@ref(fig:noxcompare) shows the most recent mean daily values compared to previous years for the two AURN sites.

# Daily mean/median NOx per AURN site and comparison year, from the comparison
# cut-off date up to today.
# The cut-off is read by its full name, comparePlotCutDate, as set in the
# parameter section; the shorter `comparePlotCut` presumably only resolved
# through `$` partial matching.
plotDT <- noxdt[site %like% "via AURN" & fixedDate <= lubridate::today() & 
                  fixedDate >= myParams$comparePlotCutDate, 
                .(meanVal = mean(value),
                  medianVal = median(value),
                  nSites = uniqueN(site)), 
                keyby = .(fixedDate, compareYear, site)]

# final plot - adds annotations
yMin <- min(plotDT$meanVal)
yMax <- max(plotDT$meanVal)

p <- compareYearsPlot(plotDT, xVar = "fixedDate", 
                      yVar = "meanVal",
                      colVar = "compareYear")
p <- addLockdownRectDate(p, yMin, yMax) +
  labs(x = "Date", y = "Daily mean", 
       caption = paste0(myParams$lockdownCap, myParams$weekendCap,myParams$noThresh))
p <- addWeekendsDate(p, yMin, yMax)

# one panel per site
p + facet_grid(site ~ .) +
  theme(strip.text.y.right = element_text(angle = 90))

Figure \@ref(fig:noxpcDiffcompareDay) and \@ref(fig:noxpcDiffcompareWeek) show the % difference between the daily and weekly means for 2020 vs 2017-2019 (reference period).

# Daily % difference between 2020 and the reference period, AURN sites only.
# The cut-off is read by its full name, comparePlotCutDate, as set in the
# parameter section; the shorter `comparePlotCut` presumably only resolved
# through `$` partial matching.
dailyDT <- makeDailyComparisonDT(noxdt[site %like% "via AURN" & fixedDate >= myParams$comparePlotCutDate])

p <- compareYearsDiffPlotDaily(dailyDT) +
  labs(caption = paste0(myParams$lockdownCap, myParams$weekendCap))

# range of the % difference, reported and reused as annotation limits
yMin <- min(dailyDT$pcDiffMean)
yMax <- max(dailyDT$pcDiffMean)
print(paste0("Max drop %:", round(yMin)))
print(paste0("Max increase %:", round(yMax)))

p <- addLockdownRectDate(p, yMin, yMax)
addWeekendsDate(p, yMin, yMax)

# Weekly % difference between 2020 and the reference period, AURN sites only.
# The cut-off is read by its full name, comparePlotCutDate, as set in the
# parameter section; the shorter `comparePlotCut` presumably only resolved
# through `$` partial matching.
weeklyDT <- makeWeeklyComparisonDT(noxdt[site %like% "via AURN" & fixedDate >= myParams$comparePlotCutDate])

# Caption: no weekend shading is added to this plot below, so only the
# lockdown rectangle is explained (matching the NO2 and SO2 weekly plots).
p <- compareYearsDiffPlotWeekly(weeklyDT, ldStart = myParams$lockDownStartDate,
                           ldEnd = myParams$lockDownEndDate) +
  labs(caption = paste0(myParams$lockdownCap))

# range of the % difference, reported and reused as annotation limits
yMin <- min(weeklyDT$pcDiffMean)
yMax <- max(weeklyDT$pcDiffMean)
print(paste0("Max drop %:", round(yMin)))
print(paste0("Max increase %:", round(yMax)))
addLockdownRectWeek(p, yMin, yMax)

Sulphur Dioxide

# SO2 subset and the y axis label used by the SO2 plots
# (label spelling corrected from "Sulphour")
yLab <- "Sulphur Dioxide (ug/m3)"
so2dt <- fixedDT[pollutant == "so2"]

Figure \@ref(fig:theilSenSO2) shows the SO2 trend over time. Is lockdown below trend?

so2dt[, date := as.Date(dateTimeUTC)]  # set date to date for this one

# Trend estimate on everything before 1 June 2020.
# Uses the SO2 table; this call previously passed the NOx table (noxdt), so
# the "SO2" trend and the table below were computed from NOx data.
oaSO2 <- openair::TheilSen(so2dt[date < as.Date("2020-06-01")], "value", 
                  ylab = "SO2", deseason = TRUE,
                  xlab = "Year",
                  date.format = "%Y",
                  date.breaks = 4)

# observed vs trend, restricted to January - May 2020
t <- getModelTrendTable(oaSO2, fname = "SO2")

ft <- dcast(t[date >= as.Date("2020-01-01") & date < as.Date("2020-06-01")],
              date ~ ., value.var = c("diff","pcDiff"))
ft[, date := format.Date(date, format = "%b %Y")]
kableExtra::kable(ft, caption = "Units and % above/below expected", digits = 2) %>%
  kable_styling()

Figure \@ref(fig:so2recent) shows the most recent hourly data.

# Hourly SO2 (non-missing only) since the recent cut-off date, coloured by site
recentDT <- so2dt[!is.na(value) & obsDate > myParams$recentCutDate]

p <- makeDotPlot(recentDT,
                 xVar = "dateTimeUTC",
                 xLab = "Date & Time",
                 yVar = "value",
                 byVar = "site",
                 yLab = yLab) +
  scale_x_datetime(date_breaks = "2 day", date_labels =  "%a %d %b")  +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(caption = paste0(myParams$lockdownCap, myParams$weekendCap,myParams$noThresh))

# annotations span the observed value range
yMin <- min(recentDT$value)
yMax <- max(recentDT$value)
addWeekendsDateTime(addLockdownRectDateTime(p, yMin, yMax), yMin, yMax)

Figure \@ref(fig:so2recentProfile) shows the most recent hourly data by date and time of day.

# Tile plot of recent SO2 by date and time of day; the y axis is time of day,
# so the annotation limits are taken from the time column
recentDT[, time := hms::as_hms(dateTimeUTC)]

yMax <- max(recentDT$time)
yMin <- min(recentDT$time)
addWeekendsDate(profileTilePlot(recentDT, yLab), yMin, yMax) +
  labs(caption = paste0(myParams$lockdownCap, myParams$weekendCap))

Figure \@ref(fig:so2compare) shows the most recent mean daily values compared to previous years.

# Daily mean/median SO2 per AURN site and comparison year, from the comparison
# cut-off date up to today.
# The cut-off is read by its full name, comparePlotCutDate, as set in the
# parameter section; the shorter `comparePlotCut` presumably only resolved
# through `$` partial matching.
plotDT <- so2dt[site %like% "via AURN" & fixedDate <= lubridate::today() & 
                  fixedDate >= myParams$comparePlotCutDate, 
                .(meanVal = mean(value),
                  medianVal = median(value),
                  nSites = uniqueN(site)), 
                keyby = .(fixedDate, compareYear, site)]

# final plot - adds annotations
# meanVal is named in full (as in the NO2 and NOx sections) rather than
# relying on `$mean` partially matching it
yMin <- min(plotDT$meanVal)
yMax <- max(plotDT$meanVal)

p <- compareYearsPlot(plotDT, xVar = "fixedDate", 
                      yVar = "meanVal",
                      colVar = "compareYear")
# reference line at the WHO daily threshold, explained in the caption
p <- addLockdownRectDate(p, yMin, yMax) +
  geom_hline(yintercept = myParams$dailySo2Threshold_WHO) +
  labs(x = "Date", y = "Daily mean", 
       caption = paste0(myParams$lockdownCap, myParams$weekendCap,
                        "\nReference line = WHO daily threshold (", myParams$dailySo2Threshold_WHO, ")")
       )
p <- addWeekendsDate(p, yMin, yMax)

# one panel per site
p + facet_grid(site ~ .) +
  theme(strip.text.y.right = element_text(angle = 90))

Figure \@ref(fig:so2pcDiffcompareDay) and \@ref(fig:so2pcDiffcompareWeek) show the % difference between the daily and weekly means for 2020 vs 2017-2019 (reference period).

# Daily % difference between 2020 and the reference period, AURN sites only.
# The cut-off is read by its full name, comparePlotCutDate, as set in the
# parameter section; the shorter `comparePlotCut` presumably only resolved
# through `$` partial matching.
dailyDT <- makeDailyComparisonDT(so2dt[site %like% "via AURN" & fixedDate >= myParams$comparePlotCutDate])

# Caption explains the weekend shading too, since addWeekendsDate() is applied
# below (matching the NO2 and NOx daily plots).
p <- compareYearsDiffPlotDaily(dailyDT) +
  labs(caption = paste0(myParams$lockdownCap, myParams$weekendCap))

# range of the % difference, reported and reused as annotation limits
yMin <- min(dailyDT$pcDiffMean)
yMax <- max(dailyDT$pcDiffMean)
print(paste0("Max drop %:", round(yMin)))
print(paste0("Max increase %:", round(yMax)))

p <- addLockdownRectDate(p, yMin, yMax)
addWeekendsDate(p, yMin, yMax)

# Weekly % difference between 2020 and the reference period, AURN sites only.
# The cut-off is read by its full name, comparePlotCutDate, as set in the
# parameter section; the shorter `comparePlotCut` presumably only resolved
# through `$` partial matching.
weeklyDT <- makeWeeklyComparisonDT(so2dt[site %like% "via AURN" & 
                                           fixedDate >= myParams$comparePlotCutDate])

p <- compareYearsDiffPlotWeekly(weeklyDT,
                                ldStart = myParams$lockDownStartDate,
                                ldEnd = myParams$lockDownEndDate) +
  labs(caption = paste0(myParams$lockdownCap))

# range of the % difference, reported and reused as annotation limits
yMin <- min(weeklyDT$pcDiffMean)
yMax <- max(weeklyDT$pcDiffMean)
print(paste0("Max drop %:", round(yMin)))
print(paste0("Max increase %:", round(yMax)))
addLockdownRectWeek(p, yMin, yMax)

Beware seasonal trends and meteorological effects

Ozone

# O3 subset and the y axis label used by the O3 plots
o3dt <- fixedDT[pollutant == "o3"]
yLab <- "Ozone (ug/m3)"

Figure \@ref(fig:theilSenO3) shows the O3 trend over time. Is lockdown below trend?

# openair::TheilSen is given a `date` column, so derive one from the time stamp
o3dt[, date := as.Date(dateTimeUTC)]

# trend estimate on everything before 1 June 2020
oaO3 <- openair::TheilSen(
  o3dt[date < as.Date("2020-06-01")],
  "value",
  ylab = "O3",
  deseason = TRUE,
  xlab = "Year",
  date.format = "%Y",
  date.breaks = 4
)

# observed vs trend, restricted to January - May 2020
t <- getModelTrendTable(oaO3, fname = "O3")

ft <- dcast(
  t[date >= as.Date("2020-01-01") & date < as.Date("2020-06-01")],
  date ~ .,
  value.var = c("diff","pcDiff")
)
ft[, date := format.Date(date, format = "%b %Y")]  # show as e.g. "Jan 2020"
kable_styling(
  kableExtra::kable(ft, caption = "Units and % above/below expected", digits = 2)
)

Figure \@ref(fig:03recent) shows the most recent hourly data.

# non-missing hourly observations after the 'recent' cut date
recentDT <- o3dt[!is.na(value) & obsDate > myParams$recentCutDate]

# dot plot by site, then axis formatting and caption in one chain
p <- makeDotPlot(recentDT,
                 xVar = "dateTimeUTC", xLab = "Date & Time",
                 yVar = "value", yLab = yLab,
                 byVar = "site") +
  scale_x_datetime(date_breaks = "2 day", date_labels = "%a %d %b") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(caption = paste0(myParams$lockdownCap, myParams$weekendCap, myParams$noThresh))

# vertical extent passed to the annotation helpers
yMin <- min(recentDT$value)
yMax <- max(recentDT$value)
p <- addLockdownRectDateTime(p, yMin, yMax)
addWeekendsDateTime(p, yMin, yMax)

Figure \@ref(fig:o3recentProfile) shows the most recent hourly data by date and time of day.

# time of day of each observation
recentDT[, time := hms::as_hms(dateTimeUTC)]

p <- profileTilePlot(recentDT, yLab)

# the annotation helper is given the time-of-day range as its y extent
yMax <- max(recentDT$time)
yMin <- min(recentDT$time)
addWeekendsDate(p, yMin, yMax) +
  labs(caption = paste0(myParams$lockdownCap, myParams$weekendCap))

Figure \@ref(fig:o3compare) shows the most recent mean daily values compared to previous years.

# Daily mean/median per site and comparison year, AURN sites only.
# The cut-off parameter is spelled out in full (`comparePlotCutDate`); the short
# form only worked through `$` partial matching.
plotDT <- o3dt[site %like% "via AURN" & fixedDate <= lubridate::today() &
                 fixedDate >= myParams$comparePlotCutDate,
               .(meanVal = mean(value),
                 medianVal = median(value),
                 nSites = uniqueN(site)),
               keyby = .(fixedDate, compareYear, site)]

# final plot - adds annotations
# y extent comes from the plotted column; `plotDT$mean` relied on partial
# matching of `meanVal` and would silently break if another `mean*` column appeared
yMin <- min(plotDT$meanVal)
yMax <- max(plotDT$meanVal)

p <- compareYearsPlot(plotDT, xVar = "fixedDate",
                      yVar = "meanVal",
                      colVar = "compareYear")
p <- addLockdownRectDate(p, yMin, yMax) +
  geom_hline(yintercept = myParams$dailyO3Threshold_WHO) +
  labs(x = "Date", y = "Daily mean",
       caption = paste0(myParams$lockdownCap, myParams$weekendCap,
                        "\nReference line = WHO daily threshold (",
                        myParams$dailyO3Threshold_WHO, ")"))
p <- addWeekendsDate(p, yMin, yMax)
# one panel per site
p + facet_grid(site ~ .) +
  theme(strip.text.y.right = element_text(angle = 90))

Figure \@ref(fig:o3pcDiffcompareDay) and \@ref(fig:o3pcDiffcompareWeek) show the % difference between the daily and weekly means for 2020 vs 2017-2019 (reference period).

# Daily % difference vs the reference years, AURN sites only.
# The cut-off parameter is spelled out in full: `myParams$comparePlotCut` only
# resolved through `$` partial matching against `comparePlotCutDate`.
dailyDT <- makeDailyComparisonDT(o3dt[site %like% "via AURN" &
                                        fixedDate >= myParams$comparePlotCutDate])

p <- compareYearsDiffPlotDaily(dailyDT) +
  labs(caption = paste0(myParams$lockdownCap))

# range of the plotted % difference: printed and passed to the annotation helpers
yMin <- min(dailyDT$pcDiffMean)
yMax <- max(dailyDT$pcDiffMean)
print(paste0("Max drop %:", round(yMin)))
print(paste0("Max increase %:", round(yMax)))

p <- addLockdownRectDate(p, yMin, yMax)
addWeekendsDate(p, yMin, yMax)

# Weekly version of the same comparison
weeklyDT <- makeWeeklyComparisonDT(o3dt[site %like% "via AURN" &
                                          fixedDate >= myParams$comparePlotCutDate])

p <- compareYearsDiffPlotWeekly(weeklyDT,
                                ldStart = myParams$lockDownStartDate,
                                ldEnd = myParams$lockDownEndDate) +
  labs(caption = paste0(myParams$lockdownCap))

yMin <- min(weeklyDT$pcDiffMean)
yMax <- max(weeklyDT$pcDiffMean)
print(paste0("Max drop %:", round(yMin)))
print(paste0("Max increase %:", round(yMax)))
addLockdownRectWeek(p, yMin, yMax)

Beware seasonal trends and meteorological effects

PM 10

# PM10 rows of the long-form data, plus the y axis label used by the PM10 plots
pm10dt <- fixedDT[pollutant == "pm10"]
yLab <- "PM 10 (ug/m3)"

Figure \@ref(fig:theilSenPM10) shows the PM10 trend over time. Is lockdown below trend?

# add a `date` column (Date version of dateTimeUTC) for the trend model
pm10dt[, date := as.Date(dateTimeUTC)]

# Theil-Sen trend on de-seasonalised values, data before June 2020 only
oaPM10 <- openair::TheilSen(pm10dt[date < as.Date("2020-06-01")], "value",
                            ylab = "PM10", deseason = TRUE,
                            xlab = "Year",
                            date.format = "%Y",
                            date.breaks = 4)

# fname now matches the pollutant, like the "O3" and "PM2.5" calls; it was
# "SPM10", apparently a copy-paste slip.
# NOTE(review): confirm nothing downstream expects the old "SPM10" name.
t <- getModelTrendTable(oaPM10, fname = "PM10")

# one row per month, January to May 2020, with the diff and pcDiff columns
ft <- dcast(t[date >= as.Date("2020-01-01") & date < as.Date("2020-06-01")],
            date ~ ., value.var = c("diff", "pcDiff"))
ft[, date := format.Date(date, format = "%b %Y")]
kableExtra::kable(ft, caption = "Units and % above/below expected", digits = 2) %>%
  kable_styling()

Figure \@ref(fig:pm10recent) shows the most recent hourly data.

# non-missing hourly observations after the 'recent' cut date
recentDT <- pm10dt[!is.na(value) & obsDate > myParams$recentCutDate]

# dot plot by site, then axis formatting and caption in one chain
p <- makeDotPlot(recentDT,
                 xVar = "dateTimeUTC", xLab = "Date & Time",
                 yVar = "value", yLab = yLab,
                 byVar = "site") +
  scale_x_datetime(date_breaks = "2 day", date_labels = "%a %d %b") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(caption = paste0(myParams$lockdownCap, myParams$weekendCap, myParams$noThresh))

# vertical extent passed to the annotation helpers
yMin <- min(recentDT$value)
yMax <- max(recentDT$value)
p <- addLockdownRectDateTime(p, yMin, yMax)
addWeekendsDateTime(p, yMin, yMax)

Figure \@ref(fig:pm10recentProfile) shows the most recent hourly data by date and time of day.

# time of day of each observation
recentDT[, time := hms::as_hms(dateTimeUTC)]

p <- profileTilePlot(recentDT, yLab)

# the annotation helper is given the time-of-day range as its y extent
yMax <- max(recentDT$time)
yMin <- min(recentDT$time)
addWeekendsDate(p, yMin, yMax) +
  labs(caption = paste0(myParams$lockdownCap, myParams$weekendCap))

Figure \@ref(fig:pm10compare) shows the most recent mean daily values compared to previous years.

# Daily mean/median per site and comparison year, AURN sites only.
# The cut-off parameter is spelled out in full (`comparePlotCutDate`); the short
# form only worked through `$` partial matching.
plotDT <- pm10dt[site %like% "via AURN" & fixedDate <= lubridate::today() &
                   fixedDate >= myParams$comparePlotCutDate,
                 .(meanVal = mean(value),
                   medianVal = median(value),
                   nSites = uniqueN(site)),
                 keyby = .(fixedDate, compareYear, site)]

# final plot - adds annotations
# y extent comes from the plotted column; `plotDT$mean` relied on partial
# matching of `meanVal` and would silently break if another `mean*` column appeared
yMin <- min(plotDT$meanVal)
yMax <- max(plotDT$meanVal)

p <- compareYearsPlot(plotDT, xVar = "fixedDate",
                      yVar = "meanVal",
                      colVar = "compareYear")
p <- addLockdownRectDate(p, yMin, yMax) +
  geom_hline(yintercept = myParams$dailyPm10Threshold_WHO) +
  labs(x = "Date", y = "Daily mean",
       caption = paste0(myParams$lockdownCap, myParams$weekendCap,
                        "\nReference line = WHO daily threshold (",
                        myParams$dailyPm10Threshold_WHO, ")"))
p <- addWeekendsDate(p, yMin, yMax)
# one panel per site
p + facet_grid(site ~ .) +
  theme(strip.text.y.right = element_text(angle = 90))

Figure \@ref(fig:pm10pcDiffcompareDay) and \@ref(fig:pm10pcDiffcompareWeek) show the % difference between the daily and weekly means for 2020 vs 2017-2019 (reference period).

# Daily % difference vs the reference years, AURN sites only.
# The cut-off parameter is spelled out in full: `myParams$comparePlotCut` only
# resolved through `$` partial matching against `comparePlotCutDate`.
dailyDT <- makeDailyComparisonDT(pm10dt[site %like% "via AURN" &
                                          fixedDate >= myParams$comparePlotCutDate])

p <- compareYearsDiffPlotDaily(dailyDT) +
  labs(caption = paste0(myParams$lockdownCap))

# range of the plotted % difference: printed and passed to the annotation helpers
yMin <- min(dailyDT$pcDiffMean)
yMax <- max(dailyDT$pcDiffMean)
print(paste0("Max drop %:", round(yMin)))
print(paste0("Max increase %:", round(yMax)))

p <- addLockdownRectDate(p, yMin, yMax)
addWeekendsDate(p, yMin, yMax)

# Weekly version of the same comparison
weeklyDT <- makeWeeklyComparisonDT(pm10dt[site %like% "via AURN" &
                                            fixedDate >= myParams$comparePlotCutDate])

p <- compareYearsDiffPlotWeekly(weeklyDT,
                                ldStart = myParams$lockDownStartDate,
                                ldEnd = myParams$lockDownEndDate) +
  labs(caption = paste0(myParams$lockdownCap))

yMin <- min(weeklyDT$pcDiffMean)
yMax <- max(weeklyDT$pcDiffMean)
print(paste0("Max drop %:", round(yMin)))
print(paste0("Max increase %:", round(yMax)))
addLockdownRectWeek(p, yMin, yMax)

Beware seasonal trends and meteorological effects

PM 2.5

# PM2.5 rows of the long-form data, plus the y axis label used by the PM2.5 plots
pm25dt <- fixedDT[pollutant == "pm2.5"]
yLab <- "PM 2.5 (ug/m3)"

Figure \@ref(fig:theilSenPM25) shows the PM2.5 trend over time. Is lockdown below trend?

# add a `date` column (Date version of dateTimeUTC) for the trend model
pm25dt[, date := as.Date(dateTimeUTC)]

# Theil-Sen trend on de-seasonalised values, data before June 2020 only
oaPM25 <- openair::TheilSen(
  pm25dt[date < as.Date("2020-06-01")],
  "value",
  ylab = "PM2.5",
  xlab = "Year",
  deseason = TRUE,
  date.format = "%Y",
  date.breaks = 4
)

t <- getModelTrendTable(oaPM25, fname = "PM2.5")

# one row per month, January to May 2020, with the diff and pcDiff columns
ft <- dcast(
  t[date >= as.Date("2020-01-01") & date < as.Date("2020-06-01")],
  date ~ .,
  value.var = c("diff", "pcDiff")
)
ft[, date := format.Date(date, format = "%b %Y")]
kableExtra::kable_styling(
  kableExtra::kable(ft, caption = "Units and % above/below expected", digits = 2)
)

Figure \@ref(fig:pm25recent) shows the most recent hourly data.

# non-missing hourly observations after the 'recent' cut date
recentDT <- pm25dt[!is.na(value) & obsDate > myParams$recentCutDate]

# dot plot by site, then axis formatting and caption in one chain
p <- makeDotPlot(recentDT,
                 xVar = "dateTimeUTC", xLab = "Date & Time",
                 yVar = "value", yLab = yLab,
                 byVar = "site") +
  scale_x_datetime(date_breaks = "2 day", date_labels = "%a %d %b") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(caption = paste0(myParams$lockdownCap, myParams$weekendCap, myParams$noThresh))

# vertical extent passed to the annotation helpers
yMin <- min(recentDT$value)
yMax <- max(recentDT$value)
p <- addLockdownRectDateTime(p, yMin, yMax)
addWeekendsDateTime(p, yMin, yMax)

Figure \@ref(fig:pm25recentProfile) shows the most recent hourly data by date and time of day.

# time of day of each observation
recentDT[, time := hms::as_hms(dateTimeUTC)]

p <- profileTilePlot(recentDT, yLab)

# the annotation helper is given the time-of-day range as its y extent
yMax <- max(recentDT$time)
yMin <- min(recentDT$time)
addWeekendsDate(p, yMin, yMax) +
  labs(caption = paste0(myParams$lockdownCap, myParams$weekendCap))

Figure \@ref(fig:pm25compare) shows the most recent mean daily values compared to previous years.

# Daily mean/median per site and comparison year, AURN sites only.
# The cut-off parameter is spelled out in full (`comparePlotCutDate`); the short
# form only worked through `$` partial matching.
plotDT <- pm25dt[site %like% "via AURN" & fixedDate <= lubridate::today() &
                   fixedDate >= myParams$comparePlotCutDate,
                 .(meanVal = mean(value),
                   medianVal = median(value),
                   nSites = uniqueN(site)),
                 keyby = .(fixedDate, compareYear, site)]

# final plot - adds annotations
# y extent comes from the plotted column; `plotDT$mean` relied on partial
# matching of `meanVal` and would silently break if another `mean*` column appeared
yMin <- min(plotDT$meanVal)
yMax <- max(plotDT$meanVal)

p <- compareYearsPlot(plotDT, xVar = "fixedDate",
                      yVar = "meanVal",
                      colVar = "compareYear")
p <- addLockdownRectDate(p, yMin, yMax) +
  geom_hline(yintercept = myParams$dailyPm2.5Threshold_WHO) +
  labs(x = "Date", y = "Daily mean",
       caption = paste0(myParams$lockdownCap, myParams$weekendCap,
                        "\nReference line = WHO daily threshold (",
                        myParams$dailyPm2.5Threshold_WHO, ")"))
p <- addWeekendsDate(p, yMin, yMax)
# one panel per site
p + facet_grid(site ~ .) +
  theme(strip.text.y.right = element_text(angle = 90))

Figure \@ref(fig:pm25pcDiffcompareDay) and \@ref(fig:pm25pcDiffcompareWeek) show the % difference between the daily and weekly means for 2020 vs 2017-2019 (reference period).

# Daily % difference vs the reference years, AURN sites only.
# The cut-off parameter is spelled out in full: `myParams$comparePlotCut` only
# resolved through `$` partial matching against `comparePlotCutDate`.
dailyDT <- makeDailyComparisonDT(pm25dt[site %like% "via AURN" &
                                          fixedDate >= myParams$comparePlotCutDate])

p <- compareYearsDiffPlotDaily(dailyDT) +
  labs(caption = paste0(myParams$lockdownCap))

# range of the plotted % difference: printed and passed to the annotation helpers
yMin <- min(dailyDT$pcDiffMean)
yMax <- max(dailyDT$pcDiffMean)
print(paste0("Max drop %:", round(yMin)))
print(paste0("Max increase %:", round(yMax)))

p <- addLockdownRectDate(p, yMin, yMax)
addWeekendsDate(p, yMin, yMax)

# Weekly version of the same comparison
weeklyDT <- makeWeeklyComparisonDT(pm25dt[site %like% "via AURN" &
                                            fixedDate >= myParams$comparePlotCutDate])

p <- compareYearsDiffPlotWeekly(weeklyDT,
                                ldStart = myParams$lockDownStartDate,
                                ldEnd = myParams$lockDownEndDate) +
  labs(caption = paste0(myParams$lockdownCap))

yMin <- min(weeklyDT$pcDiffMean)
yMax <- max(weeklyDT$pcDiffMean)
print(paste0("Max drop %:", round(yMin)))
print(paste0("Max increase %:", round(yMax)))
addLockdownRectWeek(p, yMin, yMax)

Beware seasonal trends and meteorological effects

Wind direction and speed

As noted above, air pollution levels in any given time period are highly dependent on the prevailing meteorological conditions.

Figure \@ref(fig:recentWind) shows the wind direction and speed over the period of lockdown and can be compared with the equivalent pollutant level plots above such as Figure \@ref(fig:no2recent).

# windDT[, .(mean = mean(wd)), keyby = .(site)] # they're identical across AURN sites
# Wind direction and speed rows for the A33 site, each with its value copied
# into a column named the way the plot below expects (wd / ws)
windDirDT <- fixedDT[pollutant == "wd" & site %like% "A33"]
windDirDT[, wd := value]
windSpeedDT <- fixedDT[pollutant == "ws" & site %like% "A33"]
windSpeedDT[, ws := value]

# keyed join: one row per timestamp/site/source carrying both ws and wd
setkey(windDirDT, dateTimeUTC, site, source)
setkey(windSpeedDT, dateTimeUTC, site, source)
windDT <- windSpeedDT[windDirDT]
windDT[, rTime := hms::as_hms(dateTimeUTC)]

# one arrow per observation: height and colour = speed, rotation from direction
p <- ggplot2::ggplot(windDT[obsDate > as.Date("2020-03-23")],
                     aes(x = dateTimeUTC, y = ws, angle = -wd + 90, colour = ws)) +
  geom_text(label = "→") +
  theme(legend.position = "bottom") +
  guides(colour = guide_legend(title = "Wind speed")) +
  scale_color_continuous(high = "#132B43", low = "#56B1F7") # normal blue reversed


# wind speed range over the plotted period, used as the annotation extent
yMin <- windDT[obsDate > as.Date("2020-03-23"), min(ws)]
yMax <- windDT[obsDate > as.Date("2020-03-23"), max(ws)]
p <- addWeekendsDateTime(p, yMin, yMax)
#p <- addLockdownRectDateTime(p, yMin, yMax)
p <- p +
  labs(y = "Wind speed",
       x = "Time",
       caption = paste0(myParams$weekendCap)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 9))
# do this last otherwise adding the weekends takes the plot back to the earliest weekend we annotate
p +
  xlim(lubridate::as_datetime("2020-03-23 23:59:59"), NA)

Figure \@ref(fig:recentWindRose) shows a windrose for each of the periods of low/high pollutant levels visible in Figure \@ref(fig:no2pcDiffcompareDay):

26 March - 4 April (lower NO2)
7 April - 12 April (higher NO2)
16 April - 23 April (lower NO2)

The windroses indicate the direction the prevailing wind was blowing from and the colour of the 'paddles' indicates the strength while the length of the paddles indicates the proportion of observations. As we can see there are clear differences in the wind conditions which correlate with the pollution patterns observed:

the first period with low NO2 was dominated by north-north easterly winds (likely to bring city and motorway air);
the second period when NO2 and particulates were high was dominated by low speed south easterly winds (bringing continental air);
the third period when NO2 was low was again dominated by north easterly winds.

# Label each observation with the period it falls in; rows outside all three
# periods stay NA
fixedDT[, aqPeriod := NA_character_]
fixedDT[obsDate >= as.Date("2020-03-26") & obsDate <= as.Date("2020-04-04"),
        aqPeriod := "Period 1 - Low: 26/3 - 4/4"]
fixedDT[obsDate >= as.Date("2020-04-07") & obsDate <= as.Date("2020-04-12"),
        aqPeriod := "Period 2 - High: 7/4 - 12/4"]
fixedDT[obsDate >= as.Date("2020-04-16") & obsDate <= as.Date("2020-04-23"),
        aqPeriod := "Period 3 - Low: 16/4 - 23/4"]

# wind rows for the A33 site that fall inside one of the periods
plotDT <- fixedDT[!is.na(aqPeriod) & pollutant %in% c("ws", "wd") & site %like% "A33"]

# first/last timestamp per period as a sanity check
t <- plotDT[, .(start = min(dateTimeUTC),
                end = max(dateTimeUTC)),
            keyby = aqPeriod]

kableExtra::kable_styling(
  kableExtra::kable(t, caption = "Check period start/end times")
)


# make a dt openair will accept
wdDT <- plotDT[pollutant == "wd", .(dateTimeUTC, site, wd = value, aqPeriod)]
wsDT <- plotDT[pollutant == "ws", .(dateTimeUTC, site, ws = value, aqPeriod)]
setkey(wdDT, dateTimeUTC, site, aqPeriod)
setkey(wsDT, dateTimeUTC, site, aqPeriod)
wrDT <- wdDT[wsDT]
openair::windRose(wrDT, type = "aqPeriod")

Save data

Save long form fixed-date data to savedData for re-use.

# add an abbreviated, labelled day-of-week column before saving
fixedDT[, weekDay := lubridate::wday(dateTimeUTC, label = TRUE, abbr = TRUE)]

# write the long-form data under the project root, then pass the file to gzipIt
f <- paste0(here::here(), "/savedData/sotonExtract2017_2020_v2.csv")
data.table::fwrite(x = fixedDT, file = f)
dkUtils::gzipIt(f)

Saved data description:

# summary statistics for every column of the saved data
skimr::skim(fixedDT)

Saved data sites by year:

# observation counts: site (rows) by year (columns)
t <- table(fixedDT[["site"]], fixedDT[["year"]])

kableExtra::kable_styling(
  kableExtra::kable(t, caption = "Sites available by year")
)

Saved pollutants by site:

# observation counts: site (rows) by pollutant (columns)
t <- table(fixedDT[["site"]], fixedDT[["pollutant"]])

kableExtra::kable_styling(
  kableExtra::kable(t, caption = "Pollutants available by site")
)

NB:

ws = wind speed
wd = wind direction
v* = volatiles

We have also produced wind/pollution roses for these sites.

About

Code

Source:

https://github.com/dataknut/airQual

History:

https://github.com/dataknut/airQual/commits/master

Comments and feedback

If you wish to comment please open an issue:

https://github.com/dataknut/airQual/issues

Citation

If you wish to refer to any of the material from this report please cite as:

Anderson, B., (`r format(Sys.time(), format = "%Y")`) `r params$title`: `r params$subtitle`, Sustainable Energy Research Group, University of Southampton: Southampton, UK.

Report circulation:

Public

This work is (c) `r format(Sys.time(), format = "%Y")` the University of Southampton and is part of a collection of air quality data analyses.

Annex

Missing data {#annexMissing}

Several of these datasets suffer from missing data or have stopped updating. This is visualised below for all data for all sites from January 2020.

For example, Figure \@ref(fig:testNegNo2) shows missing data patterns for Nitrogen Dioxide.

# Tile plots of the non-missing observations per site, one plot per pollutant.
# The cut-off is built as POSIXct (UTC). Comparing the date-time column with a
# Date mixes two Ops methods, so R falls back to comparing the raw numbers
# (seconds vs days since the epoch); the filter then keeps every row, and the
# warning is hidden because warning = FALSE is set for all chunks.
tileStartDateTime <- as.POSIXct("2020-02-01", tz = "UTC")

# dt,xvar, yvar,fillVar, yLab
yLab <- "NO2"
tileDT <- fixedDT[pollutant == "no2" &
                    dateTimeUTC > tileStartDateTime &
                    !is.na(value)]
p <- makeTilePlot(tileDT, xVar = "dateTimeUTC", xLab = "Date & Time",
                  yVar = "site",
                  fillVar = "value",
                  yLab = yLab)

p + scale_x_datetime(date_breaks = "7 day", date_labels =  "%a %d %b")  +
    theme(axis.text.x=element_text(angle=90, hjust=1))

yLab <- "NOx"
# dt,xvar, yvar,fillVar, yLab
tileDT <- fixedDT[pollutant == "nox" &
                    dateTimeUTC > tileStartDateTime &
                    !is.na(value)]
p <- makeTilePlot(tileDT, xVar = "dateTimeUTC", xLab = "Date & Time",
                  yVar = "site",
                  fillVar = "value",
                  yLab = yLab)

p + scale_x_datetime(date_breaks = "7 day", date_labels =  "%a %d %b")  +
    theme(axis.text.x=element_text(angle=90, hjust=1))

yLab <- "SO2"
# dt,xvar, yvar,fillVar, yLab
tileDT <- fixedDT[pollutant == "so2" &
                    dateTimeUTC > tileStartDateTime &
                    !is.na(value)]
p <- makeTilePlot(tileDT, xVar = "dateTimeUTC", xLab = "Date & Time",
                  yVar = "site",
                  fillVar = "value",
                  yLab = yLab)

p + scale_x_datetime(date_breaks = "7 day", date_labels =  "%a %d %b")  +
    theme(axis.text.x=element_text(angle=90, hjust=1))

yLab <- "O3"
tileDT <- fixedDT[pollutant == "o3" &
                    dateTimeUTC > tileStartDateTime &
                    !is.na(value)]
p <- makeTilePlot(tileDT, xVar = "dateTimeUTC", xLab = "Date & Time",
                  yVar = "site",
                  fillVar = "value",
                  yLab = yLab)

p + scale_x_datetime(date_breaks = "7 day", date_labels =  "%a %d %b")  +
    theme(axis.text.x=element_text(angle=90, hjust=1))

yLab <- "PM10"
tileDT <- fixedDT[pollutant == "pm10" &
                    dateTimeUTC > tileStartDateTime &
                    !is.na(value)]
p <- makeTilePlot(tileDT, xVar = "dateTimeUTC", xLab = "Date & Time",
                  yVar = "site",
                  fillVar = "value",
                  yLab = yLab)

p + scale_x_datetime(date_breaks = "7 day", date_labels =  "%a %d %b")  +
    theme(axis.text.x=element_text(angle=90, hjust=1))

yLab <- "PM2.5"
tileDT <- fixedDT[pollutant == "pm2.5" &
                    dateTimeUTC > tileStartDateTime &
                    !is.na(value)]
p <- makeTilePlot(tileDT, xVar = "dateTimeUTC", xLab = "Date & Time",
                  yVar = "site",
                  fillVar = "value",
                  yLab = yLab)

p + scale_x_datetime(date_breaks = "7 day", date_labels =  "%a %d %b")  +
    theme(axis.text.x=element_text(angle=90, hjust=1))

Ship activity

Exploration of correlations between wind direction, recorded ship activity and pollutants.

Data:

Air quality: AURN
Ship counts: Western docks forum
WHO reference thresholds: https://www.who.int/news-room/fact-sheets/detail/ambient-(outdoor)-air-quality-and-health

It is important to consider the relative locations of the air quality stations and the port activities when interpreting this data. The map below shows the rough location of the stations (coloured circles: green 2 = A33, orange 4 = City Centre) as well as the locations of monitoring stations that, as of June 4th 2020, are not collecting data.

Latest air quality snapshot (June 4th 2020)

# Load the daily ship counts, parse the date and drop rows whose date did not parse
shipsDailyDT <- data.table::fread(paste0(aqParams$SCCdataPath, "/shipNumbers/shipNumbersSouthampton.csv"))
shipsDailyDT[, rDate := lubridate::dmy(Date)]
shipsDailyDT <- shipsDailyDT[!is.na(rDate)]
#summary(shipsDailyDT$rDate)

# re-code all ships to something more intuitive than openair does
# (values matching none of the bands, including NA, stay NA)
shipsDailyDT[, allShips := cargo + max_cruise]
shipsDailyDT[, allShipsCoded := NA_character_]
shipsDailyDT[allShips == 0, allShipsCoded := "0"]
shipsDailyDT[allShips == 1 | allShips == 2, allShipsCoded := "1-2"]
shipsDailyDT[allShips == 3 | allShips == 4, allShipsCoded := "3-4"]
shipsDailyDT[allShips == 5 | allShips == 6, allShipsCoded := "5-6"]
shipsDailyDT[allShips > 6, allShipsCoded := "7+"]

# same banding for the maximum number of cruise ships
shipsDailyDT[, maxCruiseCoded := NA_character_]
shipsDailyDT[max_cruise == 0, maxCruiseCoded := "0"]
shipsDailyDT[max_cruise == 1 | max_cruise == 2, maxCruiseCoded := "1-2"]
shipsDailyDT[max_cruise == 3 | max_cruise == 4, maxCruiseCoded := "3-4"]
shipsDailyDT[max_cruise > 4, maxCruiseCoded := "5+"]
t <- with(shipsDailyDT, table(maxCruiseCoded,allShipsCoded))

kableExtra::kable(addmargins(t), caption = "Number of days with max n cruise ships (rows) vs all ships") %>%
  kable_styling()

# Daily mean/max pollutant levels per site from 1 April 2020
dailyNO2DT <- no2dt[obsDate >= as.Date("2020-04-01"),
                    .(meanNO2 = mean(value),
                      maxNO2 = max(value),
                      nObs = .N), keyby = .(site, rDate = obsDate)]
dailyPM10DT <- pm10dt[obsDate >= as.Date("2020-04-01"),
                      .(meanPM10 = mean(value),
                        maxPM10 = max(value),
                        nObs = .N), keyby = .(site, rDate = obsDate)]
#summary(dailyNO2DT$rDate)

# Hourly wind direction and speed side by side, then daily means per site
windDirDT <- fixedDT[pollutant == "wd", .(site, dateTimeUTC, wd = value)]
windSpeedDT <- fixedDT[pollutant == "ws", .(site, dateTimeUTC, ws = value)]
setkey(windDirDT, site, dateTimeUTC)
setkey(windSpeedDT, site, dateTimeUTC)
windDT <- windSpeedDT[windDirDT]
# The cut-off is POSIXct: comparing a date-time column with a Date falls back to
# comparing raw numbers (seconds vs days), which keeps every row.
# NOTE(review): meanWd is an arithmetic mean of degrees, which is unreliable
# for directions either side of north (e.g. 350 and 10 average to 180) -
# consider a vector (circular) mean.
dailyWindDT <- windDT[dateTimeUTC >= as.POSIXct("2020-04-01", tz = "UTC"),
                      .(meanWd = mean(wd),
                        meanWs = mean(ws),
                        nObs = .N), keyby=.(site, rDate = as.Date(dateTimeUTC))]


# Attach the ship counts to the daily pollutant tables by date
setkey(shipsDailyDT, rDate)
setkey(dailyNO2DT, rDate)
setkey(dailyPM10DT, rDate)

plotNO2DT <- shipsDailyDT[dailyNO2DT]
plotNO2DT <- plotNO2DT[!is.na(cruise)] # filter out

plotPM10DT <- shipsDailyDT[dailyPM10DT]
plotPM10DT <- plotPM10DT[!is.na(cruise)] # filter out

# Add the daily wind summary by site and date
setkey(dailyWindDT,site, rDate)
setkey(plotNO2DT, site, rDate)
plotNO2DT <- dailyWindDT[plotNO2DT[site %like% "AURN"]] # keep AURN only

setkey(plotPM10DT, site, rDate)
plotPM10DT <- dailyWindDT[plotPM10DT[site %like% "AURN"]] # keep AURN only
#nrow(plotDT)

Table \@ref(tab:loadShipsData) shows the number of days for which there are different maximum numbers of cruise ships (rows) and total cargo and cruise ships (columns). There is only 1 day when there are 0 cruise ships and no days when there are 0 ships. The sparseness of the data (only `r uniqueN(shipsDailyDT$rDate)` days) and the relative lack of variation in ship numbers means that a relationship between ship numbers and pollution will be difficult to detect.

In addition, and most importantly, these analyses only show correlations. It is quite possible, for example, that higher pollution levels are due to prevailing environmental/meteorological conditions that happen to coincide with more ships.

NO2

Figures \@ref(fig:meanNo2BoxPlotsCruise) and \@ref(fig:maxNo2BoxPlotsCruise) show the distribution of mean and max daily NO2 by maximum cruise ship counts for the day. There are no clear relationships.

myCap <- "April & May 2020 \nSources: AURN hourly observations & Western docks forum"

# mean daily NO2 by banded cruise ship count, one panel per site
ggplot2::ggplot(plotNO2DT) +
  aes(x = maxCruiseCoded, y = meanNO2, group = maxCruiseCoded, colour = site) +
  geom_boxplot() +
  facet_grid(. ~ site) +
  labs(x = "Max number of cruise ships",
       y = "Mean daily NO2",
       caption = myCap)

# max daily NO2 by banded cruise ship count, with the WHO hourly threshold line
ggplot2::ggplot(plotNO2DT) +
  aes(x = maxCruiseCoded, y = maxNO2, group = maxCruiseCoded, colour = site) +
  geom_boxplot() +
  geom_hline(yintercept = myParams$hourlyNo2Threshold_WHO) +
  facet_grid(. ~ site) +
  labs(x = "Max number of cruise ships",
       y = "Max daily NO2",
       caption = paste0(myCap, "\nReference line: WHO hourly threshold"))

Figures \@ref(fig:meanNo2BoxPlotsAll) and \@ref(fig:maxNo2BoxPlotsAll) repeat this for all ships. In this case there appears to be slightly more of a rising trend as the number of ships increases.

# mean daily NO2 by banded count of all ships, one panel per site
ggplot2::ggplot(plotNO2DT) +
  aes(x = allShipsCoded, y = meanNO2, group = allShipsCoded, colour = site) +
  geom_boxplot() +
  facet_grid(. ~ site) +
  labs(x = "Max number of ships",
       y = "Mean daily NO2 (hourly observations)",
       caption = myCap)

# max daily NO2 by banded count of all ships, with the WHO hourly threshold line
ggplot2::ggplot(plotNO2DT) +
  aes(x = allShipsCoded, y = maxNO2, group = allShipsCoded, colour = site) +
  geom_boxplot() +
  geom_hline(yintercept = myParams$hourlyNo2Threshold_WHO) +
  facet_grid(. ~ site) +
  labs(x = "Max number of ships",
       y = "Max daily NO2 (hourly observations)",
       caption = paste0(myCap, "\nReference line: WHO hourly threshold"))

Figures \@ref(fig:meanNO2pollutionRoseCruise) and \@ref(fig:maxNO2pollutionRoseCruise) show pollution roses for daily mean and daily max NO2 by the maximum number of cruise ships alongside per day. These show the most frequent wind directions (wind rose) overlain by the proportion of pollution concentrations in the calculated groups.

It appears that when the wind is from the South East/East and more cruise ships are alongside then higher levels of NO2 are more frequent. But this could also be due to wind-blown continental pollution when the wind is from this direction as was known to be the case during April 2020.

# copy the daily wind means into wd / ws columns for the pollution roses
plotNO2DT[, `:=`(wd = meanWd, ws = meanWs)]
# short site labels for the panels
plotNO2DT[site %like% "A33", shortSite := "A33"]
plotNO2DT[site %like% "Centre", shortSite := "Centre"]

# days with at least one cruise ship: mean, then max, daily NO2
openair::pollutionRose(
  plotNO2DT[max_cruise != 0],
  pollutant = "meanNO2",
  type = c("maxCruiseCoded", "shortSite")
)

openair::pollutionRose(
  plotNO2DT[max_cruise != 0],
  pollutant = "maxNO2",
  type = c("maxCruiseCoded", "shortSite")
)

Figures \@ref(fig:meanNO2pollutionRoseAll) and \@ref(fig:maxNO2pollutionRoseAll) show pollution roses for daily mean and daily max NO2 by the number of all ships alongside per day. As we would expect, these plots appear to show a similar effect.

# all days, panels by banded count of all ships and site: mean, then max, daily NO2
openair::pollutionRose(
  plotNO2DT,
  pollutant = "meanNO2",
  type = c("allShipsCoded", "shortSite")
)

openair::pollutionRose(
  plotNO2DT,
  pollutant = "maxNO2",
  type = c("allShipsCoded", "shortSite")
)

PM10

Figures \@ref(fig:meanPm10BoxPlots) and \@ref(fig:maxPm10BoxPlots) show the distribution of mean and max daily PM10 by maximum cruise ship counts for the day. There is no clear relationship.

# mean daily PM10 by banded cruise ship count, with the WHO 24 hour threshold line
ggplot2::ggplot(plotPM10DT) +
  aes(x = maxCruiseCoded, y = meanPM10, group = maxCruiseCoded, colour = site) +
  geom_boxplot() +
  geom_hline(yintercept = myParams$dailyPm10Threshold_WHO) +
  facet_grid(. ~ site) +
  labs(x = "Max number of cruise ships",
       y = "Mean daily PM10",
       caption = paste0(myCap, "\nReference line: WHO 24 hour mean threshold"))

# max daily PM10 by banded cruise ship count
ggplot2::ggplot(plotPM10DT) +
  aes(x = maxCruiseCoded, y = maxPM10, group = maxCruiseCoded, colour = site) +
  geom_boxplot() +
  facet_grid(. ~ site) +
  labs(x = "Max number of cruise ships",
       y = "Max daily PM10",
       caption = myCap)

Figures \@ref(fig:meanPm10BoxPlotsAll) and \@ref(fig:maxPm10BoxPlotsAll) repeat this for all ships. In this case there appears to be a stronger relationship.

myCap <- "April & May 2020 (Sources: AURN & Western docks forum)"

# mean daily PM10 by banded count of all ships, with the WHO 24 hour threshold line
ggplot2::ggplot(plotPM10DT) +
  aes(x = allShipsCoded, y = meanPM10, group = allShipsCoded, colour = site) +
  geom_boxplot() +
  geom_hline(yintercept = myParams$dailyPm10Threshold_WHO) +
  facet_grid(. ~ site) +
  labs(x = "Max number of ships",
       y = "Mean daily PM10",
       caption = paste0(myCap, "\nReference line: WHO 24 hour mean threshold"))

# max daily PM10 by banded count of all ships
ggplot2::ggplot(plotPM10DT) +
  aes(x = allShipsCoded, y = maxPM10, group = allShipsCoded, colour = site) +
  geom_boxplot() +
  facet_grid(. ~ site) +
  labs(x = "Max number of ships",
       y = "Max daily PM10 ",
       caption = myCap)

Figures \@ref(fig:meanPm10pollutionRoseCruise) and \@ref(fig:maxPm10pollutionRoseCruise) show pollution roses for daily mean and daily max PM10 by the maximum number of cruise ships alongside per day.

It appears that when the wind is from the South East/East and more cruise ships are alongside then higher levels of PM10 are more frequent. But this is also the case when the wind is from the West in the 3-4 ships category.

# copy the daily wind means into wd / ws columns for the pollution roses
plotPM10DT[, `:=`(wd = meanWd, ws = meanWs)]
# short site labels for the panels
plotPM10DT[site %like% "A33", shortSite := "A33"]
plotPM10DT[site %like% "Centre", shortSite := "Centre"]

# days with at least one cruise ship: mean, then max, daily PM10
openair::pollutionRose(
  plotPM10DT[max_cruise != 0],
  pollutant = "meanPM10",
  type = c("maxCruiseCoded", "shortSite")
)

openair::pollutionRose(
  plotPM10DT[max_cruise != 0],
  pollutant = "maxPM10",
  type = c("maxCruiseCoded", "shortSite")
)

Finally, Figures \@ref(fig:meanPm10pollutionRoseAll) and \@ref(fig:maxPm10pollutionRoseAll) show a pollution rose for daily mean and daily max PM10 by the number of all ships alongside per day. These plots appear to show a similar effect. As before it is not possible to discount the potential confounding effects of overall environmental conditions which the South East and Easterly wind directions can produce.

# all days, panels by banded count of all ships and site: mean, then max, daily PM10
openair::pollutionRose(
  plotPM10DT,
  pollutant = "meanPM10",
  type = c("allShipsCoded", "shortSite")
)

openair::pollutionRose(
  plotPM10DT,
  pollutant = "maxPM10",
  type = c("allShipsCoded", "shortSite")
)

Focus on South to East winds

Given the above pollution rose analysis it is worth repeating the max NO2 and max PM10 box plots but splitting the data by wind quadrant. Note that this still does not remove the potential confounding effect of continental air...

To do this we code the wind direction as:

"Q1: N -> ENE" wind from 0 - 70 degrees
"Q2: ENE -> S" 70 - 180
"Q3-4: S -> N" 180 - 360

Figure \@ref(fig:quadrantMeanNO2) shows mean NO2 split by wind quadrant and site while Figure \@ref(fig:quadrantMeanPM10) shows mean PM10. Unfortunately the number of observations is so small that we really cannot see any pattern.

# Code wind direction into three sectors.
#
# Adds (or overwrites) a character column `windDir` on `dt`, by reference:
#   "Q1: N -> ENE"   0 <= wd < 70
#   "Q2: ENE -> S"  70 <= wd < 180
#   "Q3-4: S -> N" 180 <= wd <= 360
# Rows whose wd is NA or outside 0-360 get NA.
#
# dt: a data.table with a numeric column `wd` (wind direction in degrees).
# Returns dt (the same object, modified in place).
setWindQuad <- function(dt){
  # assumes wd = degrees
  # openair default is 30 degrees
  stopifnot("wd" %in% names(dt))
  dt[, windDir := NA_character_]
  dt[wd >= 0 & wd < 70, windDir := "Q1: N -> ENE"]
  dt[wd >= 70 & wd < 180, windDir := "Q2: ENE -> S"]
  # upper bound is inclusive so a reading of exactly 360 is not left as NA
  dt[wd >= 180 & wd <= 360, windDir := "Q3-4: S -> N"]
  return(dt)
}
# Mean daily NO2 by banded count of all ships, split by wind sector and site.
# The boxes are grouped by the same variable that is on the x axis; grouping by
# maxCruiseCoded while plotting allShipsCoded mixed the two codings.
plotNO2DT <- setWindQuad(plotNO2DT)

ggplot2::ggplot(plotNO2DT, aes(x = allShipsCoded, y = meanNO2,
                               group = allShipsCoded, color = site)) +
  geom_boxplot() +
  labs(x = "Max number of ships",
       y = "Mean daily NO2",
       caption = myCap) +
  facet_grid(windDir ~ site)

# Same plot for mean daily PM10
plotPM10DT <- setWindQuad(plotPM10DT)

ggplot2::ggplot(plotPM10DT, aes(x = allShipsCoded, y = meanPM10,
                                group = allShipsCoded, color = site)) +
  geom_boxplot() +
  labs(x = "Max number of ships",
       y = "Mean daily PM10",
       caption = myCap) +
  facet_grid(windDir ~ site)

Models

It is not clear that linear modelling is an appropriate method to use since the observations are not independent - the value at time t is closely related to the value at time t + 1.

# Need to convert to wide so we can subtract centre (background)
table(fixedDT$site)

# short site label: A33 for the A33 site, Centre for everything else
fixedDT[, shortSite := ifelse(site %like% "A33", "A33", "Centre")]

# post-2019 rows for each site, keeping only the columns needed for reshaping
keepCols <- c("dateTimeUTC", "obsDate", "shortSite", "pollutant", "value")
a33DT <- fixedDT[site %like% "A33" & year > 2019, ..keepCols]
centreDT <- fixedDT[site %like% "Centre" & year > 2019, ..keepCols]

# long -> wide: one column per pollutant_site combination
a33DTw <- dcast(a33DT,
                dateTimeUTC + shortSite ~ pollutant + shortSite,
                value.var = "value")
centreDTw <- dcast(centreDT,
                   dateTimeUTC + shortSite ~ pollutant + shortSite,
                   value.var = "value")

# shortSite is now encoded in the column names, so drop it and join on time
a33DTw[, shortSite := NULL] # don't need
centreDTw[, shortSite := NULL]
setkey(a33DTw, dateTimeUTC)
setkey(centreDTw, dateTimeUTC)

wideFixedDT <- a33DTw[centreDTw]

# differencing - treat Centre as 'background'

wideFixedDT[, NO2_A33diff := no2_A33 - no2_Centre]
wideFixedDT[, obsDate := as.Date(dateTimeUTC)]

# daily means per date
# NOTE(review): wd is an arithmetic mean of degrees - unreliable for directions
# either side of north; confirm this is acceptable for the quadrant coding
wideDailyDT <- wideFixedDT[,
                           .(no2_A33 = mean(no2_A33),
                             pm10_A33 = mean(pm10_A33),
                             no2_Centre = mean(no2_Centre),
                             pm10_Centre = mean(pm10_Centre),
                             wd = mean(wd_A33), # these will be the same
                             ws = mean(ws_A33)),
                           keyby = .(obsDate)]
wideDailyDT[, NO2_A33diff := no2_A33 - no2_Centre]

# attach the daily values to the ship counts (keyed join)
plotDT <- wideDailyDT[shipsDailyDT]

In any case the table below shows the results of estimating a linear regression model of:

NO2 (A33 site) = total ships * wind quadrant

This should show:

the overall effects of total ships and wind quadrants
what effect the interaction between total ships and wind quadrant has

The results are more or less what we would expect given the small number of observations:

the more ships there are, the lower the NO2 at the A33 site but this is not statistically significant (95% confidence intervals include 0);
compared to the N -> ENE (contrast category omitted from the results), the ENE -> S wind quadrant is associated with nearly 8 times lower NO2 at the A33 site but this is not statistically significant;
if the wind is ENE -> S then every extra ship increased NO2 at the A33 site by 5 units but this is also not statistically significant

# code the wind sector and add a `date` copy of obsDate
plotDT <- setWindQuad(plotDT)
plotDT[, date := obsDate]
#openair::linearRelation(plotDT, x ="no2_Centre", y = "no2_A33")

# NO2 at the A33 site ~ total ships, wind sector and their interaction
lmA33 <- lm(no2_A33 ~ allShips * windDir, plotDT)

# text table, confidence intervals on the same row as each coefficient
stargazer::stargazer(
  lmA33,
  type = "text",
  title = "Model results",
  ci = TRUE,
  single.row = TRUE
)

# Diagnostics for the A33 model (lmA33): quantile-comparison plot of the
# residuals, spread-level plot, and a test for autocorrelated errors.
# do not run - fails
# NOTE(review): the calls below are live code despite the note above; confirm
# whether this chunk is meant to be evaluated (e.g. via chunk options).
# The 'car' package has some nice graphs to help here
car::qqPlot(lmA33) # shows default 95% CI
car::spreadLevelPlot(lmA33)
message("# Do we think the variance of the residuals is constant?")
message("# Did the plot suggest a transformation?")

# Durbin-Watson test on the model residuals; the result is auto-printed
message("# autocorrelation/independence of errors")
car::durbinWatsonTest(lmA33)
message("# if p < 0.05 then a problem as implies autocorrelation ut beware small samples")

However, the Durbin-Watson test (durbinWatsonTest) suggests autocorrelation, as we would expect. This means the standard errors are likely to be underestimates and so the confidence intervals are too narrow.

# Homoskedasticity and collinearity checks for the A33 model (lmA33).
message("# homoskedasticity: plot (should be no obvious pattern)")
# Take the observed response from the model's own frame rather than from
# plotDT: lm() drops rows with NA in *any* model variable, so filtering plotDT
# on the response alone can give a vector of a different length to the residuals.
plot(lmA33$model[["no2_A33"]], lmA33$residuals,
     xlab = "Observed NO2 (A33)", ylab = "Residuals")
abline(h = mean(lmA33$residuals), col = "red") # add the mean of the residuals (yay, it's zero!)
message("# homoskedasticity: formal test")
car::ncvTest(lmA33)
# ncvTest's null hypothesis is constant error variance, so a *small* p-value
# is the evidence of heteroskedasticity
message("# if p < 0.05 then there is heteroskedasticity")

message("# -> collinearity")
car::vif(lmA33)
# if any values > 10 -> problem
message("# -> tolerance")
# tolerance is the reciprocal of the variance inflation factor
1/car::vif(lmA33)
message("if any values < 0.2 -> possible problem")
message("if any values < 0.1 -> definitely a problem")

In contrast, if we repeat this model for the city centre then we get more substantive results. In this case:

the more ships there are, the lower the NO2 at the City Centre site but this is not statistically significant (95% confidence intervals include 0);
compared to the N -> ENE (contrast category omitted from the results), the ENE -> S wind quadrant is associated with nearly 32 times lower NO2 at the City Centre site but this is not statistically significant;
if the wind is ENE -> S then every extra ship increased NO2 at the City Centre site by 7 units and this is statistically significant although the 95% confidence intervals are quite wide (0.993-13.323)
if the wind is in the S -> N quadrant (i.e. the prevailing westerlies) then every extra ship increased NO2 at the City Centre site by 6 units but this is not statistically significant at the 95% level

#openair::linearRelation(plotDT, x ="no2_Centre", y = "no2_A33")

# Same specification as the A33 model, but with the Centre site's NO2 as the
# response
lmCentre <- lm(formula = no2_Centre ~ allShips * windDir, data = plotDT)

# Plain-text coefficient table, one row per term, with confidence intervals
stargazer::stargazer(
  lmCentre,
  title = "Model results",
  type = "text",
  single.row = TRUE,
  ci = TRUE
)

# Diagnostics for the Centre model (lmCentre): quantile-comparison plot of the
# residuals, spread-level plot, and a test for autocorrelated errors.
# The 'car' package has some nice graphs to help here
car::qqPlot(lmCentre) # shows default 95% CI
car::spreadLevelPlot(lmCentre)
message("# Do we think the variance of the residuals is constant?")

# Durbin-Watson test on the model residuals; the result is auto-printed
# NOTE(review): the equivalent A33 message warns about *small* samples while
# this one says *large* samples - confirm which is intended.
message("# autocorrelation/independence of errors")
car::durbinWatsonTest(lmCentre)
message("# if p < 0.05 then a problem as implies autocorrelation but beware large samples")

Again, however, the Durbin-Watson test (durbinWatsonTest) suggests autocorrelation. This means the standard errors are likely to be underestimates and so the confidence intervals are too narrow. As a result, effects we think are statistically significant are probably not...

# Homoskedasticity and collinearity checks for the Centre model (lmCentre).
message("# homoskedasticity: plot (should be no obvious pattern)")
# Take the observed response from the model's own frame rather than from
# plotDT: lm() drops rows with NA in *any* model variable, so filtering plotDT
# on the response alone can give a vector of a different length to the residuals.
plot(lmCentre$model[["no2_Centre"]], lmCentre$residuals,
     xlab = "Observed NO2 (Centre)", ylab = "Residuals")
abline(h = mean(lmCentre$residuals), col = "red") # add the mean of the residuals - ideally 0
message("# homoskedasticity: formal test")
car::ncvTest(lmCentre)
# ncvTest's null hypothesis is constant error variance, so a *small* p-value
# is the evidence of heteroskedasticity
message("# if p < 0.05 then there is heteroskedasticity")

message("# -> collinearity")
car::vif(lmCentre)
# if any values > 10 -> problem
message("# -> tolerance")
# tolerance is the reciprocal of the variance inflation factor
1/car::vif(lmCentre)
message("# if any values < 0.2 -> possible problem")
message("# if any values < 0.1 -> definitely a problem")

Summary

Overall while there appear to be correlations between larger numbers of ships and more frequent higher pollution levels under some wind conditions, the small number of observations and possible confounding meteorological effects mean they could just be spurious.

The linear regression model results are indicative of a relationship but suffer from methodological problems (autocorrelation).

We may need a better measure of ship emissions, we need to take out the meteorological effects, and we need an approach to time-series regression which can allow for autocorrelation.

Experiments with openair

The openair R package [@openair] offers a number of pre-formed plot functions which are of potential use.

# Reference year for the timeVariation comparisons: the plots filter rows on
# `year` relative to this value (some with >=, some with >)
cutYear <- 2017 # use as comparison year(s)

NO2 trends with Theil-Sen

openair's Theil-Sen function provides de-seasoned trendlines. Figure \@ref(fig:TheilSenNo2Centre) shows the trend for NO2 in Southampton Centre over the last 3 years while Figure \@ref(fig:TheilSenNo2A33) shows the same plot but for the A33 site. Both sites show a fairly consistent decline over time.

# openair wants wide-style input: a `date` column plus a column named after
# the pollutant. Here `date` is the observation date and `no2` copies `value`.
no2dt[, `:=`(date = obsDate, no2 = value)]

# De-seasoned Theil-Sen trend, panelled by site and weekday: Centre site
openair::TheilSen(
  mydata = no2dt[site %like% "Centre"],
  pollutant = "no2",
  deseason = TRUE,
  type = c("site", "weekday"),
  ylab = "NO2 (ppb)"
)

# Same plot for the A33 site
openair::TheilSen(
  mydata = no2dt[site %like% "A33"],
  pollutant = "no2",
  deseason = TRUE,
  type = c("site", "weekday"),
  ylab = "NO2 (ppb)"
)

24 hour NO2 patterns with timeVariation

openair's timeVariation function provides plots by time of day and weekday. Figure \@ref(fig:timeVariationNo2Centre) shows the pattern for NO2 in Southampton Centre while Figure \@ref(fig:timeVariationNo2A33) shows the same plot but for the A33 site. In each case we compare the period before lockdown (from the start of `r cutYear`) with the period since lockdown (from `r myParams$lockDownStartDate`).

These quite clearly show the difference in pollution for pre and during lockdown for specific times of day and days of the week. We can quite clearly see that NO2 has decreased at exactly the times we would expect given the reduction in transport use. This is especially apparent on Mondays and Tuesdays but less obvious on Wednesday to Friday (bottom right plot) with the weekly profile of NO2 emissions looking substantially different.

# timeVariation() works on time of day, so `date` must hold the full timestamp
no2dt[, date := dateTimeUTC] # set date to dateTime for this one

# Flag each observation as before or during lockdown.
# Compare against the date-time version of the lockdown start: mixing a
# date-time column with a Date in `<=` hits an S3 dispatch conflict in base R
# and falls back to comparing the raw numbers (seconds vs days).
# Assumes dateTimeUTC is POSIXct - confirm against where no2dt is built.
no2dt[, lockdown := ifelse(dateTimeUTC <= myParams$lockDownStartDateTime,
                           "Pre lockdown", "Lockdown")]

# Time-of-day / day-of-week profiles, one line per lockdown group: Centre site
openair::timeVariation(no2dt[site %like% "Centre" & year >= cutYear],
                       "no2", ylab = "NO2 (ppb)",
                       group = "lockdown")

# Same plot for the A33 site
# NOTE(review): this filter uses `>` where the Centre plot uses `>=`, so the
# A33 comparison starts a year later - confirm this is intended.
openair::timeVariation(no2dt[site %like% "A33" & year > cutYear],
                       "no2", ylab = "NO2 (ppb)",
                       group = "lockdown")

PM10 trends with Theil-Sen

openair's Theil-Sen function provides de-seasoned trendlines. Figure \@ref(fig:TheilSenPM10Centre) shows the trend for PM10 in Southampton Centre over the last 3 years while Figure \@ref(fig:TheilSenPM10A33) shows the same plot but for the A33 site.

# openair wants a `date` column plus a column named after the pollutant.
# Here `date` is the observation date and `pm10` copies `value`.
pm10dt[, date := obsDate]
pm10dt[, pm10 := value]

# De-seasoned Theil-Sen trend, panelled by site and weekday: Centre site
openair::TheilSen(pm10dt[site %like% "Centre"],
                  "pm10", ylab = "PM10", deseason = TRUE,
                  type = c("site", "weekday"))

# Same plot for the A33 site (y-axis label corrected: this is PM10, not NO2)
openair::TheilSen(pm10dt[site %like% "A33"],
                  "pm10", ylab = "PM10", deseason = TRUE,
                  type = c("site", "weekday"))

24 hour PM10 patterns with timeVariation

Figure \@ref(fig:timeVariationPM10Centre) shows the pattern for PM10 in Southampton Centre while Figure \@ref(fig:timeVariationPM10A33) shows the same plot but for the A33 site. In each case we compare the period before lockdown (from the start of `r cutYear`) with the period since lockdown (from `r myParams$lockDownStartDate`).

Unlike NO2, PM10 has not declined presumably because it is not affected by reduced transport use etc.

# timeVariation() works on time of day, so `date` must hold the full timestamp
pm10dt[, date := dateTimeUTC]

# Flag each observation as before or during lockdown.
# Compare against the date-time version of the lockdown start: mixing a
# date-time column with a Date in `<=` hits an S3 dispatch conflict in base R
# and falls back to comparing the raw numbers (seconds vs days).
# Assumes dateTimeUTC is POSIXct - confirm against where pm10dt is built.
pm10dt[, lockdown := ifelse(dateTimeUTC <= myParams$lockDownStartDateTime,
                            "Pre lockdown", "Lockdown")]

# Time-of-day / day-of-week profiles, one line per lockdown group: Centre site
openair::timeVariation(pm10dt[site %like% "Centre" & year >= cutYear],
                       "pm10", ylab = "PM10",
                       group = "lockdown") # use group to give before/after

# Same plot for the A33 site
# NOTE(review): this filter uses `>` where the Centre plot uses `>=`, so the
# A33 comparison starts a year later - confirm this is intended.
openair::timeVariation(pm10dt[site %like% "A33" & year > cutYear],
                       "pm10", ylab = "PM10",
                       group = "lockdown")

PM2.5 trends with Theil-Sen

openair's Theil-Sen function provides de-seasoned trendlines. Figure \@ref(fig:TheilSenPM25Centre) shows the trend for PM2.5 in Southampton Centre over the last 3 years.

# openair wants a `date` column plus a column named after the pollutant.
# Here `date` is the observation date and `pm25` copies `value`.
pm25dt[, `:=`(date = obsDate, pm25 = value)]

# De-seasoned Theil-Sen trend for the Centre site, panelled by site and weekday
openair::TheilSen(
  mydata = pm25dt[site %like% "Centre"],
  pollutant = "pm25",
  deseason = TRUE,
  type = c("site", "weekday"),
  ylab = "PM2.5"
)

24 hour PM25 patterns with timeVariation

r cutYear <- 2019

openair's timeVariation function provides plots by time of day and weekday. Figure \@ref(fig:timeVariationPM25Centre) shows the pattern for PM2.5 in Southampton Centre. We compare the period before lockdown (from the start of `r cutYear`) with the period since lockdown (from `r myParams$lockDownStartDate`).

As with PM10, PM2.5 has not declined presumably because it is not affected by reduced transport use etc.

# timeVariation() works on time of day, so `date` must hold the full timestamp
pm25dt[, date := dateTimeUTC]

# Flag each observation as before or during lockdown.
# Compare against the date-time version of the lockdown start: mixing a
# date-time column with a Date in `<=` hits an S3 dispatch conflict in base R
# and falls back to comparing the raw numbers (seconds vs days).
# Assumes dateTimeUTC is POSIXct - confirm against where pm25dt is built.
pm25dt[, lockdown := ifelse(dateTimeUTC <= myParams$lockDownStartDateTime,
                            "Pre lockdown", "Lockdown")]

# Time-of-day / day-of-week profiles for the Centre site, one line per
# lockdown group
openair::timeVariation(pm25dt[site %like% "Centre" & year >= cutYear],
                       "pm25", ylab = "PM2.5",
                       group = "lockdown")

Runtime

Report generated using knitr in RStudio with `r R.version.string` running on `r R.version$platform` (`r Sys.info()[3]`).

# Time used since myParams$startTime was recorded at the top of the report
t <- proc.time() - myParams$startTime

# Keep only the wall-clock ("elapsed") component, in seconds
elapsed <- t[["elapsed"]]

Analysis completed in `r elapsed` seconds (`r round(elapsed/60,2)` minutes).

R packages used in this report:

data.table - [@data.table]
ggplot2 - [@ggplot2]
here - [@here]
kableExtra - [@kableExtra]
lubridate - [@lubridate]
openair - [@openair]
skimr - [@skimr]
viridis - [@viridis]

References

CfSOtago/airQual documentation built on Nov. 13, 2020, 8:08 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

CfSOtago/airQual
Code to analyse Air Quality data in the UK & NZ over time

In CfSOtago/airQual: Code to analyse Air Quality data in the UK & NZ over time

Introduction

Data

WHO air quality thresholds

Nitrogen Dioxide (no2)

Oxides of Nitrogen (nox)

Sulphour Dioxide

Ozone

PM 10

PM 2.5

Wind direction and speed

Save data

About

Code

Comments and feedback

Citation

Annex

Missing data {#annexMissing}

Ship activity

NO2

PM10

Focus on South to East winds

Models

Summary

Experiments with openair

NO2 trends with Theil-Sen

24 hour NO2 patterns with timeVariation

PM10 trends with Theil-Sen

24 hour PM10 patterns with timeVariation

PM2.5 trends with Theil-Sen

24 hour PM25 patterns with timeVariation

Runtime

References

R Package Documentation

Browse R Packages

We want your feedback!

CfSOtago/airQual Code to analyse Air Quality data in the UK & NZ over time

In CfSOtago/airQual: Code to analyse Air Quality data in the UK & NZ over time

Introduction

Data

WHO air quality thresholds

Nitrogen Dioxide (no2)

Oxides of Nitrogen (nox)

Sulphour Dioxide

Ozone

PM 10

PM 2.5

Wind direction and speed

Save data

About

Code

Comments and feedback

Citation

Annex

Missing data {#annexMissing}

Ship activity

NO2

PM10

Focus on South to East winds

Models

Summary

Experiments with openair

NO2 trends with Theil-Sen

24 hour NO2 patterns with timeVariation

PM10 trends with Theil-Sen

24 hour PM10 patterns with timeVariation

PM2.5 trends with Theil-Sen

24 hour PM25 patterns with timeVariation

Runtime

References

R Package Documentation

Browse R Packages

We want your feedback!

CfSOtago/airQual
Code to analyse Air Quality data in the UK & NZ over time