# Set knitr options
knitr::opts_chunk$set(echo = FALSE,          # echo code so reader can see what is happening
                      warning = FALSE,
                      message = FALSE,
                      fig_caption = TRUE,
                      fig_height = 6,        # default, make it bigger to stretch vertical axis
                      fig_width = 8,
                      fig_width = 8,         # full width
                      tidy = TRUE)           # tidy up code in case echo = TRUE
# Load Packages ----
library(dkUtils)

myLibs <- c("data.table",
            "ggplot2",
            "here",
            "kableExtra",
            "lubridate",
            "plotly",
            "skimr")

dkUtils::loadLibraries(myLibs)              # Load script specific packages

# Project Settings ----
projLoc <- here::here()

# https://www.mfe.govt.nz/publications/air/indicator-update-air-quality-particulate-matter-%E2%80%93-pm10/indicator-update-air-quality
annualPm10Threshold_NZ <- 20 
dailyPm10Threshold_NZ <- 50 

# https://www.who.int/news-room/fact-sheets/detail/ambient-(outdoor)-air-quality-and-health
annualPm10Threshold_WHO <- 20
dailyPm10Threshold_WHO <- 50 
annualPm2.5Threshold_WHO <- 10
dailyPm2.5Threshold_WHO <- 25 

dPath <- "~/Data/NZ_mfe/airQuality/"
pm2.5File <- "mfe-pm25-concentrations-200817-CSV/pm25-concentrations-200817.csv"
pm10File <- "mfe-pm10-concentrations-200617-CSV/pm10-concentrations-200617.csv"

pmURL <- "https://data.mfe.govt.nz/data/category/air/"


# Adjust knitr options if required
knitr::opts_chunk$set(echo = TRUE)

# Log compile time:
startTime <- proc.time()

# Functions ----
makeTilePlot <- function(dt,yvar,byvar){
  p <- ggplot2::ggplot(dt, aes(x = ba_date, 
                               y = get(byvar), 
                               fill = get(yvar)
                               )
                       ) +
    geom_tile() +
    scale_fill_continuous(low = "green", high = "red") +
    labs(x = "Date")
  return(p)
}

makeLinePlot <- function(dt,yvar,byvar){
  p <- ggplot2::ggplot(dt, aes(x = ba_date, 
                               y = get(yvar),
                               colour = get(byvar)
                               )
                       ) +
    geom_line() +
    labs(x = "Date")
  return(p)
}

About

Contributions

Please note that authorship is alphabetical. Contributions are listed below - see github for details and who to blame for what :-).

Ben Anderson (b.anderson@soton.ac.uk @dataknut)

Code

Citation

If you wish to refer to any of the material from this report please cite as:

Report circulation:

Report purpose:

This work has received funding from the European Union’s Horizon 2020 research and innovation programme under the Marie Skłodowska-Curie grant agreement No 700386 (SPATIALEC).

This work is (c) r format(Sys.time(), format = "%Y") the University of Southampton.

Introduction

LAWA seems to hold sub-daily data as it is used to create almost real-time plots & reports - see https://www.lawa.org.nz/explore-data/otago-region/air-quality/alexandra/alexandra-at-5-ventry-street/

It is unclear how we can access this data so for now we have used the MfE data.

PM 10 data

PM 10 data: has more sensors and wider coverage.

Data source: r pmURL

Data file: r pm10File

df <- paste0(dPath, pm10File)

pm10dt <- data.table::fread(df)
pm10dt[, ba_date := lubridate::as_date(date)]
# the data is daily but there may be gaps?
pm10dt[, council.site := paste0(council, ".", site)]

Overall there are:

# looks like daily data with gaps
p <- makeTilePlot(pm10dt[council.site %like% "ORC"], yvar = "pm10", byvar = "council.site")
p + labs(y = "pm10") + 
  guides(fill=guide_legend(title="pm10"))

st <- pm10dt[, .(mean_PM10 = mean(pm10),
                min_PM10 = min(pm10),
                max_PM10 = max(pm10),
                nObs = .N,
                startDate = min(ba_date),
                endDate = max(ba_date)),
             keyby = .(council)]

kableExtra::kable(st, digits = 2,caption = "Summary statstics for PM10 by Council")  %>%
  kable_styling()

Table \@ref(tab:pm10TestData) suggests there are negative values for some days. Why?

Figure \@ref(fig:pm10TestDataPlotly) shows daily values for all sites and indicates those that cross the:

# looks like daily data with gaps
p <- makeLinePlot(pm10dt,yvar = "pm10",byvar = "council.site" )

p <- p + labs(y = "pm10", caption = "NZ/WHO threshold shown in red") +
  geom_hline(yintercept = dailyPm10Threshold_WHO, colour = "red") +
  geom_hline(yintercept = dailyPm10Threshold_NZ, colour = "red") +
  guides(colour=guide_legend(title="pm10")) +
  theme(legend.position = "bottom") +
  facet_grid(council ~ .)

p

plotly::ggplotly(p)

PM 2.5 data

PM 2.5 data: has fewer sensors and less coverage.

Data source: r pmURL

Data file: r pm2.5File

df <- paste0(dPath, pm2.5File)

pm2.5dt <- data.table::fread(df)
pm2.5dt[, ba_date := lubridate::as_date(date)]
# the data is daily but there may be gaps?
pm2.5dt[, council.site := paste0(council, ".", site)]

Overall there are:

# looks like daily data with gaps
p <- makeTilePlot(pm2.5dt, yvar = "pm2_5", byvar = "council.site")
p + labs(y = "pm2_5") + 
  guides(fill=guide_legend(title="pm2_5"))

st <- pm2.5dt[, .(mean_PM10 = mean(pm2_5),
                min_PM10 = min(pm2_5),
                max_PM10 = max(pm2_5),
                nObs = .N,
                startDate = min(ba_date),
                endDate = max(ba_date)),
             keyby = .(council)]

kableExtra::kable(st, caption = "Summary statstics for PM2.5 by Council") %>%
  kable_styling()

We also seem to have some negative values here...

Figure \@ref(fig:pm2_5TestDataPlotly) shows daily values for all sites and indicates those that cross the:

NZ has yet to set a PM2.5 exposure threshold.

# looks like daily data with gaps
p <- makeLinePlot(pm2.5dt[council.site %like% "ORC" | council.site %like% "ECAN"],yvar = "pm2_5",byvar = "council.site" )

p <- p + labs(y = "pm2_5", caption = "WHO threshold shown in red") +
  geom_hline(yintercept = dailyPm2.5Threshold_WHO, colour = "red") +
  guides(colour=guide_legend(title="pm2_5")) +
  theme(legend.position = "bottom") +
  facet_grid(council.site ~ .)

p

plotly::ggplotly(p)

Statistical Annex

PM10

skimr::skim(pm10dt)
# looks like daily data with gaps
t <- pm10dt[, .(nObs = .N), keyby = .(council, site)]

kableExtra::kable(t, caption = "N obs at sites (PM10)") %>%
  kable_styling()

PM2.5

skimr::skim(pm2.5dt)
# looks like daily data with gaps
t <- pm2.5dt[, .(nObs = .N), keyby = .(council, site)]

kableExtra::kable(t, caption = "N obs at sites (PM2.5)") %>%
  kable_styling()

Runtime

Report generated using knitr in RStudio with r R.version.string running on r R.version$platform (r Sys.info()[3]).

t <- proc.time() - startTime

elapsed <- t[[3]]

Analysis completed in r elapsed seconds ( r round(elapsed/60,2) minutes).

R packages used:

References



CfSOtago/airQual documentation built on Nov. 13, 2020, 8:08 a.m.