# Set knitr options knitr::opts_chunk$set(echo = FALSE, # echo code so reader can see what is happening warning = FALSE, message = FALSE, fig_caption = TRUE, fig_height = 6, # default, make it bigger to stretch vertical axis fig_width = 8, fig_width = 8, # full width tidy = TRUE) # tidy up code in case echo = TRUE
# Load Packages ---- library(dkUtils) myLibs <- c("data.table", "ggplot2", "here", "kableExtra", "lubridate", "plotly", "skimr") dkUtils::loadLibraries(myLibs) # Load script specific packages # Project Settings ---- projLoc <- here::here() # https://www.mfe.govt.nz/publications/air/indicator-update-air-quality-particulate-matter-%E2%80%93-pm10/indicator-update-air-quality annualPm10Threshold_NZ <- 20 dailyPm10Threshold_NZ <- 50 # https://www.who.int/news-room/fact-sheets/detail/ambient-(outdoor)-air-quality-and-health annualPm10Threshold_WHO <- 20 dailyPm10Threshold_WHO <- 50 annualPm2.5Threshold_WHO <- 10 dailyPm2.5Threshold_WHO <- 25 dPath <- "~/Data/NZ_mfe/airQuality/" pm2.5File <- "mfe-pm25-concentrations-200817-CSV/pm25-concentrations-200817.csv" pm10File <- "mfe-pm10-concentrations-200617-CSV/pm10-concentrations-200617.csv" pmURL <- "https://data.mfe.govt.nz/data/category/air/" # Adjust knitr options if required knitr::opts_chunk$set(echo = TRUE) # Log compile time: startTime <- proc.time() # Functions ---- makeTilePlot <- function(dt,yvar,byvar){ p <- ggplot2::ggplot(dt, aes(x = ba_date, y = get(byvar), fill = get(yvar) ) ) + geom_tile() + scale_fill_continuous(low = "green", high = "red") + labs(x = "Date") return(p) } makeLinePlot <- function(dt,yvar,byvar){ p <- ggplot2::ggplot(dt, aes(x = ba_date, y = get(yvar), colour = get(byvar) ) ) + geom_line() + labs(x = "Date") return(p) }
Please note that authorship is alphabetical. Contributions are listed below - see github for details and who to blame for what :-).
Ben Anderson (b.anderson@soton.ac.uk @dataknut
)
If you wish to refer to any of the material from this report please cite as:
r format(Sys.time(), format = "%Y")
) r params$title
: r params$subtitle
, University of Southampton: Southampton, UK.Report circulation:
Report purpose:
official
New Zealand Air Quality dataThis work has received funding from the European Union’s Horizon 2020 research and innovation programme under the Marie Skłodowska-Curie grant agreement No 700386 (SPATIALEC).
This work is (c) r format(Sys.time(), format = "%Y")
the University of Southampton.
LAWA seems to hold sub-daily data as it is used to create almost real-time plots & reports - see https://www.lawa.org.nz/explore-data/otago-region/air-quality/alexandra/alexandra-at-5-ventry-street/
It is unclear how we can access this data so for now we have used the MfE data.
PM 10 data: has more sensors and wider coverage.
Data source: r pmURL
Data file: r pm10File
df <- paste0(dPath, pm10File) pm10dt <- data.table::fread(df) pm10dt[, ba_date := lubridate::as_date(date)] # the data is daily but there may be gaps? pm10dt[, council.site := paste0(council, ".", site)]
Overall there are:
r uniqueN(pm10dt$site)
sites spread overr uniqueN(pm10dt$council)
councils# looks like daily data with gaps p <- makeTilePlot(pm10dt[council.site %like% "ORC"], yvar = "pm10", byvar = "council.site") p + labs(y = "pm10") + guides(fill=guide_legend(title="pm10")) st <- pm10dt[, .(mean_PM10 = mean(pm10), min_PM10 = min(pm10), max_PM10 = max(pm10), nObs = .N, startDate = min(ba_date), endDate = max(ba_date)), keyby = .(council)] kableExtra::kable(st, digits = 2,caption = "Summary statstics for PM10 by Council") %>% kable_styling()
Table \@ref(tab:pm10TestData) suggests there are negative values for some days. Why?
Figure \@ref(fig:pm10TestDataPlotly) shows daily values for all sites and indicates those that cross the:
r dailyPm10Threshold_WHO
) - see https://www.who.int/news-room/fact-sheets/detail/ambient-(outdoor)-air-quality-and-healthr dailyPm10Threshold_NZ
) - https://www.mfe.govt.nz/publications/air/indicator-update-air-quality-particulate-matter-%E2%80%93-pm10/indicator-update-air-quality# looks like daily data with gaps p <- makeLinePlot(pm10dt,yvar = "pm10",byvar = "council.site" ) p <- p + labs(y = "pm10", caption = "NZ/WHO threshold shown in red") + geom_hline(yintercept = dailyPm10Threshold_WHO, colour = "red") + geom_hline(yintercept = dailyPm10Threshold_NZ, colour = "red") + guides(colour=guide_legend(title="pm10")) + theme(legend.position = "bottom") + facet_grid(council ~ .) p plotly::ggplotly(p)
PM 2.5 data: has fewer sensors and less coverage.
Data source: r pmURL
Data file: r pm2.5File
df <- paste0(dPath, pm2.5File) pm2.5dt <- data.table::fread(df) pm2.5dt[, ba_date := lubridate::as_date(date)] # the data is daily but there may be gaps? pm2.5dt[, council.site := paste0(council, ".", site)]
Overall there are:
r uniqueN(pm2.5dt$site)
sites spread overr uniqueN(pm2.5dt$council)
councils# looks like daily data with gaps p <- makeTilePlot(pm2.5dt, yvar = "pm2_5", byvar = "council.site") p + labs(y = "pm2_5") + guides(fill=guide_legend(title="pm2_5")) st <- pm2.5dt[, .(mean_PM10 = mean(pm2_5), min_PM10 = min(pm2_5), max_PM10 = max(pm2_5), nObs = .N, startDate = min(ba_date), endDate = max(ba_date)), keyby = .(council)] kableExtra::kable(st, caption = "Summary statstics for PM2.5 by Council") %>% kable_styling()
We also seem to have some negative values here...
Figure \@ref(fig:pm2_5TestDataPlotly) shows daily values for all sites and indicates those that cross the:
r dailyPm2.5Threshold_WHO
) - see https://www.who.int/news-room/fact-sheets/detail/ambient-(outdoor)-air-quality-and-healthNZ has yet to set a PM2.5 exposure threshold.
# looks like daily data with gaps p <- makeLinePlot(pm2.5dt[council.site %like% "ORC" | council.site %like% "ECAN"],yvar = "pm2_5",byvar = "council.site" ) p <- p + labs(y = "pm2_5", caption = "WHO threshold shown in red") + geom_hline(yintercept = dailyPm2.5Threshold_WHO, colour = "red") + guides(colour=guide_legend(title="pm2_5")) + theme(legend.position = "bottom") + facet_grid(council.site ~ .) p plotly::ggplotly(p)
skimr::skim(pm10dt)
# looks like daily data with gaps t <- pm10dt[, .(nObs = .N), keyby = .(council, site)] kableExtra::kable(t, caption = "N obs at sites (PM10)") %>% kable_styling()
skimr::skim(pm2.5dt)
# looks like daily data with gaps t <- pm2.5dt[, .(nObs = .N), keyby = .(council, site)] kableExtra::kable(t, caption = "N obs at sites (PM2.5)") %>% kable_styling()
Report generated using knitr in RStudio with r R.version.string
running on r R.version$platform
(r Sys.info()[3]
).
t <- proc.time() - startTime elapsed <- t[[3]]
Analysis completed in r elapsed
seconds ( r round(elapsed/60,2)
minutes).
R packages used:
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.