# start constants
# abbrev used in NAEI, in SNAP sector order 1-11
v_sector <- c("energyprod", "domcom", "indcom", "indproc", "offshore",
"solvents", "roadtrans", "othertrans", "waste", "agric", "nature")
n_sectors <- length(v_sector)
# end constants
#' getEmissionTimeSeriesFromBEIS
#'
#' Get a time series of pollutants from the BEIS web server
#' @param startYear start year of time series
#' @param endYear end year of time series
#' @param query_id_ghg url to the template repository
#' @param v_pollutant_name_ghg vector of names of pollutants under BEIS GHG tab
#' @param query_id_airpoll destination remote url
#' @param v_pollutant_name_airpoll vector of names of pollutants under BEIS Air Pollution tab
#'
#' @details Need to run two queries first at https://naei.beis.gov.uk/data/data-selector
#' with one vector of GHGs and one vector of air pollutants.
#' These need to match the pollutants in /data/lookups/pollutants.csv.
#' @examples
#' \dontrun{
#' dt <- getEmissionTimeSeriesFromBEIS(
#' startYear = 1990,
#' endYear = 2018,
#' query_id_ghg = 145161,
#' v_pollutant_name_ghg = c("co2", "ch4", "n2o"),
#' query_id_airpoll = 145164,
#' v_pollutant_name_airpoll = c("co", "nox", "nh3", "bz", "so2", "voc"))
#' }
getEmissionTimeSeriesFromBEIS <- function(
startYear = 1990,
endYear = 2019, # 2018, updated to latest NAEI year
query_id_ghg = 151517, # 145161, SAM updated for new query generation on NAEI
v_pollutant_name_ghg = c("co2", "ch4", "n2o"),
query_id_airpoll = 151579, # 145164, SAM updated for new query generation on NAEI
v_pollutant_name_airpoll = c("co", "nox", "nh3", "bz", "so2", "voc"),
sector_classification = "SNAP",
lookup_NFR # NFR to SNAP lookup table
){
df_lookup <- read.csv("./data/lookups/pollutants.csv", stringsAsFactors = FALSE)
# output will be written to a temporary file
fname <- paste0("./data/NAEI_ts_wide.csv")
# if necessary, delete any existing copy because we later append to it
if (file.exists(fname)) file.remove(fname)
## SJT: i have changed the script do to AP first
## This is because the AP run from 1970 to 20xx and GHG from 1990
## Doing GHG first means that appended AP are do not align by 20 years and no error is created.
## ALSO: change to make lists and then bind together to respect col headers, append does not do this
## thought: the id query on NAEI can be changed to run only from 1990, thereby solving append problem, but
## this seems to restrict the data that can be called in from NAEI
ind <- match(v_pollutant_name_airpoll, df_lookup$pollutant) # no such name `name_pollutant`; changed
v_pollutant_id <- df_lookup$pollutant_id[ind] # c(2, 3, 5)
v_url <- paste0("https://naei.beis.gov.uk/data/download?q=", query_id_airpoll, "§ion=ukdata&pollutantId=", v_pollutant_id)
# read from the query URL into a list of data tables
l_dt_ap <- lapply(v_url, fread, na.strings = "-", header=T)
ind <- match(v_pollutant_name_ghg, df_lookup$pollutant)
v_pollutant_id <- df_lookup$pollutant_id[ind] # c(2, 3, 5)
v_url <- paste0("https://naei.beis.gov.uk/data/download?q=", query_id_ghg, "§ion=ukdata&pollutantId=", v_pollutant_id)
# read from the query URL into a list of data tables
l_dt_ghg <- lapply(v_url, fread, na.strings = "-", header=T) # SJT: this structure is not the same as AP
# write all to one csv, because the data type recognition doesnt work from the query
# data frames in list do not all have same colClasses
## THE DATA APPEND DOES NOT WORK BY COL NAME
#lapply(l_dt, fwrite, file = fname, append = TRUE)
# bind together the data, using column names (differing years of data) and write.
dt_ap_ghg <- rbindlist(c(l_dt_ap, l_dt_ghg), use.names = T, fill = T)
fwrite(dt_ap_ghg, fname, eol = "\n")
# then read back in from the csv
dt <- fread(fname, na.strings = c("-"), header=T)
# change blanks to NA
dt$Source[dt$Source == ""] <- NA
dt$Activity[dt$Activity == ""] <- NA
# reshape to long format # SJT: i have reservations about using col indices - use names?
startCol <- 6
endCol <- dim(dt)[2]
dt <- as.data.table(pivot_longer(dt, cols = startCol:endCol,
names_to = "year", values_to ="emission"))
# remove / and space characters from names
names(dt) <- make.names(names(dt))
# and make consistent
setnames(dt, c("Gas", "Units"),
c("pollutant", "units"))
#summary(dt)
#unique(dt$NFR.CRF.Group)
# remove rows outwith data range
dt <- dt[year >= startYear & year <= endYear]
# make a subset containing only totals (where Source is NA) for later comparison
dt_total <- subset(dt, is.na(Source))
# remove rows containing totals (where Source is NA)
dt <- subset(dt, !is.na(Source))
df_lookup <- read.csv("./data/lookups/pollutants.csv", stringsAsFactors = FALSE)
ind <- match(dt$pollutant, df_lookup$pollutant_longName)
dt$pollutant <- df_lookup$pollutant[ind]
# unique(dt$pollutant)
# any(is.na(dt$pollutant))
df_lookup <- fread(lookup_NFR, stringsAsFactors = FALSE)
#df_lookup <- read.csv("./data/lookups/NFR19_to_SNAP.csv", stringsAsFactors = FALSE) # updated to new
ind <- match(dt$NFR.CRF.Group, df_lookup$NFR19)
# set snap to matching value in lookup table
# "avi.cruise" is only non-numeric, and set to NA
dt$snap_id <- as.numeric(df_lookup$SNAP[ind])
dt <- dt[!is.na(snap_id)]
# unique(dt$snap_id)
# unique(dt$Units)
if (sector_classification == "SNAP"){
dt <- dt[, .(emission = sum(emission, na.rm = TRUE)), by = .(pollutant, year, snap_id)]
} else if (sector_classification == "NFR"){
dt <- dt[, .(emission = sum(emission, na.rm = TRUE)), by = .(pollutant, year, NFR.CRF.Group)]
}
# year returned as character; need to convert back to integers
dt$year <- as.numeric(dt$year)
# set units
# given as kilotonnes = Gg = 1e9 g
units(dt$emission) <- "Gg /yr"
# point and diffuse data are in Mg (= tonnes)
dt$emission <- set_units(dt$emission, Mg /yr)
# but convert CO2-C to CO2 - only odd one out?
dt[pollutant == "co2", emission := emission /12*44]
# # check that the totals of the remaining rows add to the
# # pre-calculated totals
# dt$NFR.CRF.Group_1 <- substr(dt$NFR.CRF.Group, 1, 2)
# head(dt)
# dt_total_check <- dt[, .(F_sum = sum(emission, na.rm = TRUE)), by = .(Gas, NFR.CRF.Group_1, year)]
# dft <- inner_join(dt_total_check, dt_total,
# by = c("Gas", "NFR.CRF.Group_1" = "NFR.CRF.Group", "year"))
# dim(dt_total)
# add missing levels
dt <- dt %>%
tidyr::complete(snap_id = 1:n_sectors, nesting(pollutant, year),
fill = list(emission = 0))
dt <- as.data.table(dt)
# do we want to write to file?
fwrite(dt, file = "./data/NAEI_ts_long.csv", eol = "\n")
#dt <- fread(file = "./data/NAEI_ts_long.csv")
return(dt)
}
getEmissionTimeSeriesFromUNFCCC <- function(
country_code = "IE",
startYear = 1990,
endYear = 2019,
v_pollutant_name_ghg = c("co2", "ch4", "n2o"),
sector_classification = "SNAP",
lookup_CRF # added here, similar to NFR lookup above.
){
dt <- fread("./data/UNFCCC_GHG_ts.csv")
#dim(dt)
#names(dt)
setnames(dt, c("Pollutant_name","Year","Unit","emissions"),
c("pollutant","year","Units","emission"))
#summary(dt)
# subset to country, without NA values
dt <- dt[Country_code == country_code & !is.na(emission)]
dt[, pollutant := tolower(pollutant)]
dt[, year := as.numeric(year)]
dt <- dt[pollutant %in% v_pollutant_name_ghg]
dt <- dt[year >= startYear & year <= endYear]
# read lookup table for CRF to SNAP sector codes
# !stringsAsFactors option needed on older R versions
## use the new CRF to SNAP lookup. Use UNFCCC data.
df_lookup <- fread(lookup_CRF, stringsAsFactors = FALSE)
ind <- match(dt$Sector_code, df_lookup$CRF)
# set snap_id to matching value in lookup table
dt[, snap_id := as.numeric(df_lookup$SNAP[ind])]
# Sector totals are only missing values - check non-matched are just these
# sum(is.na(dt$snap_id)) # 522
dt <- dt[!is.na(snap_id)]
# calculate totals by SNAP sector or other sector_classification
if (sector_classification == "SNAP"){
dt <- dt[, .(emission = sum(emission, na.rm = TRUE), units = first(Units)), by = .(pollutant, year, snap_id)]
} else if (sector_classification == "NFR"){ # would need to look up other codes before this will work
dt <- dt[, .(emission = sum(emission, na.rm = TRUE), units = first(Units)), by = .(pollutant, year, Parent_sector_code)]
}
# add missing levels
dt <- dt %>%
tidyr::complete(snap_id = 1:n_sectors, nesting(pollutant, year),
fill = list(emission = 0))
dt <- as.data.table(dt)
dt[pollutant=="co2" & year==2019]
# set units
# given as Gg = 1e9 g
units(dt$emission) <- "Gg / yr"
# point and diffuse data are in Mg (= tonnes)
dt$emission <- set_units(dt$emission, Mg / yr)
# write to file
fwrite(dt, file = "./data/UNFCCC_ts_long.csv", eol = "\n")
return(dt)
}
#' getPointSourceData
#'
#' Get a time series of pollutant point source data from the NAEI files
#' @param startYear start year of time series
#' @param endYear end year of time series
#'
#' @details Files have been downloaded from NAEI (https://naei.beis.gov.uk/data/)
#' but old files are no longer directly available from web site.
#' This is straightforward, apart from standardising names over time.
#' @examples
#' \dontrun{
#' dt <- getPointSourceData(startYear = 2010, endYear = 2018)
#' }
getPointSourceData <- function(startYear = 2010, endYear = 2019, lookup_SIC){
v_year <- startYear:endYear
v_fname <- paste0("./data-raw/point_sources/NAEIPointsSources_", v_year, ".xlsx")
l_df <- suppressWarnings(lapply(v_fname, read_excel, sheet = "Data"))
# l_names <- lapply(l_df, names)
# lapply(l_names, `[[`, 2)
# lapply(l_df, head)
# lapply(l_df, standardiseNames)
for (i in 1:length(l_df)){
l_df[[i]] <- standardiseNames(l_df[[i]])
}
# lapply(l_df, names)
# lapply(l_names, `[[`, 2)
dt <- as.data.table(bind_rows(l_df))
##* WIP - the list in pollutants.csv is incomplete
##* SJT: something is wrong with the match() functions, creating 'extra' SNAP codes where they shouldnt be
##* i think it is the presence of TWO lines for pm25, which causing multiple matches.
df_lookup <- as.data.table(read.csv("./data/lookups/pollutants.csv", stringsAsFactors = FALSE))
df_lookup <- df_lookup[pollutant != "pm2_5"]
ind <- match(dt$pollutant_id, df_lookup$pollutant_id)
dt$pollutant <- df_lookup$pollutant[ind]
df_lookup <- fread(lookup_SIC, stringsAsFactors = FALSE)
## cannot match like below, as SectorIDs map to different SNAPs dependent on pollutant (only in some instances) - V ANNOYING
## there is a problem here as the point source pollutant names are complex (and change) so cant match to the SIC_to_SNAP table. Must have a pollutant name to enable it.
## **Need to change SIC-to_SNAP lookup to include pollutant_id **. Done.
#ind <- match(dt$sic_id, df_lookup$SectorID)
#dt$snap_id_fromSIC <- df_lookup$SNAP[ind]
setnames(df_lookup, c("Pollutant","Pollutant_id","SectorID","SNAP"), c("pollutant_name","pollutant_id","sic_id","snap_id_fromSIC"))
dt <- df_lookup[dt, on =c("sic_id","pollutant_id")]
# only use this if snap_id is missing
## does this prioritise SNAP data originally in point files? is this wise? is it out of date? ##
dt[is.na(snap_id), snap_id := snap_id_fromSIC]
return(dt)
}
## ----standardiseNames----
#' Standardises names in a data frame
#'
#' This function standardises names in a data frame.
#' @param df A data frame.
#' @export
#' @examples
#' \dontrun{
#' dt_pts <- standardiseNames(dt_pts)
#' }
standardiseNames <- function(df){
# common variables
if (exists("year", where=df)){
df[["Year"]] <- df[["year"]]
df[["year"]] <- NULL
}
if (exists("SectorID", where=df)){
df[["sic_id"]] <- df[["SectorID"]]
df[["SectorID"]] <- NULL
}
if (exists("Point_SNAPID", where=df)){
df[["snap_id"]] <- df[["Point_SNAPID"]]
df[["Point_SNAPID"]] <- NULL
}
if (exists("Point_SNAP Code", where=df)){
df[["snap_id"]] <- df[["Point_SNAP Code"]]
df[["Point_SNAP Code"]] <- NULL
}
if (exists("SNAP Code", where=df)){
df[["snap_id"]] <- df[["SNAP Code"]]
df[["SNAP Code"]] <- NULL
}
if (exists("Point_NFR Code", where=df)){
df[["nfr_id"]] <- df[["Point_NFR Code"]]
df[["Point_NFR Code"]] <- NULL
}
if (exists("Point_NFRCode", where=df)){
df[["nfr_id"]] <- df[["Point_NFRCode"]]
df[["Point_NFRCode"]] <- NULL
}
if (exists("NFR Code", where=df)){
df[["nfr_id"]] <- df[["NFR Code"]]
df[["NFR Code"]] <- NULL
}
if (exists("Pollutant Name", where=df)){
df[["pollutant_name"]] <- df[["Pollutant Name"]]
df[["Pollutant Name"]] <- NULL
}
if (exists("FullPollName", where=df)){
df[["pollutant_name"]] <- df[["FullPollName"]]
df[["FullPollName"]] <- NULL
}
if (exists("PollCode", where=df)){
df[["pollutant_id"]] <- df[["PollCode"]]
df[["PollCode"]] <- NULL
}
if (exists("Pollcode", where=df)){
df[["pollutant_id"]] <- df[["Pollcode"]]
df[["Pollcode"]] <- NULL
}
if (exists("PollutantID", where=df)){
df[["pollutant_id"]] <- df[["PollutantID"]]
df[["PollutantID"]] <- NULL
}
if (exists("PollutantD", where=df)){
df[["pollutant_id"]] <- df[["PollutantD"]]
df[["PollutantD"]] <- NULL
}
if (exists("OS_GRE", where=df)){
df[["x"]] <- df[["OS_GRE"]]
df[["OS_GRE"]] <- NULL
}
if (exists("Easting", where=df)){
df[["x"]] <- df[["Easting"]]
df[["Easting"]] <- NULL
}
if (exists("OS_GRN", where=df)){
df[["y"]] <- df[["OS_GRN"]]
df[["OS_GRN"]] <- NULL
}
if (exists("Northing", where=df)){
df[["y"]] <- df[["Northing"]]
df[["Northing"]] <- NULL
}
if (exists("Operator_Full", where=df)){
df[["Operator"]] <- df[["Operator_Full"]]
df[["Operator_Full"]] <- NULL
}
if (exists("Sourcecode", where=df)){
df[["WebSourcecode"]] <- df[["Sourcecode"]]
df[["Sourcecode"]] <- NULL
}
if (exists("Emission", where=df)){
df[["emission"]] <- df[["Emission"]]
df[["Emission"]] <- NULL
}
if (exists("SumOfEmission", where=df)){
df[["emission"]] <- df[["SumOfEmission"]]
df[["SumOfEmission"]] <- NULL
}
if (exists("Units", where=df)){
df[["Unit"]] <- df[["Units"]]
df[["Units"]] <- NULL
}
if (exists("Data Type", where=df)){
df[["Datatype"]] <- df[["Data Type"]]
df[["Data Type"]] <- NULL
}
return(df)
}
# getAlpha_byYear function to be written to data.table syntax?
getAlpha_byYear <- function(dt, year_ref){
# calculate the ref emission for each pollutant/sector/year
# extract the reference year values
dt_ref <- dt[year == year_ref, emission, by = .(pollutant, snap_id)]
# add them back in
dt <- left_join(dt, dt_ref, by = c("pollutant", "snap_id"), suffix = c("", "_ref"))
# and calculate alpha as ratio
# data table version produces a warning: "Invalid .internal.selfref detected ..."
#dt[, alpha_year := emission / emission_ref][]
# so using standard version instead - SJT removing, not needed in scaling function later?
dt$alpha_year <- dt$emission / dt$emission_ref
return(dt)
}
plotAlpha_byYear <- function(dt, v_pollutant_to_plot = c("ch4", "co2", "n2o")){
# extract the pollutant-specific values
#v_pollutant_to_plot <- c("co", "nh3", "nox")
dts <- dt[pollutant %in% v_pollutant_to_plot]
p <- ggplot(dts, aes(year, alpha_year, colour = pollutant))
p <- p + geom_line()
p <- p + facet_wrap(~ snap_id, scale = "free")
p <- p + ylab(expression(alpha[i*","*year]))
p <- p + ggtitle(expression(paste("Relative variation in emissions, ", italic(alpha)[i*","*year])))
p
ggsave(p, file = "./docs/alpha_vsYear_byGHG.png")
return(p)
}
# s_uk <- readUKDiffuseData(pollutant = "co2")
readUKDiffuseData <- function(pollutant, year){ # 'year=' causes problems!! Force to 2019?
# we have no data prior to 2010, so use 2010 data for earlier years
if (year < 2010){
year <- 2010
print("UK diffuse-source mapped data only exist from 2010 - using these data for earlier years")
}
dir_data_raw <- paste0("./data-raw/diffuse_uk/", pollutant) # linked to 2019 NAEI maps
#dir_data_out <- paste0("./data/refYearStacks")
# read in area sources of GHG emissions
#v_fname <- paste(dir_data_raw, "/",
# v_sector, pollutant, substr(year, 3, 4), ".tif", sep = "") # changed from .asc to .tif
# make a stack with a layer for each sector
#s <- stack(v_fname)
# have changed the v_fname to loop and stack. While uglier, this allows flexibility for adding in missing SNAPs as and when they occur
# For e.g. CH4 does not have solvents while SOx does not have agri or natural
s <- stack()
r_blank <- raster(list.files(dir_data_raw, pattern = paste0(paste(v_sector,collapse="|"), pollutant, substr(year, 3, 4),".tif$"), full.names = T)[1])
r_blank[] <- NA
names(r_blank) <- NA
for(i in v_sector){
if(file.exists(paste0(dir_data_raw,"/",i, pollutant, substr(year, 3, 4),".tif"))){
ri <- raster(paste0(dir_data_raw,"/",i, pollutant, substr(year, 3, 4),".tif"))
s <- stack(s, ri)
}else{
ri <- r_blank
names(ri) <- paste0(i, pollutant, substr(year, 3, 4))
s <- stack(s, ri)
}
}
# set projection to be OSGB 1936 in consistent form
projection(s) <- projOSGB
# units are Mg / km^2 / yr
# but convert CO2-C to CO2 - only odd one out?
if (pollutant == "co2") s <- s /12*44
attr(s, "units") <- "Mg / km^2 / yr"
#writeRaster(s, file = paste0(dir_data_out, "/s_", pollutant, "_uk.tif"), overwrite=TRUE)
return(s)
}
# s_ie <- readIEDiffuseData(pollutant = "ch4")
readIEDiffuseData <- function(pollutant, year,
lookup_GNFR = "./data/lookups/GNFR_to_SNAP.csv"){ # 'year=' causes problems!! Force to 2019
# we only have data for 2016, so use this for all years (actually 2015 also exist, but not processed)
if (year != 2019){ # have altered to read 2019 maps
year <- 2019
print("Irish diffuse-source mapped data now exist for 2019 - using these data for all years")
}
dir_data_raw <- paste0("./data-raw/diffuse_ie/", pollutant) # linked to 2019 MapEire maps
#dir_data_out <- paste0("./data/refYearStacks")
v_fname <- list.files(dir_data_raw, pattern = paste0(year, "_*"))
v_GNFR_letter <- toupper(substr(v_fname, 6, 6))
df_lookup <- read.csv(lookup_GNFR, stringsAsFactors = FALSE)
ind <- match(v_GNFR_letter, df_lookup$GNFR_letter)
v_snap_id <- df_lookup$snap_id[ind]
# remove files without SNAP match
#v_fname <- v_fname[!is.na(v_snap_id)]
# make a stack with a layer for each GNFR sector
s_gnfr <- suppressWarnings(stack(paste(dir_data_raw, v_fname, sep = "/")))
# make a stack with a layer for each SNAP sector
s <- stack(s_gnfr[[1:n_sectors]])
# remove existing data
s <- setValues(s, NA)
# the following loop causes warnings - could be avoided with calc?
for (i in 1:n_sectors){
if (i %in% v_snap_id){ # sector 4 is missing - all industry allocated to SNAP 3
s[[i]] <- suppressWarnings(sum(s_gnfr[[which(v_snap_id == i)]], na.rm=T)) # added na.rm=T as rasters not summing correctly. Warnings here are generated when only 1 layer from s_gnfr matches a SNAP. Suppress.
}
}
names(s) <- v_sector
# set projection to be OSGB 1936 in consistent form
projection(s) <- projOSGB
# units seem to be in Mg / km^2 / yr
attr(s, "units") <- "Mg / km^2 / yr"
#writeRaster(s, file = paste0(dir_data_out, "/s_", pollutant, "_ie.tif"), overwrite=TRUE)
return(s)
}
add_points_to_diffuse <- function(year, s, dt_pts, pollutant){
# subset point source data to just pollutant of interest
# setkey(dt_pts, pollutant)
# dt_pts <- dt_pts[pollutant]
# "Year" in dt_pts is inconsistently with uppercase
# "get" retrives the variable rather than the column name
dt_pts <- dt_pts[pollutant == get("pollutant", envir = parent.frame(3)) &
Year == get("year", envir = parent.frame(3))]
#dim(dt_pts)
# SJT: add in line to convert CO2-C to CO2, not done elsewhere.
if(pollutant == "co2") dt_pts[, emission := emission/12*44]
for (i_sector in 1:n_sectors){
##* WIP
# subset point source data for this sector
dt_pts_sub <- dt_pts[snap_id == i_sector]
# rasterise point sources
# if no values for this sector, no point sources to rasterise
if (sum(dt_pts_sub$emission) == 0){
r_pts <- s[[i_sector]]
r_pts[] <- 0
} else {
# make a raster with the sum of point sources in each grid cell
xy <- cbind(dt_pts_sub$x, dt_pts_sub$y)
r_pts <- rasterize(xy, s, dt_pts_sub$emission, fun=sum)
r_pts[is.na(r_pts)] <- 0 # so we can add with missing values - use na.rm = TRUE in sum function below instead?
}
# this is not working - changing to calc
# which means creating a blank raster for no points
#s[[i_sector]] <- s[[i_sector]] + r_pts
s[[i_sector]] <- calc(stack(s[[i_sector]], r_pts), sum , na.rm=T)
} # end sectors loop
return(s)
}
scale_stack_to_nationalTotal <- function(year, sp, dt_ts, pollutant){
# subset time series to just pollutant and year of interest
dt_ts <- dt_ts[pollutant == get("pollutant", envir = parent.frame(3)) &
year == get("year", envir = parent.frame(3))]
setkey(dt_ts, snap_id) # set row order
#sum(dt_ts$emission[1:11]) / 1e6 ### needs unit change to CO2 ###
ind_check <- dt_ts[snap_id %in% c(3,4), sum(emission, na.rm=T)]
## sum the industrial sectors here, before national total scaling.
## need to think more on whether this is correct/robust. Should we change the SNAP look-ups instead? or add a function argument?
dt_ts[snap_id==3, emission := dt_ts[snap_id %in% c(3,4), sum(emission, na.rm=T)]]
dt_ts[snap_id==4, emission := 0]
if(dt_ts[snap_id==3, emission] != ind_check) print("Extra industrial emissions created. Check")
sp[[3]] <- calc(stack(sp[[3]],sp[[4]]), sum, na.rm=T)
sp[[4]] <- setValues(sp[[4]],0)
for (i_sector in 1:n_sectors){
# subset time series data for this sector
dt_ts_sub <- dt_ts[snap_id == i_sector]
# assumes national and spatial data are in same units, Mg; otherwise unit conversion would be needed here
national_total <- dt_ts_sub$emission
spatial_total <- cellStats(sp[[i_sector]], "sum", na.rm = TRUE)
# calc adjustment multiplier
scaling_factor <- national_total / spatial_total
scaling_factor <- set_units(scaling_factor, NULL)
print(paste0("Sector ", i_sector,": Nat total - ",round(national_total,1)," ; Spatial total - ", round(spatial_total,1)," ; Scale fac - ",round(scaling_factor,2)))
# apply it
sp[[i_sector]] <- sp[[i_sector]] * scaling_factor
} # sectors
return(sp)
}
## ----unit_conversions, eval=TRUE-----------------------------------------
#' A unit conversion function
#'
#' This function converts from Tg km-2 y-1 to standard SI units in m-2 s-1.
#' @param pollutant Pollutant name: one of "ch4", "co2", "n2o", "c2h6" or "voc". Defaults to "ch4".
#' @param unit_in Units of input data. Defaults to "Mg / km^2 / yr".
#' @param unitType Either molar ("mol") or mass-based ("g").
#' @param unitSIprefix Any standard SI prefix for the output units, from "peta" to "pico".
#' @keywords internal units
#' @export
#' @seealso \code{\link{calcFlux}}, the higher-level function which calls this.
#' @examples
#' unit_conversion("ch4", "mol", "nano")
#' unit_conversion("co2", "mol", "micro")
#' unit_conversion("n2o", "mol", "nano")
#' unit_conversion("ch4", "g", "nano")
unit_conversion <- function(pollutant = c("ch4", "co2", "n2o", "c2h6", "voc", "co"),
unitType = c("mol", "g"),
unitSIprefix = c("peta", "tera", "giga", "mega", "kilo", "none", "milli", "micro", "nano", "pico"),
unit_in = c("Mg / km^2 / yr", "Gg / km^2 / yr", "Tg / km^2 / yr")){
pollutant <- match.arg(pollutant)
unitType <- match.arg(unitType)
unitSIprefix <- match.arg(unitSIprefix)
unit_in <- match.arg(unit_in)
if (unit_in == "Mg / km^2 / yr") Xg_to_g <- 1e06
if (unit_in == "Gg / km^2 / yr") Xg_to_g <- 1e09
if (unit_in == "Tg / km^2 / yr") Xg_to_g <- 1e12
km2_to_m2 <- 1e6
secsPerYear <- 365*24*60*60
# molecular weight of gases
if (pollutant == "ch4") {
molWt <- 16
} else if (pollutant == "co2") {
molWt <- 44
} else if (pollutant == "co") {
molWt <- 28
} else if (pollutant == "n2o") {
molWt <- 44
} else if (pollutant == "c2h6") {
molWt <- 30 # 12*2 + 6
} else if (pollutant == "voc") {
molWt <- 78.9516 # approximate mean from https://www.convertunits.com/molarmass/VOC
}
# ugly, but numerically correct
if (unitType == "g") {
molWt <- 1
}
# unit conversions - this could be outwith the function - stored as data
#unitSIprefix <- "peta"
SIprefix <- c("peta", "tera", "giga", "mega", "kilo", "none", "milli", "micro", "nano", "pico")
SI_multiplier <- c(1e-15, 1e-12, 1e-9, 1e-6, 1e-3, 1, 1e3, 1e6, 1e9, 1e12) # better with a seq function?
# match prefix with multiplier
i <- match(unitSIprefix, SIprefix)
mult <- SI_multiplier[i]
# gases usually in Tg km-2 y-1, converted to x m-2 s-1
value <- Xg_to_g *mult /molWt /km2_to_m2 /secsPerYear
# return name as well
name <- paste(SIprefix[i], unitType, pollutant, "m^-2_s^-1", sep="_")
return(list(value = value, # unit conversion multiplier
name = name, # unit conversion name
molWt = molWt)) # molecular weight
}
getMarineEmissionTimeSeries_annual <- function(pollutant, startYear = 2010, endYear = 2019){
# annual means
fname <- paste0("./data-raw/marine/", pollutant, "/b_F", pollutant, "_annual_amm7.tif")
b <- brick(fname)
names(b) <- 2005:2015
b[b < 0] <- NA
v_emission <- cellStats(b, mean)
v_emission <- set_units(v_emission, mol / m^2 / s)
v_year <- 2005:2015
# make a data frame matching the terrestrial time series
# snap_id pollutant year emission units
dt_year <- data.table(snap_id = 12, pollutant = pollutant, year = v_year,
emission = v_emission, units = as.character(units(v_emission)))
# calculate alpha
dt_year <- getAlpha_byYear(dt_year, year_ref = 2015)
##* WIP what to return?
# side-effect of resampling / writing maps here too?
return(dt_year)
}
#r_marine <- getMarineEmissionMap(pollutant = "co2", year = 2015)
getMarineEmissionMap <- function(pollutant, year){
v_year <- 2005:2015
# we have no data prior to 2010, so use 2010 data for earlier years
if (year < 2005){
year <- 2005
print("Marine data only exist from 2005 - using 2005 data for earlier years")
}
if (year > 2015){
year <- 2015
print("Marine data only exist up to 2015 - using 2015 data for later years")
}
# read in single file for fluxes
fname <- paste0("./data-raw/marine/", pollutant, "/b_F", pollutant, "_annual.tif")
b <- brick(fname)
names(b) <- v_year
ind <- match(year, v_year)
r <- b[[ind]]
# units are mol / m^2 / s
attr(r, "units") <- "mol / m^2 / s"
return(r)
}
#add_marine argument added, see ReadMe
get_maps_static <- function(year, s, r, dt_ts, dt_pts, pollutant, unitType = "mol", unitSIprefix = "none", add_points = TRUE, add_marine = TRUE){
if (add_points) s <- add_points_to_diffuse(year = year, s, dt_pts, pollutant = pollutant)
# reproject
sp <- projectRaster(from = s, to = r, method = "ngb") # rescale to original totals? BELOW
# as data isn't quite to national totals in maps PLUS reprojection changing values, rescale here
s <- scale_stack_to_nationalTotal(year = year, sp=sp, dt_ts, pollutant = pollutant)
# convert units
uc <- unit_conversion(pollutant, unitType = unitType, unitSIprefix = unitSIprefix, unit_in = "Mg / km^2 / yr")
s <- s * uc$value
# add marine for co2 & n2o - ONLY TO UK, previously was adding twice , SJT edit 21/1/22
# but add a blank layer to IE to allow dim to remain the same for merge later on get_maps_UKIE().
## we might need a blank placeholder for CH4 if that also needs to be 12 layers
if (pollutant == "co2" | pollutant == "n2o"){
if(add_marine==T){
if (unitType != "mol" & unitSIprefix != "none") stop(message = "Units needs to be mol /m2 /s to match marine data")
r_marine <- getMarineEmissionMap(pollutant = pollutant, year = year)
s <- addLayer(s, r_marine) # or s[[11]] <- s[[11]] + r with NA=0 ## SJT 09/12/21 KEY CHANGE. PL to double check. currently making marine a 12th sector 21/01/22
}else{
r_marine <- r
r_marine[] <- 0
s <- addLayer(s, r_marine) # if marine = F, add blank layer to ensure structure is ok.
}
# add an else clause if CH4 needs to be same 12 layer structure.
}else{
# if not CO2 or N2O, just add blank 12th layer (no marine)
r_marine <- r
r_marine[] <- 0
s <- addLayer(s, r_marine)
}
return(s)
}
# writeOutput(v_year = startYear:endYear, l_s, pollutant)
writeOutput <- function(v_year = startYear:endYear, l_s, pollutant){
v_fname <- paste0("./data/output/s_F_", pollutant, "_", v_year, ".tif")
for (i in 1:length(l_s)){
rf <- writeRaster(l_s[[i]], filename = v_fname[i], overwrite = TRUE)
}
return(v_fname)
}
# combine all pollutant-specific functions in one
# down-side of this is that all targets need to be rebuilt if anything changes
# add_marine argument added to get_maps_static(), as it was added twice
get_maps_UKIE <- function(year_ref, startYear, endYear, pollutant,
r, dt_pts, dt_ts_uk, dt_ts_ie, unitSIprefix = "none",
lookup_GNFR = "./data/lookups/GNFR_to_SNAP.csv"){
# get the diffuse source maps
s_diff_uk <- readUKDiffuseData(pollutant = pollutant, year = year_ref) # possibly problem here - no 'year=' so always defaulting to 2018 as per L854 AND L484 in readUKDiffuseData(). Have set in L484.
s_diff_ie <- readIEDiffuseData(pollutant = pollutant, year = year_ref, lookup_GNFR)
# combine diffuse and point sources, rescale and resample
l_s_uk <- lapply(startYear:endYear, FUN = get_maps_static, s_diff_uk, r=r, dt_ts_uk, dt_pts, pollutant = pollutant, unitSIprefix = unitSIprefix)
l_s_ie <- lapply(startYear:endYear, FUN = get_maps_static, s_diff_ie, r=r, dt_ts_ie, dt_pts, pollutant = pollutant, unitSIprefix = unitSIprefix, add_points = FALSE, add_marine = FALSE)
# merge the two together
#l_s <- lapply(1:length(l_s_uk), function(i) raster::merge(l_s_uk[[i]], l_s_ie[[i]]))
## this is the problem, merge isn't working
# unsure why merge is not working, it's just returning Irish values (it should use UK where present? maybe a bug?)
# instead, put uk and ireland in stack, then stackApply using indices, i.e. 1 and 12, 2 and 13 etc. Same structure so ok.
l_s_ukie <- lapply(1:length(l_s_uk), function(i) stack(l_s_uk[[i]], l_s_ie[[i]]))
l_s <- lapply(1:length(l_s_ukie), function(i) stackApply(l_s_ukie[[i]], indices=rep(1:12, 2), fun=sum, na.rm=T) )
# at the moment and if clause for ch4 as the 12th layer not added
#if(pollutant=="ch4"){
# l_s <- lapply(1:length(l_s_ukie), function(i) stackApply(l_s_ukie[[i]], indices=rep(1:11, 2), fun=sum, na.rm=T) )
#}else{
# l_s <- lapply(1:length(l_s_ukie), function(i) stackApply(l_s_ukie[[i]], indices=rep(1:12, 2), fun=sum, na.rm=T) )
#}
# write final output to annual files
v_fname <- writeOutput(v_year = startYear:endYear, l_s, pollutant)
return(v_fname)
}
##* WIP - Ireland has bigger CO2 emission than UK - units diff, CO2 not C?
##* mostly sorted - inclusion of duplicate totals in UNFCCC file - using Iriah EPA data instead
##* years <- 2010:2019
##* l_res <- lapply(X=years, function(x) fun = sapply(c("ch4","n2o","co2"), test_agreement, v_snap_id = 1:n_sectors, year = x))
##* names(l_res) <- as.character(years)
test_agreement <- function(v_snap_id = 1:n_sectors, pollutant, year){
# call unit conversion to return pollutant-specific molWt
molWt <- unit_conversion(pollutant, "mol")$molWt
dt_ts_uk <- tar_read(dt_ts_uk)
dt_ts_ie <- tar_read(dt_ts_ie)
F_total_uk <- sum(dt_ts_uk[pollutant == get("pollutant", envir = parent.frame(3)) &
year == get("year", envir = parent.frame(3)) &
snap_id %in% v_snap_id, emission])
F_total_ie <- sum(dt_ts_ie[pollutant == get("pollutant", envir = parent.frame(3)) &
year == get("year", envir = parent.frame(3)) &
snap_id %in% v_snap_id, emission])
# UK data already so converted from C to CO2 ##
F_total_uk <- set_units(F_total_uk, Tg / yr)
F_total_ie <- set_units(F_total_ie, Tg / yr)
F_total_ts <- F_total_uk + F_total_ie
# total from time series
F_total_ts
# read output file
fname <- paste0("./data/output/s_F_", pollutant, "_", year, ".tif")
s <- stack(fname)
# name layers to 1 to 11, ignoring marine
names(s)[1:n_sectors] <- v_sector
# subset to selected sectors
s <- s[[v_snap_id]]
# get cell area in m2
r_area <- terra::area(s, sum=FALSE) * 1e6
#cellStats(r_area, sum) / 1e6
# s is flux in mol m-2 s-1 x area in m2 -> mol s-1
F_total_sp <- sum(cellStats(s * r_area, sum))
# convert mol to g
F_total_sp <- set_units(F_total_sp * molWt, g / s)
F_total_sp <- set_units(F_total_sp, Tg / yr)
return(F_total_sp / F_total_ts)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.