knitr::opts_chunk$set(echo = FALSE) # by default turn off code echo
# Set start time ----
startTime <- proc.time()
# Local parameters ----
b2Kb <- 1024 #http://whatsabyte.com/P1/byteconverter.htm
b2Mb <- 1048576
plotLoc <- paste0(repoParams$repoLoc, "/docs/plots/") # where to put the plots

# Packages used in the report ----
rmdLibs <- c("ggplot2", # plots
          "kableExtra" # fancy tables
          )
GREENGridEECA::loadLibraries(rmdLibs)
# Local functions ----

\newpage

About

Citation


Report circulation

License {#license}


History


Support


\newpage

Introduction

This report uses the Electricity Authority EMI grid export database to:

This report describes the results of this work and directs the reader to relevant R code where necessary.

All code used to create this report is available from:

The archived and most recent version of the report is available from:

Data

Grid exit point (GXP)

A grid exit point (GXP) is defined in Part 1 of the Code and means any point of connection on the grid at which electricity predominantly flows out of the grid or is determined as being such by the Authority following an application in accordance with clause 13.28. Any such point of connection may, at any given time, be a GXP or a GIP, but may not be both at the same time."

For (very little) further information see the EMI glossary.

# add the POC lookup table Vince sent ----
# this tells us which POCs are in the regions we are interested in
f <- paste0(here::here(), "/data/input/gxp-lookup.csv")
gxpLutDT <- data.table::fread(f)

setkey(gxpLutDT, node)
setkey(gxpDataDT, POC) 

gxpDataDT <- gxpLutDT[gxpDataDT] # merge kWh to label data - this keeps all the gxpData

gxpDataDT[, regionName := `region name`]

# set peak period - used later
gxpDataDT <- GREENGridEECA::codePeakPeriod(gxpDataDT) # NA are DST breaks?
t <- table(gxpDataDT$hour, gxpDataDT$peakPeriod, useNA = "always")
# t
# yes :-)

# add useful time vars ----
gxpDataDT[, month := lubridate::month(date)]
gxpDataDT[, year := lubridate::year(date)]
gxpDataDT[, hms := hms::as_hms(rDateTime)]
gxpDataDT <- GREENGridEECA::addNZSeason(gxpDataDT) # add season

message("Matched regions:")
table(gxpDataDT$regionName, useNA = "always")
# remove the date NAs here (DST breaks)
gxpDataDT <- gxpDataDT[!is.na(date)]

# remove any kWh NA
gxpDataDT <- gxpDataDT[!is.na(kWh)]

Start date: r min(gxpDataDT$date)

End date: r max(gxpDataDT$date)

t <- table(gxpDataDT$month, gxpDataDT$year)
kableExtra::kable(t, digits = 2,
                  caption = "Number of observations per month and year in data used for this report") %>%
  kable_styling()

Explore GXP data

Figure \@ref(fig:profiles) shows the mean total network electricity export by season and year for all regions combined as a data sense-check.

myCaption <- paste0("Source: EA EMI Grid Exit Point (GXP) data",
                    ", ", min(gxpDataDT$date, na.rm = TRUE)
                    , " - ", max(gxpDataDT$date, na.rm = TRUE))

plotDT <- gxpDataDT[, .(sumkWh = sum(kWh), meankWh = mean(kWh), 
                        nGXPs = uniqueN(node), nDays = uniqueN(date)), 
                    keyby = .(hms, season, year)]
plotDT[,totDaily := sumkWh/(nDays)]

ggplot2::ggplot(plotDT, aes(x = hms, y = totDaily/1000, colour = season)) +
  geom_point() +
  facet_grid(. ~ year) +
  labs(x = "Date", y = "Total daily MWh per half-hour", 
       caption = paste0(myCaption, "\nN GXPs: ", uniqueN(gxpDataDT$node))) +
  scale_color_discrete(name = "Season:") +
  theme(legend.position="bottom")

Next we use the GXP data to identify local (regional) system peak load half-hours for Hawke's Bay and Taranaki as this matches the GREEN Grid household sample location. Selected GXPs for the regions we are interested in:

t <- gxpDataDT[!is.na(regionName), .(nHalfHours = .N,
                               meanMWh = mean(kWh, na.rm = TRUE)/1000),
                           keyby = .(regionName, node, NWK_Code, year)]

kableExtra::kable(t, digits = 2,
                  caption = "Summary of MWh per half hour by year, region, GXP nodes selected and network codes") %>%
  kable_styling()

Note that NPL0331 in Taranaki has two network codes - presumably it feeds two networks? In what follows we simply sum the kWh for all half-hours for all nodes for each region.

Figure \@ref(fig:regionProfiles) shows the same results as above but only for the regions of interest: Taranaki and Hawke's Bay.

dt <- gxpDataDT[region == "t" | region == "h"]

plotDT <- dt[, .(sumkWh = sum(kWh), meankWh = mean(kWh), nDays = uniqueN(date)), 
                    keyby = .(hms, season, region, regionName, year)]
plotDT[, totDaily := sumkWh/(nDays)]
ggplot2::ggplot(plotDT, aes(x = hms, y = totDaily/1000, colour = season)) +
  geom_point() +
  facet_grid(year ~ regionName) +
  labs(x = "Date", y = "Total daily MWh per half-hour", 
       caption = paste0(myCaption, "\nN GXPs: ", uniqueN(dt$node))) +
  scale_color_discrete(name = "Season:") +
  theme(legend.position="bottom")

Top 100 'peaks' in 2015

Extract data for winter (Jun - August) 2015 as per original EECA file. Aggregate to region level and select the top 100 half hours for each region.

gxpWinter2015DT <- gxpDataDT[year == 2015 & 
                                 date >= "2015-06-01" & # Vince's extract seems to start 1st June
                                   date <= "2015-08-31"] # and end 31st August 

  summary(gxpWinter2015DT)

  gxpWinter2015DT[,regionName := `region name`]

  # the NA in TP = 49 is expected, this is the DST break placeholder
  t <- gxpWinter2015DT[is.na(kWh), .(node, Time_Period, rDateTime, kWh)]
  head(t, 10)
  # check
  summary(t)
  # yep, all NA
  # so let's just kick it out
  gxpWinter2015DT <- gxpWinter2015DT[!is.na(kWh)]

  # grab the GXPs we want
  gxpSelectWinter2015DT <- gxpWinter2015DT[!is.na(regionName)]

  table(gxpSelectWinter2015DT$node, gxpSelectWinter2015DT$regionName, useNA = "always")
  # one of them seems to have double the observations. Why?
  table(gxpSelectWinter2015DT$node, gxpSelectWinter2015DT$NWK_Code, useNA = "always")
  # it has 2 network codes. Why?


  # so it seems to be small but we should include it as the GXP must be feeding 2 networks?
  # checks before aggreating

  skimr::skim(gxpSelectWinter2015DT)
  # to confirm
  table(gxpSelectWinter2015DT$NWK_Code)
  table(gxpSelectWinter2015DT$FLOW_DIRECTION)
  table(gxpSelectWinter2015DT$GENERATION_TYPE)
  # OK, what does that mean?
  table(gxpSelectWinter2015DT$TRADER)

  # check for NA
  summary(gxpSelectWinter2015DT)
  head(gxpSelectWinter2015DT[is.na(kWh)], 10)
  # collapse them by region
  regionSumGxpDT <- gxpSelectWinter2015DT[, .(sumkWh = sum(kWh),
                                              nObs = .N,
                                              nRegions = uniqueN(regionName),
                                              nLocations = uniqueN(node),
                                              nNetworks = uniqueN(NWK_Code),
                                              nGenTypes = uniqueN(GENERATION_TYPE)), # number of half-hourly records contributing
                                    keyby = .(region, regionName, Time_Period, hours, rDateTime, weekdays, peakPeriod)]

  regionSumGxpDT[, month := lubridate::month(rDateTime, label = TRUE)]
  regionSumGxpDT[, hms := hms::as_hms(rDateTime)]

  summary(regionSumGxpDT)

  # find the top 10 for each region
  taranakiDT <- head(regionSumGxpDT[region == "t"][order(-sumkWh)], 100)
  table(taranakiDT$peakPeriod, taranakiDT$month)
  # save it: NB saves rDateTime as UTC
  data.table::fwrite(taranakiDT, paste0(here::here(), "/data/taranakiGxpTop100DT.csv"))
  taranakiDT[, .(nPeaks = .N), keyby = .(regionName, hours, Time_Period)]
  # almost completely matches Vince's extract but the kWh values here are _exactly_ 1/2 of Vince's. Why? Ans: because he multiplied by 2!

  hawkesBayDT <- head(regionSumGxpDT[region == "h"][order(-sumkWh)], 100)
  table(hawkesBayDT$peakPeriod, hawkesBayDT$month)
  # save it: NB saves rDateTime as UTC
  hawkesBayDT[, .(nPeaks = .N), keyby = .(regionName, hours, Time_Period)]
  # almost completely matches Vince's extract but the kWh values here are _exactly_ 1/2 of Vince's. Why?

  data.table::fwrite(hawkesBayDT, paste0(here::here(), "/data/hawkesBayGxpTop100DT.csv"))

Hawke's Bay

Figure \@ref(fig:hbTile) shows the total network electricity export in this region as a data sense-check.

ggplot2::ggplot(regionSumGxpDT[region == "h"], aes(x = lubridate::date(rDateTime), y = hms, fill = sumkWh/1000)) +
  geom_tile() +
  facet_grid(regionName ~ .,scales = "free_y") +
  scale_fill_continuous(low = "green", high = "red", name = "Total MWh per half-hour") +
  labs(x = "Date", y = "Half-hour", caption = paste0(myCaption)) +
   theme(legend.position="bottom")

Table \@ref(tab:hbTable) shows the top 10 peak demand periods.

t <- regionSumGxpDT[region == "h", .(regionName, Time_Period, hms, rDateTime, weekdays, peakPeriod, sumkWh)]
kableExtra::kable(head(t[order(-sumkWh)], 10), caption = "Top 10 of top 100 Hawke's Bay peak demand half-hours") %>%
  kable_styling()

Table \@ref(tab:hbTableTP) shows when the top 100 peak demand periods occured.

dt <- head(regionSumGxpDT[region == "h"][order(-sumkWh)], 100)
#nrow(dt)
t <- dt[, .(nHalfHours = .N), keyby = .(Time_Period, hms, weekdays, peakPeriod)]
kableExtra::kable(t[order(-nHalfHours)], caption = "Hawke's Bay peak demand half-hours") %>%
  kable_styling()

Taranaki

Figure \@ref(fig:tTile) shows the total network electricity export in this region as a data sense-check.

ggplot2::ggplot(regionSumGxpDT[region == "t"], aes(x = lubridate::date(rDateTime), y = hms, fill = sumkWh/1000)) +
  geom_tile() +
  facet_grid(regionName ~ .,scales = "free_y") +
  scale_fill_continuous(low = "green", high = "red", name = "Total MWh per half-hour") +
  labs(x = "Date", y = "Half-hour", caption = paste0(myCaption)) +
   theme(legend.position="bottom")

Table \@ref(tab:tTable) shows the top 10 peak demand periods.

t <- regionSumGxpDT[region == "t", .(regionName, Time_Period, hms, rDateTime, weekdays, peakPeriod, sumkWh)]
kableExtra::kable(head(t[order(-sumkWh)], 10), caption = "Top 10 of top 100 Taranaki peak demand half-hours") %>%
  kable_styling()

Table \@ref(tab:tTableTP) shows when the 100 peak demand periods occured.

dt <- head(regionSumGxpDT[region == "t"][order(-sumkWh)], 100)
#nrow(dt)
t <- dt[, .(nHalfHours = .N), keyby = .(Time_Period, hms, weekdays, peakPeriod)]
kableExtra::kable(t[order(-nHalfHours)], caption = "Taranaki peak demand half-hours") %>%
  kable_styling()

Summary

This report used the EA EMI GXP data to:

This report described the results of this work and directed the reader to relevant R code where necessary.

All code used to create this report is available from:

The archived and most recent version of the report is available from:

The selected half-hours can be found in the github repo.


Data Annex

GXP summary

Descriptive statistics for original EA GXP data:

skimr::skim(gxpDataDT)

Runtime

t <- proc.time() - startTime
elapsed <- t[[3]]

Analysis completed in r round(elapsed,2) seconds ( r round(elapsed/60,2) minutes) using knitr in RStudio with r R.version.string running on r R.version$platform.

R environment

R packages used

Session info

sessionInfo()

References



CfSOtago/GREENGridEECA documentation built on May 1, 2022, 8:08 p.m.