knitr::opts_chunk$set(echo = FALSE) # by default turn off code echo
#knitr::opts_chunk$set(results = "asis") # fixes html table for Word
# Set start time ----
startTime <- proc.time()
# Local parameters ----
b2Kb <- 1024 #http://whatsabyte.com/P1/byteconverter.htm
b2Mb <- 1048576
plotLoc <- paste0(repoParams$repoLoc, "/docs/plots/") # where to put the plots

# Packages used in the report ----
rmdLibs <- c("ggplot2", # plots
          "kableExtra" # fancy tables
          )
GREENGridEECA::loadLibraries(rmdLibs)
# Local functions ----

\newpage

About

Citation


Report circulation

License {#license}


History


Support


\newpage

Introduction

This report uses the full New Zealand GREEN Grid household electricity demand study research data to:

This report describes the results of this work and directs the reader to relevant R code where necessary.

All code used to create this report is available from:

The archived and most recent version of the report is available from:

Data


Multi-year data availability

Whilst the GREEN Grid project collected data from r min(lubridate::date(halfHourlyPowerDT$r_dateTimeHalfHour)) to r max(lubridate::date(halfHourlyPowerDT$r_dateTimeHalfHour)), we do not have complete multi-year coverage of the r uniqueN(halfHourlyPowerDT$linkID) dwellings for whom data exists.

As Figure \@ref(fig:reportMyTile) shows data is available for most of the r uniqueN(hhDataDT[Location == "Taranaki"]) dwellings in the Taranaki region from mid 2014 and for most of the r uniqueN(hhDataDT[Location == "Hawkes Bay"]) dwellings in Hawkes Bay from early 2015. In most cases the 'right' number of observations were received per half hour (30) when the dwellings were sending data. However not all dwellings sent data continuously with substantial attrition by 2017 (Figure \@ref(fig:reportMyCol)).

myCaption <- "Source: GREEN Grid dwelling power demand data"

plotDT <- halfHourlyPowerDT[, .(meanNObs = mean(nObs)), 
                    keyby = .(linkID, 
                              date = lubridate::date(halfHourlyPowerDT$r_dateTimeHalfHour))]
setkey(plotDT, linkID)
setkey(hhDataDT, linkID)
plotDT <- hhDataDT[plotDT]
plotDT[, Location := ifelse(is.na(Location), "Test installs", Location)] # gs data but no survey etc

ggplot2::ggplot(plotDT[!(Location %like% "Test")], aes(x = date, y = linkID, fill = meanNObs)) +
  geom_tile() +
  facet_grid(Location ~ .,scales = "free_y") +
  scale_fill_continuous(low = "red", high = "green", name = "Mean n observations per circuit\nper half hour") +
  labs(x = "Date", y = "ID", caption = paste0(myCaption,"\n 2 test installs removed (rf_01, rf_02)")) +
   theme(legend.position="bottom")
plotDT <- plotDT[, .(nDwellings = uniqueN(linkID)), 
                    keyby = .(Location, date)]


ggplot2::ggplot(plotDT[!(Location %like% "Test")], aes(x = date, y = nDwellings, fill = Location)) +
  geom_col(position = "stack") +
  labs(x = "Date", y = "N dwellings", 
       caption = paste0(myCaption,
                        "\n 2 test installs removed (rf_01, rf_02)")) +
   theme(legend.position="bottom")

For clarity, Figure \@ref(fig:reportSeasonal) shows the mean daily number of dwellings present in the data in each year and season for each region. It is clear that 2015 provides the highest level of reporting dwellings, however Figure \@ref(fig:reportSeasonal) also shows that it is possible to calculate seasonal summaries for several years.

dt <- GREENGridEECA::addNZSeason(plotDT, date)
dt <- dt[, year := lubridate::year(date)]
t <- dt[, .(meanDwellings = mean(nDwellings)), keyby = .(year, season, Location)]
ggplot2::ggplot(t, aes(x = season, y = meanDwellings, fill = Location)) +
  geom_col() +
  labs(x = "Season", y = "Mean number of dwellings per season",
       caption = paste0(myCaption, "\nAll dwellings")) +
  facet_grid(. ~ year) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

However, it should be noted that:

As a result the effective number of dwellings available for any given analysis will always be lower than the numbers reported above and should be evaluated on a case by case basis.

Imputation of 'total load' per minute per dwelling {#imputeTotal}

Whilst in theory the calculation of total load in any given minute should be a matter of merely summing all monitored circuits, in practice the task is not quite this simple. There are a number of reasons for this:

Our previously work has shown that with appropriate care it is possible to derive best effort estimates of total dwelling power load by summing a small number of particular circuits for each dwelling [@GREENGridTotalLoad]. However the issues described above mean that users should:

Separately, for reasons explained elsewhere we also recommend removing rf_46 from the data prior to analysis.

In all cases we recommend that users check the data carefully before analysis and document any filtering they apply.

We have therefore created a new dataset for each dwelling which comprises the estimated total load per minute for each dwelling for the entire time frame for which we have data (ref Figure \@ref(fig:reportMyCol)). For ease of use this data is available as:

Note that we have not applied the exclusion rules described above. In order to ensure all data is available if required, these rules should only be applied just prior to analysis. The files are listed in Table \@ref(tab:listLoadFiles) in the Data Annex Section \@ref(totalLoadFiles). Whilst we can make these data files available, potential users should note that they are larger than the original data files. The single file containing only the estimated total load per minute per dwelling is especially large.

Table \@ref(tab:allLoadDesc) shows basic statistics for the estimated 1 minute level load for each dwelling and illustrates some of the issues described above.

t <- impDataDT[, .(nObs = GREENGridData::tidyNum(.N),
                   meanW = mean(powerW),
                   minW = min(powerW),
                   maxW = max(powerW),
                   sdW = sd(powerW)), keyby = (linkID)]

setkey(t, linkID)
t2 <- hhDataDT[, .(linkID, `PV Inverter`, Location)][t]
t2[, Location := ifelse(is.na(Location), "Test installs", Location)] # gs data but no survey etc

kableExtra::kable(t2, caption = "Basic statistics for the estimated 1 minute level load (W) for each dwelling", digits = 2) %>%
  kable_styling()

Development of a half-hourly power demand dataset

In response to EECA's request we have used the per-dwelling files listed in Table \@ref(tab:listLoadFiles) and the code available from our github repo to produce an aggregated half-hourly power demand dataset for each dwelling.

These files contain:

Table \@ref(tab:headhhFile) shows the first few rows of one of these files where:

kableExtra::kable(head(origHalfHourlyPowerDT), caption = "Half hourly data format with example data", digits = 2) %>%
  kable_styling()

The resulting files are listed in Table \@ref(tab:listhhFiles) in the Data Annex Section \@ref(hhFiles).

Further, these per-dwelling files have been used to attempt to create single data files containing all observations for the circuits or (partial) circuit labels set out in the following sections. Table \@ref(tab:getCircuitLabels) in Section \@ref(circuitLabels) of the Data Annex shows the unique circuit labels available as a guide to what can be meaningfully extracted. The process of extraction uses partial string matching so, for example, the string Lighting would match to circuits with the following labels:

but not:

Note that the process may therefore match a number of circuits and in some cases circuits may contain other appliances. Analysis should therefore proceed with caution since some circuits may have been missed that were required and some extracted which were not. For the avoidance of doubt, the code used to extract these circuits should be checked against Table \@ref(tab:getCircuitLabels) in Section \@ref(circuitLabels) of the Data Annex.

halfHourlyPowerDT <- GREENGridEECA::labelEECACircuits(halfHourlyPowerDT)
# head(halfHourlyPowerDT)

In each section we extract and save the relevant half-hourly observations and provide two plots:

Lighting

filter <- "Lighting"
dt <- halfHourlyPowerDT[circuitLabel %like% filter] # extraction

In this section we extract every record where:

Table \@ref(tab:extractHalfHourLighting) shows summary statistics of the half-hourly mean values for all observations extracted by circuit label.

makeTable <- function(dt){
  t <- dt[, .(nObs = .N,
              nDwellings = uniqueN(linkID),
              meanW = mean(meanPowerW, na.rm = TRUE),
              minW = min(meanPowerW, na.rm = TRUE),
              maxW = max(meanPowerW, na.rm = TRUE),
              sdW = sd(meanPowerW, na.rm = TRUE)
              ),
                  keyby = .(circuitLabel)]
  message("# -> ", nrow(dt), " rows of data for ", 
          uniqueN(dt$circuitLabel), " unique circuit labels from ", 
          uniqueN(dt$linkID), " dwellings covering the period \n# -> ",
          min(dt$r_dateTimeHalfHour), " to ", max(dt$r_dateTimeHalfHour),
          " (NB: data may not be continuous)")
  return(t)
}

t <- makeTable(dt)

kableExtra::kable(t, digits = 3, 
                  caption = paste0("Summary statistics for extract")) %>%
  kable_styling()

of <- paste0(repoParams$GreenGridData, "/gridSpy/halfHour/extracts/halfHourLighting.csv")
data.table::fwrite(dt, of)
GREENGridEECA::gzipIt(of)

makeTilePlot <- function(dt){
  plotDT <- dt[, .(meanW = mean(meanPowerW),
                   nObs = .N), keyby = .(r_dateTimeHalfHour, circuitLabel)]
  p <- ggplot2::ggplot(plotDT, aes(x = r_dateTimeHalfHour, 
                                   y = circuitLabel, 
                                   fill = meanW/1000,
                                   alpha = nObs)) +
    geom_tile() +
    theme_bw() +
    scale_alpha_continuous(name = "N dwellings per half-hour") +
    scale_fill_continuous(name = "Mean power (kW)", low = "green", high = "red") +
    labs(x = "Half hour", y = "Circuit label", caption = "N dwellings (transparency) indicates\n the number of dwellings with a given circuit label") +
    theme(legend.position="bottom") + # otherwise gets squidged in word version
    # https://ggplot2.tidyverse.org/reference/guide_legend.html
    guides(fill = guide_legend(title.position = "top")) + # stacks the circuit labels and puts title on top
    guides(alpha = guide_legend(title.position = "top"))  # puts title on top
  return(p)
}


makeProfilePlot <- function(dt){
  dt[, halfHour := hms::trunc_hms(hms::as.hms(r_dateTimeHalfHour), 30*60)] # add hms as half hours for plots
  dt[, date := lubridate::date(dt$r_dateTimeHalfHour)] # add date for season
  dt <- GREENGridEECA::addNZSeason(dt) # add season
  plotDT <- dt[, .(meanW = mean(meanPowerW),
                   nObs = uniqueN(linkID)), keyby = .(halfHour, circuitLabel, season)]
  p <- ggplot2::ggplot(plotDT, aes(x = halfHour, 
                                   y = meanW/1000, 
                                   colour = circuitLabel,
                                   alpha = nObs)) +
    geom_line()  +
    theme_bw() +
    scale_colour_discrete(name = "Matching circuit labels") +
    scale_alpha_continuous(name = "N dwellings per half hour per label") +
    facet_grid(season ~ .) +
    theme(legend.position="bottom") + # otherwise gets squidged in word version
    guides(colour = guide_legend(ncol = 2, title.position = "top")) + # stacks the circuit labels and puts title on top
    guides(alpha = guide_legend(title.position = "top")) + # puts title on top
    labs(x = "Half hour", y = "Mean power (kW)", caption = "N dwellings (shown using transparency) indicates\nthe number of dwellings with a given circuit label")
  return(p)
}

makeTilePlot(dt)
makeProfilePlot(dt)

We therefore have Lighting data for r sum(t$nDwellings) dwellings.

Hot water

# more complex (we could de-capitalise first)
filter1 <- "Hot water" # or
filter2 <- "Hot Water"

dt <- halfHourlyPowerDT[circuitLabel %like% filter1 |
                                circuitLabel %like% filter2]

In this section we extract every record where:

Table \@ref(tab:extractHalfHourHotWater) shows the mean power (mean of the half-hourly mean values) for all observations extracted by circuit label.

t <- makeTable(dt)

kableExtra::kable(t, digits = 3, 
                  caption = paste0("Summary statistics for extract")) %>%
  kable_styling()

of <- paste0(repoParams$GreenGridData, "/gridSpy/halfHour/extracts/halfHourHotWater.csv")
data.table::fwrite(dt, of)
GREENGridEECA::gzipIt(of)

makeTilePlot(dt)
makeProfilePlot(dt)

We therefore have Hot Water data for r sum(t$nDwellings) dwellings.

Heat pumps

filter1 <- "Heat Pump"

dt <- halfHourlyPowerDT[circuitLabel %like% filter1]

In this section we extract every record where:

Table \@ref(tab:extractHalfHourHeatPump) shows the mean power (mean of the half-hourly mean values) for all observations extracted by circuit label.

t <- makeTable(dt)

kableExtra::kable(t, digits = 3, 
                  caption = paste0("Summary statistics for extract")) %>%
  kable_styling()

of <- paste0(repoParams$GreenGridData, "/gridSpy/halfHour/extracts/halfHourHeatPump.csv")
data.table::fwrite(dt, of)
GREENGridEECA::gzipIt(of)

makeTilePlot(dt)
makeProfilePlot(dt)

We therefore have Heat Pump data for r sum(t$nDwellings) dwellings.

Kitchen

Noting that this may include other areas of the dwelling...

filter1 <- "Kitchen"

dt <- halfHourlyPowerDT[circuitLabel %like% filter1]

In this section we extract every record where:

Table \@ref(tab:extractHalfHourKitchen) and shows the mean power (mean of the half-hourly mean values) for all observations extracted by circuit label.

t <- makeTable(dt)

kableExtra::kable(t, digits = 3, 
                  caption = paste0("Summary statistics for extract")) %>%
  kable_styling()

of <- paste0(repoParams$GreenGridData, "/gridSpy/halfHour/extracts/halfHourKitchen.csv")
data.table::fwrite(dt, of)
GREENGridEECA::gzipIt(of)

makeTilePlot(dt)
p <- makeProfilePlot(dt)
p + theme(legend.position="bottom") +
  guides(colour=guide_legend(ncol=3))

We therefore have Kitchen data for r sum(t$nDwellings) dwellings.

Non-heat pump ‘Heat’

Noting that this circuit label may include other appliances...

# more complex
filter1 <- "Heat"
# and not "Heat Pump" 
dt <- halfHourlyPowerDT[circuitLabel %like% filter1 &
                      !circuitLabel %like% "Heat Pump"]

In this section we extract every record where:

Table \@ref(tab:extractHalfHourHeat) shows the mean power (mean of the half-hourly mean values) for all observations extracted by circuit label.

t <- makeTable(dt)

kableExtra::kable(t, digits = 3, 
                  caption = paste0("Summary statistics for extract")) %>%
  kable_styling()

of <- paste0(repoParams$GreenGridData, "/gridSpy/halfHour/extracts/halfHourNonHP_Heat.csv")
data.table::fwrite(dt, of)
GREENGridEECA::gzipIt(of)

makeTilePlot(dt)
makeProfilePlot(dt)

We therefore have non Heat Pump Heat data for r sum(t$nDwellings) dwellings.

Refrigerator/Fridge

Noting that this circuit label may include other appliances...

filter1 <- "Fridge"

dt <- halfHourlyPowerDT[circuitLabel %like% filter1]

In this section we extract every record where:

Table \@ref(tab:extractHalfHourFridge) shows the mean power (mean of the half-hourly mean values) for all observations extracted by circuit label.

t <- makeTable(dt)

kableExtra::kable(t, digits = 3, 
                  caption = paste0("Summary statistics for extract")) %>%
  kable_styling()

of <- paste0(repoParams$GreenGridData, "/gridSpy/halfHour/extracts/halfHourFridge.csv")
data.table::fwrite(dt, of)
GREENGridEECA::gzipIt(of)

makeTilePlot(dt)
makeProfilePlot(dt)

We therefore have Fridge data for r sum(t$nDwellings) dwellings.

Freezer

Noting that this circuit label may include other appliances...

filter1 <- "Freezer"

dt <- halfHourlyPowerDT[circuitLabel %like% filter1]

In this section we extract every record where:

Table \@ref(tab:extractHalfHourFreezer) shows the mean power (mean of the half-hourly mean values) for all observations extracted by circuit label.

t <- makeTable(dt)

kableExtra::kable(t, digits = 3, 
                  caption = paste0("Summary statistics for extract")) %>%
  kable_styling()

of <- paste0(repoParams$GreenGridData, "/gridSpy/halfHour/extracts/halfHourFreezer.csv")
data.table::fwrite(dt, of)
GREENGridEECA::gzipIt(of)

makeTilePlot(dt)
makeProfilePlot(dt)

We therefore have Freezer data for r sum(t$nDwellings) dwellings.

Oven

Noting that this circuit label may include other appliances...

filter1 <- "Oven"

dt <- halfHourlyPowerDT[circuitLabel %like% filter1]

In this section we extract every record where:

Table \@ref(tab:extractHalfHourOven) shows the mean power (mean of the half-hourly mean values) for all observations extracted by circuit label.

t <- makeTable(dt)

kableExtra::kable(t, digits = 3, 
                  caption = paste0("Summary statistics for extract")) %>%
  kable_styling()

of <- paste0(repoParams$GreenGridData, "/gridSpy/halfHour/extracts/halfHourOven.csv")
data.table::fwrite(dt, of)
GREENGridEECA::gzipIt(of)

makeTilePlot(dt)
makeProfilePlot(dt)

We therefore have Oven data for r sum(t$nDwellings) dwellings.

Photovoltaic panels

Noting that this circuit label may include other appliances...

filter1 <- "PV"

dt <- halfHourlyPowerDT[circuitLabel %like% filter1]

In this section we extract every record where:

Table \@ref(tab:extractHalfHourPV) shows the mean power (mean of the half-hourly mean values) for all observations extracted by circuit label.

t <- makeTable(dt)

kableExtra::kable(t, digits = 3, 
                  caption = paste0("Summary statistics for extract")) %>%
  kable_styling()

of <- paste0(repoParams$GreenGridData, "/gridSpy/halfHour/extracts/halfHourPV.csv")
data.table::fwrite(dt, of)
GREENGridEECA::gzipIt(of)
p <- makeTilePlot(dt)
p + scale_fill_continuous(low = "red", high = "green") # reverse colours

makeProfilePlot(dt)

We therefore have PV data for r sum(t$nDwellings) dwellings. It is interesting to note that most of the PV output for the dwelling with PV & Storage (rf_23) appears, on average, to be absorbed by its storage.

Total load

dt <- halfHourlyPowerDT[circuitLabel %like% "imputedTotalDemand"]

In this section we extract every record where:

Table \@ref(tab:extractHalfHourtotalLoad) shows the mean power (mean of the half-hourly mean values) for all observations extracted by circuit label.

t <- makeTable(dt)

kableExtra::kable(t, digits = 3, 
                  caption = paste0("Summary statistics for extract")) %>%
  kable_styling()

of <- paste0(repoParams$GreenGridData, "/gridSpy/halfHour/extracts/halfHourImputedTotalDemand.csv")
data.table::fwrite(dt, of)
GREENGridEECA::gzipIt(of)
p <- makeTilePlot(dt)
p + theme(axis.text.y = element_text(angle = 90, hjust = 0.5))

makeProfilePlot(dt)

We therefore have imputedTotalDemand data for r sum(t$nDwellings) dwellings.

Summary

This report used the full New Zealand GREEN Grid household electricity demand study research data to:

This report described the results of this work and directed the reader to relevant R code where necessary.

All code used to create this report is available from:

The archived and most recent version of the report is available from:

The half-hourly mean power demand data together with the circuit level extracts can be made available on request.


Data Annex

1 minute estimated total load files {#totalLoadFiles}

kableExtra::kable(impfilesDT[all.files %like% "v1.1", .(file = all.files, "Mb (gzipped)" = fSizeMb)], caption = "Size of clean 1 minute data files with estimated total load", digits = 2) %>%
  kable_styling()

Half hour aggregate files {#hhFiles}

kableExtra::kable(halfHourlyFilesDT[, .(file = all.files, "Mb (gzipped)" = fSizeMb)], caption = "Size of clean half hourly data files", digits = 2) %>%
  kable_styling()

Circuit labels {#circuitLabels}

All labels

Table \@ref(tab:getCircuitLabels) shows all circuit labels and reports the number of dwellings with those circuit labels and descriptive statistics for half hourly mean power. Note that they include all labels for all households even those which we recoomend are excluded from subsequent analysis (see Section \@ref(imputeTotal)). The table is ordered by the most frequent (Hot Water) but it should be noted that the labels were recorded by the installing engineers and so small variations in labels for similar appliances should be expected.

t <- halfHourlyPowerDT[!(circuitLabel %like% "imputed"), # exclude total
                       .(nObs = .N,
                      nDwellings = uniqueN(linkID),
                      meanW = mean(meanPowerW, na.rm = TRUE),
                      minW = min(meanPowerW, na.rm = TRUE),
                      maxW = max(meanPowerW, na.rm = TRUE)),
                  keyby = .(circuitLabel)]

kableExtra::kable(t[order(-nDwellings)], digits = 2,
                  caption = "Summary statistics by circuit label") %>%
  kable_styling()

# save as a data file for ease of use
of <- paste0(repoParams$repoLoc, "/data/output/circuitLabelsComplete.csv")
data.table::fwrite(t, file = of)

All labels with derived coding

Table \@ref(tab:getCircuitLabelsByEECA) shows all the coding of circuit labels used to define:

These circuit codings are used in Part B and note that they may vary slightly from those used in earlier sections of this report. Note that they include all labels for all households even those which we recoomend are excluded from subsequent analysis (see Section \@ref(imputeTotal)).

We have deliberately used a conservative coding approach so that circuits with multiple appliances and/or which are non-specific power circuits have been coded as XX_Other since we are unable to allocate load to specific appliances or usages. For the avoidance of doubt, readers should review the function used to create these labels. The table reports the number of dwellings with those circuit labels and descriptive statistics for half hourly mean power. Clearly it would be possible to:

The former is straight foward but the latter would require substantial research and it is unclear how robust the results would be.

t <- halfHourlyPowerDT[!(circuitLabel %like% "imputed"), .(nObs = .N,
                      nDwellings = uniqueN(linkID),
                      meanW = mean(meanPowerW, na.rm = TRUE),
                      minW = min(meanPowerW, na.rm = TRUE),
                      maxW = max(meanPowerW, na.rm = TRUE)),
                  keyby = .(eecaCircuit,circuitLabel)]

kableExtra::kable(t, digits = 2,
                  caption = "Summary statistics by EECA circuit label and original circuit label") %>%
  kable_styling()

# save as a data file for ease of use
of <- paste0(repoParams$repoLoc, "/data/output/circuitLabelsCompleteByEECA.csv")
data.table::fwrite(t, file = of)

Half hourly total load summary

Descriptive statistics for aggregate half hourly power data for all dwellings and all circuits:

skimr::skim(halfHourlyPowerDT)

Per dwelling summaries of half-hourly power data

The following tables show descriptive statistics for the meanPowerW values for each circuit by dwelling.

ids <- unique(halfHourlyPowerDT$linkID)

for(hh in ids){
  # prints a lot of tables

  message("#-> Dwelling: ", hh)

    t <- data.table::cube(halfHourlyPowerDT[linkID == hh], j = c(list(meanPowerW = mean(meanPowerW, na.rm = TRUE),
                                                              minPowerW = min(meanPowerW, na.rm = TRUE),
                                                              maxPowerW = max(meanPowerW, na.rm = TRUE),
                                                              nObs = .N)), by = c("circuit"))
    # NA in circuit column of ersults table = All
    t[, circuit := ifelse(is.na(circuit), "All", circuit)]
    print(kableExtra::kable(t, caption = paste0(hh, ": Mean of half-hourly mean power (W) by circuit type")) %>% 
      kable_styling())
}

Runtime

t <- proc.time() - startTime
elapsed <- t[[3]]

Analysis completed in r round(elapsed,2) seconds ( r round(elapsed/60,2) minutes) using knitr in RStudio with r R.version.string running on r R.version$platform.

R environment

R packages used

Session info

sessionInfo()

References



CfSOtago/GREENGridEECA documentation built on May 1, 2022, 8:08 p.m.