knitr::opts_chunk$set(echo = FALSE) # by default turn off code echo
# Set start time ----
startTime <- proc.time()

# Local libraries ----
library(dplyr)
library(ggplot2) # plots
library(kableExtra) # fancy kable
library(skimr) # skim
library(viridis) # cb friendly colours

# Local parameters ----

# set attributes for plot
vLineAlpha <- 0.4
vLineCol <- "#0072B2" # http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette
timeBreaks <- c(hms::as.hms("04:00:00"), 
                hms::as.hms("08:00:00"),
                hms::as.hms("12:00:00"),
                hms::as.hms("16:00:00"),
                hms::as.hms("20:00:00"),
                hms::as.hms("24:00:00")
)

# Local functions ----

seasonalProfilePlot <- function(dt, xvar, yvar){
  # assumes dt has seasons
  p <- ggplot2::ggplot(dt[!is.na(season)], # make sure no un-set seasons/non-parsed dates
                       aes(x = get(xvar), 
                           y = get(yvar),
                           colour = linkID)) + # colours by household
    geom_point() + 
    scale_colour_viridis(discrete=TRUE) + # use colour-blind friendly palette
    theme(strip.text.y = element_text(angle = 0, vjust = 0.5, hjust = 0.5)) + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + 
    guides(colour = guide_legend(title = "Season: ")) +
    theme(legend.position = "bottom")  + 
    labs(title = paste0("Seasonal mean total power demand profiles"),
         y = "set this yourself", 
         x = "Time of day",
         caption = myCaption
    )

  p <- p + 
    scale_x_time(breaks = timeBreaks) +
    geom_vline(xintercept = timeBreaks, alpha = vLineAlpha, colour = vLineCol) +
    facet_grid(season ~ .) # make seasons
  return(p)
}

\newpage

About

Report circulation:

License


Citation

If you wish to use any of the material from this report please cite as:

This work is (c) r lubridate::year(today()) the University of Otago

History

Requirements:

This report uses the safe version of the grid spy 1 minute data which has been processed using the code in https://github.com/CfSOtago/GREENGridData/tree/master/dataProcessing/gridSpy.

Support


\newpage

Introduction


This report provides summary analysis of the imputed total demand for each household. The imputation was done using:

Load data

hhDT <- drake::readd(hhData) # bring back from wherever drake put it
setkey(hhDT, linkID)
circuitsDT <- drake::readd(circuits) # bring back from wherever drake put it
powerDT <- drake::readd(powerData) # bring back from wherever drake put it
#  create some useful derived date & time variables
powerDT <- powerDT[, r_dateTime_nz := lubridate::as_datetime(r_dateTime, # stored as UTC
                                                        tz = "Pacific/Auckland")] # so we can extract within NZ dateTime
setkey(powerDT, linkID, r_dateTime)
t <- head(powerDT) # get head here

powerDT <- powerDT[, obsTimeHMS := hms::as.hms(r_dateTime_nz)] # HH:MM for demand profile plots
powerDT <- powerDT[, obsTimeHalfHour := hms::trunc_hms(obsTimeHMS, 60*30)]

kableExtra::kable(t, caption = paste0("First few rows of grid spy data")) %>%
  kable_styling()

Table \@ref(tab:readdPowerData) shows the first few rows of the Grid Spy 1 minute power data. Table \@ref(tab:summaryPowerData) shows a summary of the Grid Spy 1 minute power data (for more detail see Section \@ref(powerSkim)).

t <- summary(powerDT)

kableExtra::kable(t, caption = paste0("Summary of grid spy data")) %>%
  kable_styling()

Note that:

Table \@ref(tab:checkHouseholds) shows the summaries for each household as imputed using r params$circuitsFile. Note that the presence of a PV inverter may only be known from the appliance summary and/or the household data.

The number of households excluded will depend on the circuits file used:

hhID_hDT <- hhDT[, .(linkID, hasApplianceSummary, 
                    pvInverter = `PV Inverter`, energyStorage = `Energy Storage`,
                    otherGenDevice = `Other Generation Device`, notes)
                 ] # all linkIDs in the household data
hhID_hDT$hasHhData <- "Yes"
hhID_pDT <- powerDT[, .(nObs = .N, meanW = mean(powerW), minW = min(powerW), maxW = max(powerW)),
                    keyby = .(linkID)] # all linkIDs in the power data
hhID_pDT$powerDataSource <- "Has power data"
# NB - there may be some linkIDs with no power data but household data & vice versa
hhID_DT <- merge(hhID_hDT, hhID_pDT, by = "linkID", all = TRUE)

circuitsDF <- dplyr::as_tibble(cbind(linkID = names(circuitsDT), 
                                      t(circuitsDT))) # the circuits table we used in the imputation - need to tranpose so we can link to the other data by linkID
circuitsLongDT <- data.table::as.data.table(circuitsDF) # back to dt
setnames(circuitsLongDT, c(V1 = "linkID", 
                       V2 = "Circuit 1", 
                       V3 = "Circuit 2", 
                       V4 = "Circuit 3")) # rename for clarity
setkey(circuitsLongDT, linkID)
hhID_DT <- circuitsLongDT[hhID_DT] # add circuits to all IDs

hhID_DT <- hhID_DT[is.na(hasApplianceSummary), hasApplianceSummary := "No"]
hhID_DT <- hhID_DT[hasApplianceSummary == "No" & is.na(pvInverter), pvInverter := "Not known"]
hhID_DT <- hhID_DT[hasApplianceSummary == "Yes" & is.na(pvInverter), pvInverter := "No"] # we know they do not
hhID_DT <- hhID_DT[hasApplianceSummary == "No" & is.na(energyStorage), energyStorage := "Not known"]
hhID_DT <- hhID_DT[hasApplianceSummary == "Yes" & is.na(energyStorage), energyStorage := "No"] # we know they do not
hhID_DT <- hhID_DT[is.na(notes) , notes := ""]
hhID_DT <- hhID_DT[is.na(hasHhData) , hasHhData := "No"]

kableExtra::kable(hhID_DT[, .(linkID, hasHhData, hasApplianceSummary, pvInverter, energyStorage,
                              notes, nObs, minW, maxW,
                              `Circuit 1`,`Circuit 2`,`Circuit 3`)], caption = paste0("Summary of monitored households (NA indicates data not included in ", circuitsFile ,")")) %>%
  kable_styling() # report all the households

Test 0 and -ve power value incidence

Figure \@ref(fig:testPowerTileObs) shows the incidence of -ve overall imputed power values by linkID (household) and date. As we can see they tend to be concentrated in a few households.

myCaption <- "GREEN Grid Data: Imputed total household power demand"

plotDT <- powerDT[powerW <= 0, .(nObs = .N), 
                  keyby = .(Date = lubridate::as_date(r_dateTime), linkID)]
setkey(plotDT, linkID)
plotDT <- plotDT[hhID_DT]

p <- ggplot2::ggplot(plotDT, aes(x = Date, y = linkID, fill = nObs)) +
  geom_tile() +
  scale_fill_viridis() +
  labs(caption = myCaption)

plotDT <- plotDT[is.na(pvInverter), pvInverter := "No or unkown"]

p

t <- plotDT[, .(nObs = sum(nObs, na.rm = TRUE),
                nHouseholds = uniqueN(linkID)), keyby = .(pvInverter)]

kableExtra::kable(t, caption = "Number of observations of 0 or -ve total W by presence of PV") %>%
  kable_styling()

Table \@ref(tab:testPowerTileObs) (-ve and 0 observations by PV inverter presence) shows whether or not this is likely to be a PV related problem in the dataset defined by . This is confirmed by Table @\ref(tab:testPV) which shows the summary data from Table \@ref(tab:testPowerTileObs) but for just those we know to have PV inverters. If this version of the circuits file has excluded them then the power data will be all NA.

kableExtra::kable(hhID_DT[pvInverter == "yes", .(linkID, pvInverter, energyStorage,
                              notes, nObs, minW, maxW,
                              `Circuit 1`,`Circuit 2`,`Circuit 3`)], caption = paste0("Summary of monitored households (NA indicates data not included in ", circuitsFile ,")")) %>%
  kable_styling() # report all the households

Figure \@ref(fig:testPowerTileMean) shows the mean overall imputed power values by linkID (household) and date where power <= 0. As we can see they tend to be concentrated in a few households with some reporting persistent -ve values even though they do not appear to have PV.

plotDT <- powerDT[powerW <= 0, .(meanWh = mean(powerW)), 
                  keyby = .(Date = lubridate::as_date(r_dateTime), linkID)]
setkey(plotDT, linkID)
plotDT <- plotDT[hhID_DT]

plotDT <- plotDT[is.na(pvInverter), pvInverter := "No or unkwown"]

p <- ggplot2::ggplot(plotDT, aes(x = Date, y = linkID, fill = meanWh)) +
  geom_tile() +
  scale_fill_viridis() +
  labs(caption = myCaption)

p

Test power profiles

Next we test the power profiles by season to see if there is a seasonal pattern to the negatuive values which might indicate unreported PV (for example).

First we create a Southern Hemisphere season variable. We have a function to do this in the GREENGridData package. We print a check table to ensure we are all happy with the coding of season.

powerDT <- GREENGridData::addNZSeason(powerDT)
table(lubridate::month(powerDT$r_dateTime, label = TRUE), powerDT$season, useNA = "always")

Figure \@ref(fig:makeMeanPlot) plots overall mean power per minute by season and household id.

# create default caption
myCaption <- paste0("GREENGrid Grid Spy household electricity demand data (https://dx.doi.org/10.5255/UKDA-SN-853334)",
                        "\n", min(powerDT$r_dateTime), 
                        " to ", max(powerDT$r_dateTime),
                        "\nTime = Pacific/Auckland",
                        "\n (c) ", lubridate::year(Sys.Date())," University of Otago")


plotDT <- powerDT[, .(meanW = mean(powerW)), keyby = .(season, 
                                                obsTimeHalfHour, 
                                                linkID)
             ]
setkey(plotDT, linkID)
setkey(hhID_DT, linkID)
plotDT <- hhID_DT[, .(linkID, pvInverter)][plotDT]

p <- seasonalProfilePlot(plotDT, xvar = 'obsTimeHalfHour', 
                         yvar = 'meanW')
p + labs(y = "Mean W per minute") + facet_wrap(. ~ pvInverter)

If a household has PV this should be clearly visible as a -ve curve centered on ~ 12:00 - 13:00.

plotDT <- powerDT[powerW <= 0, .(minW = min(powerW)), keyby = .(season, 
                                                obsTimeHalfHour, 
                                                linkID)
             ]

setkey(plotDT, linkID)
setkey(hhID_DT, linkID)
plotDT <- hhID_DT[, .(linkID, pvInverter)][plotDT]

p <- seasonalProfilePlot(plotDT, xvar = 'obsTimeHalfHour', 
                         yvar = 'minW')
p <- p + labs(y = "Min W per minute") + facet_wrap(. ~ pvInverter)

p # ggplot

p <- p + labs(caption = "Interactive plotly version")

#plotly::ggplotly(p) # useful for debugging - breaks under drake (why?)

Figure \@ref(fig:makeMinPlot) repeats this analysis but for minimum power per minute by season and household id and plots only those values that are <= 0 to clarify time of day effects on -ve values.

Implications

When using the 'total demand' derived data we suggest data users:

Seperately, for reasons explained elsewhere we recommend removing rf_46 from the data prior to analysis.

In all cases we recommend that users check the data carefully before analysis and document any filtering they apply!

Statistical Annex

Power data {#powerSkim}

skimr::skim(powerDT)

Household data {#hhSkim}

skimr::skim(hhDT)

Runtime

t <- proc.time() - startTime

elapsed <- t[[3]]

Analysis completed in r round(elapsed,2) seconds ( r round(elapsed/60,2) minutes) using knitr in RStudio with r R.version.string running on r R.version$platform.

R environment

R packages used

Session info

sessionInfo()

References



CfSOtago/GREENGridData documentation built on June 10, 2022, 8:03 p.m.