knitr::opts_chunk$set(echo = FALSE) # by default turn off code echo
# Set start time ---- startTime <- proc.time() # Local libraries ---- library(dplyr) library(ggplot2) # plots library(kableExtra) # fancy kable library(skimr) # skim library(viridis) # cb friendly colours # Local parameters ---- # set attributes for plot vLineAlpha <- 0.4 vLineCol <- "#0072B2" # http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette timeBreaks <- c(hms::as.hms("04:00:00"), hms::as.hms("08:00:00"), hms::as.hms("12:00:00"), hms::as.hms("16:00:00"), hms::as.hms("20:00:00"), hms::as.hms("24:00:00") ) # Local functions ---- seasonalProfilePlot <- function(dt, xvar, yvar){ # assumes dt has seasons p <- ggplot2::ggplot(dt[!is.na(season)], # make sure no un-set seasons/non-parsed dates aes(x = get(xvar), y = get(yvar), colour = linkID)) + # colours by household geom_point() + scale_colour_viridis(discrete=TRUE) + # use colour-blind friendly palette theme(strip.text.y = element_text(angle = 0, vjust = 0.5, hjust = 0.5)) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + guides(colour = guide_legend(title = "Season: ")) + theme(legend.position = "bottom") + labs(title = paste0("Seasonal mean total power demand profiles"), y = "set this yourself", x = "Time of day", caption = myCaption ) p <- p + scale_x_time(breaks = timeBreaks) + geom_vline(xintercept = timeBreaks, alpha = vLineAlpha, colour = vLineCol) + facet_grid(season ~ .) # make seasons return(p) }
\newpage
If you wish to use any of the material from this report please cite as:
r lubridate::year(today())
) r paste0(params$title,": ", params$subtitle)
. r ggrParams$pubLoc
.This work is (c) r lubridate::year(today())
the University of Otago
This report uses the safe version of the grid spy 1 minute data which has been processed using the code in https://github.com/CfSOtago/GREENGridData/tree/master/dataProcessing/gridSpy.
\newpage
This report provides summary analysis of the imputed total demand for each household. The imputation was done using:
r params$circuitsFile
hhDT <- drake::readd(hhData) # bring back from wherever drake put it setkey(hhDT, linkID)
circuitsDT <- drake::readd(circuits) # bring back from wherever drake put it
powerDT <- drake::readd(powerData) # bring back from wherever drake put it # create some useful derived date & time variables powerDT <- powerDT[, r_dateTime_nz := lubridate::as_datetime(r_dateTime, # stored as UTC tz = "Pacific/Auckland")] # so we can extract within NZ dateTime setkey(powerDT, linkID, r_dateTime) t <- head(powerDT) # get head here powerDT <- powerDT[, obsTimeHMS := hms::as.hms(r_dateTime_nz)] # HH:MM for demand profile plots powerDT <- powerDT[, obsTimeHalfHour := hms::trunc_hms(obsTimeHMS, 60*30)] kableExtra::kable(t, caption = paste0("First few rows of grid spy data")) %>% kable_styling()
Table \@ref(tab:readdPowerData) shows the first few rows of the Grid Spy 1 minute power data. Table \@ref(tab:summaryPowerData) shows a summary of the Grid Spy 1 minute power data (for more detail see Section \@ref(powerSkim)).
t <- summary(powerDT) kableExtra::kable(t, caption = paste0("Summary of grid spy data")) %>% kable_styling()
Note that:
Table \@ref(tab:checkHouseholds) shows the summaries for each household as imputed using r params$circuitsFile
. Note that the presence of a PV inverter may only be known from the appliance summary and/or the household data.
The number of households excluded will depend on the circuits file used:
hhID_hDT <- hhDT[, .(linkID, hasApplianceSummary, pvInverter = `PV Inverter`, energyStorage = `Energy Storage`, otherGenDevice = `Other Generation Device`, notes) ] # all linkIDs in the household data hhID_hDT$hasHhData <- "Yes" hhID_pDT <- powerDT[, .(nObs = .N, meanW = mean(powerW), minW = min(powerW), maxW = max(powerW)), keyby = .(linkID)] # all linkIDs in the power data hhID_pDT$powerDataSource <- "Has power data" # NB - there may be some linkIDs with no power data but household data & vice versa hhID_DT <- merge(hhID_hDT, hhID_pDT, by = "linkID", all = TRUE) circuitsDF <- dplyr::as_tibble(cbind(linkID = names(circuitsDT), t(circuitsDT))) # the circuits table we used in the imputation - need to tranpose so we can link to the other data by linkID circuitsLongDT <- data.table::as.data.table(circuitsDF) # back to dt setnames(circuitsLongDT, c(V1 = "linkID", V2 = "Circuit 1", V3 = "Circuit 2", V4 = "Circuit 3")) # rename for clarity setkey(circuitsLongDT, linkID) hhID_DT <- circuitsLongDT[hhID_DT] # add circuits to all IDs hhID_DT <- hhID_DT[is.na(hasApplianceSummary), hasApplianceSummary := "No"] hhID_DT <- hhID_DT[hasApplianceSummary == "No" & is.na(pvInverter), pvInverter := "Not known"] hhID_DT <- hhID_DT[hasApplianceSummary == "Yes" & is.na(pvInverter), pvInverter := "No"] # we know they do not hhID_DT <- hhID_DT[hasApplianceSummary == "No" & is.na(energyStorage), energyStorage := "Not known"] hhID_DT <- hhID_DT[hasApplianceSummary == "Yes" & is.na(energyStorage), energyStorage := "No"] # we know they do not hhID_DT <- hhID_DT[is.na(notes) , notes := ""] hhID_DT <- hhID_DT[is.na(hasHhData) , hasHhData := "No"] kableExtra::kable(hhID_DT[, .(linkID, hasHhData, hasApplianceSummary, pvInverter, energyStorage, notes, nObs, minW, maxW, `Circuit 1`,`Circuit 2`,`Circuit 3`)], caption = paste0("Summary of monitored households (NA indicates data not included in ", circuitsFile ,")")) %>% kable_styling() # report all the households
Figure \@ref(fig:testPowerTileObs) shows the incidence of -ve overall imputed power values by linkID (household) and date. As we can see they tend to be concentrated in a few households.
myCaption <- "GREEN Grid Data: Imputed total household power demand" plotDT <- powerDT[powerW <= 0, .(nObs = .N), keyby = .(Date = lubridate::as_date(r_dateTime), linkID)] setkey(plotDT, linkID) plotDT <- plotDT[hhID_DT] p <- ggplot2::ggplot(plotDT, aes(x = Date, y = linkID, fill = nObs)) + geom_tile() + scale_fill_viridis() + labs(caption = myCaption) plotDT <- plotDT[is.na(pvInverter), pvInverter := "No or unkown"] p t <- plotDT[, .(nObs = sum(nObs, na.rm = TRUE), nHouseholds = uniqueN(linkID)), keyby = .(pvInverter)] kableExtra::kable(t, caption = "Number of observations of 0 or -ve total W by presence of PV") %>% kable_styling()
Table \@ref(tab:testPowerTileObs) (-ve and 0 observations by PV inverter presence) shows whether or not this is likely to be a PV related problem in the dataset defined by . This is confirmed by Table @\ref(tab:testPV) which shows the summary data from Table \@ref(tab:testPowerTileObs) but for just those we know to have PV inverters. If this version of the circuits file has excluded them then the power data will be all NA.
kableExtra::kable(hhID_DT[pvInverter == "yes", .(linkID, pvInverter, energyStorage, notes, nObs, minW, maxW, `Circuit 1`,`Circuit 2`,`Circuit 3`)], caption = paste0("Summary of monitored households (NA indicates data not included in ", circuitsFile ,")")) %>% kable_styling() # report all the households
Figure \@ref(fig:testPowerTileMean) shows the mean overall imputed power values by linkID (household) and date where power <= 0. As we can see they tend to be concentrated in a few households with some reporting persistent -ve values even though they do not appear to have PV.
plotDT <- powerDT[powerW <= 0, .(meanWh = mean(powerW)), keyby = .(Date = lubridate::as_date(r_dateTime), linkID)] setkey(plotDT, linkID) plotDT <- plotDT[hhID_DT] plotDT <- plotDT[is.na(pvInverter), pvInverter := "No or unkwown"] p <- ggplot2::ggplot(plotDT, aes(x = Date, y = linkID, fill = meanWh)) + geom_tile() + scale_fill_viridis() + labs(caption = myCaption) p
Next we test the power profiles by season to see if there is a seasonal pattern to the negatuive values which might indicate unreported PV (for example).
First we create a Southern Hemisphere season variable. We have a function to do this in the GREENGridData
package. We print a check table to ensure we are all happy with the coding of season
.
powerDT <- GREENGridData::addNZSeason(powerDT) table(lubridate::month(powerDT$r_dateTime, label = TRUE), powerDT$season, useNA = "always")
Figure \@ref(fig:makeMeanPlot) plots overall mean power per minute by season and household id.
# create default caption myCaption <- paste0("GREENGrid Grid Spy household electricity demand data (https://dx.doi.org/10.5255/UKDA-SN-853334)", "\n", min(powerDT$r_dateTime), " to ", max(powerDT$r_dateTime), "\nTime = Pacific/Auckland", "\n (c) ", lubridate::year(Sys.Date())," University of Otago") plotDT <- powerDT[, .(meanW = mean(powerW)), keyby = .(season, obsTimeHalfHour, linkID) ] setkey(plotDT, linkID) setkey(hhID_DT, linkID) plotDT <- hhID_DT[, .(linkID, pvInverter)][plotDT] p <- seasonalProfilePlot(plotDT, xvar = 'obsTimeHalfHour', yvar = 'meanW') p + labs(y = "Mean W per minute") + facet_wrap(. ~ pvInverter)
If a household has PV this should be clearly visible as a -ve curve centered on ~ 12:00 - 13:00.
plotDT <- powerDT[powerW <= 0, .(minW = min(powerW)), keyby = .(season, obsTimeHalfHour, linkID) ] setkey(plotDT, linkID) setkey(hhID_DT, linkID) plotDT <- hhID_DT[, .(linkID, pvInverter)][plotDT] p <- seasonalProfilePlot(plotDT, xvar = 'obsTimeHalfHour', yvar = 'minW') p <- p + labs(y = "Min W per minute") + facet_wrap(. ~ pvInverter) p # ggplot p <- p + labs(caption = "Interactive plotly version") #plotly::ggplotly(p) # useful for debugging - breaks under drake (why?)
Figure \@ref(fig:makeMinPlot) repeats this analysis but for minimum power per minute by season and household id and plots only those values that are <= 0 to clarify time of day effects on -ve values.
When using the 'total demand' derived data we suggest data users:
Seperately, for reasons explained elsewhere we recommend removing rf_46 from the data prior to analysis.
In all cases we recommend that users check the data carefully before analysis and document any filtering they apply!
skimr::skim(powerDT)
skimr::skim(hhDT)
t <- proc.time() - startTime elapsed <- t[[3]]
Analysis completed in r round(elapsed,2)
seconds ( r round(elapsed/60,2)
minutes) using knitr in RStudio with r R.version.string
running on r R.version$platform
.
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.