knitr::opts_chunk$set(echo = FALSE) # by default turn off code echo
# Set start time ---- startTime <- proc.time() # Local parameters ---- GREENGridData::setup() # set package parameters mostly from ggrParams.R # Local functions ----
\newpage
If you wish to use any of the material from this report please cite as:
r lubridate::year(today())
) r paste0(params$title,params$circuit)
. r ggrParams$pubLoc
.This work is (c) r lubridate::year(today())
the University of Southampton.
This report uses the safe version of the grid spy 1 minute data which has been processed using the code in https://github.com/CfSOtago/GREENGridData/tree/master/dataProcessing/gridSpy. It also assumes you have already run the example circuit extraction script using circuit = r params$circuit
.
\newpage
This report provides summary analysis of one circuit type (r params$circuit
) across all households as an example.
The data used to generate this report is:
r gsFile
r hhFile
First we load the household data. readr
will give some feedback on the columns.
if(file.exists(hhFile)){ hhDT <- data.table::as.data.table(readr::read_csv(hhFile)) # load hh data data.table::setkey(hhDT, linkID) } else { print(paste0("Failed to find ", hhFile," - is the data source available?")) }
Next we load the Grid Spy extract for r params$circuit
. This uses a GREENGridData
package function intended to load the cleaned individual household data which warns that two of the column names are not found. These columns were dropped during the extraction process so we can safely ignore these warnings
gsDT <- GREENGridData::getCleanGridSpyFile(gsFile) # load Grid Spy data data.table::setkey(gsDT, linkID) t <- head(gsDT) kableExtra::kable(t, caption = paste0("First few rows of grid spy data")) %>% kable_styling()
Table \@ref(tab:loadGsData) shows the first few rows of the Grid Spy 1 minute power data.
t <- summary(gsDT) kableExtra::kable(t, caption = paste0("Summary of grid spy data")) %>% kable_styling() # create some useful derived date & time variables gsDT <- gsDT[, obsTime := hms::as.hms(r_dateTime)] # HH:MM for demand profile plots
Table \@ref(tab:summaryGsData) shows a summary of the Grid Spy 1 minute power data.
Note that:
First we create a Southern Hemisphere season variable. Luckily we have a function to do this in the GREENGridData
package. We print a check table to ensure we are all happy with the coding of season
.
gsDT <- GREENGridData::addNZSeason(gsDT) table(lubridate::month(gsDT$r_dateTime, label = TRUE), gsDT$season, useNA = "always")
This section plots overall mean power per minute by season.
plotDT <- gsDT[, .(meanW = mean(powerW)), keyby = .(season, obsTime) ] # set attributes for plot vLineAlpha <- 0.4 vLineCol <- "#0072B2" # http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette timeBreaks <- c(hms::as.hms("04:00:00"), hms::as.hms("08:00:00"), hms::as.hms("12:00:00"), hms::as.hms("16:00:00"), hms::as.hms("20:00:00"), hms::as.hms("24:00:00") ) # create default caption myCaption <- paste0("GREENGrid Grid Spy household electricity demand data (https://dx.doi.org/10.5255/UKDA-SN-853334)", "\n", min(gsDT$r_dateTime), " to ", max(gsDT$r_dateTime), "\nTime = Pacific/Auckland", "\n (c) ", lubridate::year(now())," University of Otago") myPlot <- ggplot2::ggplot(plotDT[!is.na(season)], # make sure no un-set seasons/non-parsed dates aes(x = obsTime, y = meanW/1000, colour = season)) + geom_line() + scale_colour_manual(values=ggrParams$cbPalette) + # use colour-blind friendly palette theme(strip.text.y = element_text(angle = 0, vjust = 0.5, hjust = 0.5)) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + guides(colour = guide_legend(title = "Season: ")) + theme(legend.position = "bottom") + labs(title = paste0(params$circuit, ": seasonal mean power demand profiles"), y = "Mean kW per minute", x = "Time of day", caption = myCaption ) myPlot + scale_x_time(breaks = timeBreaks) + geom_vline(xintercept = timeBreaks, alpha = vLineAlpha, colour = vLineCol) ggplot2::ggsave(paste0(ggrParams$repoLoc,"/examples/outputs/", params$circuit, "_meankWperminBySeason.png"))
Figure \@ref(fig:makePlot) shows the overall mean kW per minute in each season for this circuit (r params$circuit
).
This section plots overall mean power per minute by season and number of children aged 0-12 as an illustration of how to link the Grid Spy and household data. We will go through the steps with commentary and showing the code...
Table \@ref(tab:hhTable) the number of households who have different numbers of children aged 0-12 so we know how many households make up each line on the plot. This table includes households where we do not know the number of children (NA) but we do have electricity demand data.
t <- hhDT[, .(Freq = .N), keyby = nChildren0_12] kableExtra::kable(t, caption = paste0("Number of households with children aged 0-12")) %>% kable_styling()
Now we link (join) the Grid Spy and household data.tables and aggregate (summarise) by season and number of children aged 0-12. You can do this using data.table
's on the fly join but we have found pre-joining of the columns you want to be much faster. We have wrapped both methods inside system.time()
calls for comparison.
You can probably also do this in dplyr
etc but we haven't tried.
print("data.table join on the fly (version 1 - slow):") system.time(plotDT <- gsDT[hhDT][, .(meanW = mean(powerW)), keyby = .(season, obsTime, nChildren0_12) ]) # aggregate by season, time and n children aged 0 - 12 # print("data.table join on the fly (version 2):") # # appears to be as specified in https://cran.r-project.org/web/packages/data.table/vignettes/datatable-faq.html#MergeDiff # # but fails to find nChildren0_12 # system.time(plotDT <- gsDT[hhDT, .(meanW = mean(powerW)), keyby = .(season, obsTime, nChildren0_12) # ]) # aggregate by season, time and n children aged 0 - 12 aggFunc <- function(gsDT, hhDT){ # do it in stages instead keepCols <- c("linkID", "nChildren0_12") mergedDT <- gsDT[hhDT[, ..keepCols]] dt <- mergedDT[, .(meanW = mean(powerW)), keyby = .(season, obsTime, nChildren0_12)] return(dt) } print("data.table join then aggregate:") system.time(plotDT <- aggFunc(gsDT, hhDT))
Now use the aggregated data.table
to make the plot. Note that as specified this will add a line for nChildren0_12 == NA household(s) - see Table \@ref(tab:hhTable).
myPlot <- ggplot2::ggplot(plotDT[!is.na(season)], # make sure no un-set seasons/non-parsed dates aes(x = obsTime, y = meanW/1000, colour = as.factor(nChildren0_12))) + geom_line() + scale_colour_manual(values=ggrParams$cbPalette) + # use colour-blind friendly palette facet_grid(season ~ .) + theme(strip.text.y = element_text(angle = 0, vjust = 0.5, hjust = 0.5)) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + guides(colour = guide_legend(title = "Number of children aged 0 - 12: ")) + theme(legend.position = "bottom") + labs(title = paste0(params$circuit, ": seasonal mean power demand profiles by n children aged 0-12"), y = "Mean kW per minute", x = "Time of day", caption = myCaption ) myPlot + scale_x_time(breaks = timeBreaks) + geom_vline(xintercept = timeBreaks, alpha = vLineAlpha, colour = vLineCol) ggplot2::ggsave(paste0(ggrParams$repoLoc,"/examples/outputs/", params$circuit, "_meankWperminBySeason_nKids0-12.png"))
Figure \@ref(fig:makeKidsPlot) shows the mean kW per minute per season by presence of young children for this circuit (r params$circuit
). Can you see anything interesting or unusual and might this be due to the numbers of households in each group?
t <- proc.time() - startTime elapsed <- t[[3]]
Analysis completed in r round(elapsed,2)
seconds ( r round(elapsed/60,2)
minutes) using knitr in RStudio with r R.version.string
running on r R.version$platform
.
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.