knitr::opts_chunk$set(echo = FALSE) # by default turn off code echo
# Set start time ----
startTime <- proc.time()

# Local parameters ----

GREENGridData::setup() # set package parameters mostly from ggrParams.R

# Local functions ----

\newpage

About

Report circulation:

License


Citation

If you wish to use any of the material from this report please cite as:

This work is (c) r lubridate::year(today()) the University of Southampton.

History

Requirements:

This report uses the safe version of the grid spy 1 minute data which has been processed using the code in https://github.com/CfSOtago/GREENGridData/tree/master/dataProcessing/gridSpy. It also assumes you have already run the example circuit extraction script using circuit = r params$circuit.

Support


\newpage

Introduction


This report provides summary analysis of one circuit type (r params$circuit) across all households as an example.

Load data

The data used to generate this report is:

First we load the household data. readr will give some feedback on the columns.

if(file.exists(hhFile)){
  hhDT <- data.table::as.data.table(readr::read_csv(hhFile)) # load hh data
  data.table::setkey(hhDT, linkID)
} else {
  print(paste0("Failed to find ", hhFile," - is the data source available?"))
  } 

Next we load the Grid Spy extract for r params$circuit. This uses a GREENGridData package function intended to load the cleaned individual household data which warns that two of the column names are not found. These columns were dropped during the extraction process so we can safely ignore these warnings

gsDT <- GREENGridData::getCleanGridSpyFile(gsFile) # load Grid Spy data
data.table::setkey(gsDT, linkID)
t <- head(gsDT)

kableExtra::kable(t, caption = paste0("First few rows of grid spy data")) %>%
  kable_styling()

Table \@ref(tab:loadGsData) shows the first few rows of the Grid Spy 1 minute power data.

t <- summary(gsDT)

kableExtra::kable(t, caption = paste0("Summary of grid spy data")) %>%
  kable_styling()

# create some useful derived date & time variables
gsDT <- gsDT[, obsTime := hms::as.hms(r_dateTime)] # HH:MM for demand profile plots

Table \@ref(tab:summaryGsData) shows a summary of the Grid Spy 1 minute power data.

Note that:

Plot seasonal mean power profiles

First we create a Southern Hemisphere season variable. Luckily we have a function to do this in the GREENGridData package. We print a check table to ensure we are all happy with the coding of season.

gsDT <- GREENGridData::addNZSeason(gsDT)
table(lubridate::month(gsDT$r_dateTime, label = TRUE), gsDT$season, useNA = "always")

Overall profiles

This section plots overall mean power per minute by season.

plotDT <- gsDT[, .(meanW = mean(powerW)), keyby = .(season, obsTime)
             ]

# set attributes for plot
vLineAlpha <- 0.4
vLineCol <- "#0072B2" # http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette
timeBreaks <- c(hms::as.hms("04:00:00"), 
                hms::as.hms("08:00:00"),
                hms::as.hms("12:00:00"),
                hms::as.hms("16:00:00"),
                hms::as.hms("20:00:00"),
                hms::as.hms("24:00:00")
)

# create default caption
myCaption <- paste0("GREENGrid Grid Spy household electricity demand data (https://dx.doi.org/10.5255/UKDA-SN-853334)",
                        "\n", min(gsDT$r_dateTime), 
                        " to ", max(gsDT$r_dateTime),
                        "\nTime = Pacific/Auckland",
                        "\n (c) ", lubridate::year(now())," University of Otago")

myPlot <- ggplot2::ggplot(plotDT[!is.na(season)], # make sure no un-set seasons/non-parsed dates
                          aes(x = obsTime, y = meanW/1000, colour = season)) +
  geom_line() + 
  scale_colour_manual(values=ggrParams$cbPalette) + # use colour-blind friendly palette
  theme(strip.text.y = element_text(angle = 0, vjust = 0.5, hjust = 0.5)) + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + 
  guides(colour = guide_legend(title = "Season: ")) +
  theme(legend.position = "bottom")  + 
  labs(title = paste0(params$circuit, ": seasonal mean power demand profiles"),
       y = "Mean kW per minute", 
       x = "Time of day",
       caption = myCaption
       )

myPlot + 
  scale_x_time(breaks = timeBreaks) +
  geom_vline(xintercept = timeBreaks, alpha = vLineAlpha, colour = vLineCol)

ggplot2::ggsave(paste0(ggrParams$repoLoc,"/examples/outputs/", params$circuit, "_meankWperminBySeason.png"))

Figure \@ref(fig:makePlot) shows the overall mean kW per minute in each season for this circuit (r params$circuit).

Profiles by linked household attributes

This section plots overall mean power per minute by season and number of children aged 0-12 as an illustration of how to link the Grid Spy and household data. We will go through the steps with commentary and showing the code...

Table \@ref(tab:hhTable) the number of households who have different numbers of children aged 0-12 so we know how many households make up each line on the plot. This table includes households where we do not know the number of children (NA) but we do have electricity demand data.

t <- hhDT[, .(Freq = .N), keyby = nChildren0_12]

kableExtra::kable(t, caption = paste0("Number of households with children aged 0-12")) %>%
  kable_styling()

Now we link (join) the Grid Spy and household data.tables and aggregate (summarise) by season and number of children aged 0-12. You can do this using data.table's on the fly join but we have found pre-joining of the columns you want to be much faster. We have wrapped both methods inside system.time() calls for comparison.

You can probably also do this in dplyr etc but we haven't tried.

print("data.table join on the fly (version 1 - slow):")
system.time(plotDT <- gsDT[hhDT][, .(meanW = mean(powerW)), keyby = .(season, obsTime, nChildren0_12)
             ]) # aggregate by season, time and n children aged 0 - 12

# print("data.table join on the fly (version 2):")
# # appears to be as specified in https://cran.r-project.org/web/packages/data.table/vignettes/datatable-faq.html#MergeDiff
# # but fails to find nChildren0_12
# system.time(plotDT <- gsDT[hhDT, .(meanW = mean(powerW)), keyby = .(season, obsTime, nChildren0_12)
#              ]) # aggregate by season, time and n children aged 0 - 12

aggFunc <- function(gsDT, hhDT){
  # do it in stages instead
  keepCols <- c("linkID", "nChildren0_12")
  mergedDT <- gsDT[hhDT[, ..keepCols]]
  dt <- mergedDT[, .(meanW = mean(powerW)), keyby = .(season, obsTime, nChildren0_12)]
  return(dt)
}
print("data.table join then aggregate:")
system.time(plotDT <- aggFunc(gsDT, hhDT))

Now use the aggregated data.table to make the plot. Note that as specified this will add a line for nChildren0_12 == NA household(s) - see Table \@ref(tab:hhTable).

myPlot <- ggplot2::ggplot(plotDT[!is.na(season)], # make sure no un-set seasons/non-parsed dates
                          aes(x = obsTime, y = meanW/1000, 
                              colour = as.factor(nChildren0_12))) +
  geom_line() + 
  scale_colour_manual(values=ggrParams$cbPalette) + # use colour-blind friendly palette
  facet_grid(season  ~ .) + 
  theme(strip.text.y = element_text(angle = 0, vjust = 0.5, hjust = 0.5)) + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) + 
  guides(colour = guide_legend(title = "Number of children aged 0 - 12: ")) +
  theme(legend.position = "bottom")  + 
  labs(title = paste0(params$circuit, ": seasonal mean power demand profiles by n children aged 0-12"),
       y = "Mean kW per minute",
       x = "Time of day",
       caption = myCaption
       )

myPlot + 
  scale_x_time(breaks = timeBreaks) +
  geom_vline(xintercept = timeBreaks, alpha = vLineAlpha, colour = vLineCol)

ggplot2::ggsave(paste0(ggrParams$repoLoc,"/examples/outputs/", params$circuit, "_meankWperminBySeason_nKids0-12.png"))

Figure \@ref(fig:makeKidsPlot) shows the mean kW per minute per season by presence of young children for this circuit (r params$circuit). Can you see anything interesting or unusual and might this be due to the numbers of households in each group?

Runtime

t <- proc.time() - startTime

elapsed <- t[[3]]

Analysis completed in r round(elapsed,2) seconds ( r round(elapsed/60,2) minutes) using knitr in RStudio with r R.version.string running on r R.version$platform.

R environment

R packages used

Session info

sessionInfo()

References



CfSOtago/GREENGridData documentation built on June 10, 2022, 8:03 p.m.