#' toolSolarFunctionAggregate
#'
#' Aggregate Solar data into regions
#'
#' @param x magclass object that should be aggregated
#' @param rel relation matrix containing a region mapping.
#' A mapping object should contain 2 columns in which each element of x
#' is mapped to the category it should belong to after (dis-)aggregation
#' @param weight aggregation weight (should be FE|Electricity (EJ/yr) in 2015)
#' @return return: returns region aggregated solar data
#'
#' @author Felix Schreyer, Renato Rodrigues, Julian Oeser
#' @export
#' @importFrom magclass is.magpie as.data.frame as.magpie collapseNames add_columns getSets getItems
#' setItems collapseDim
#' @importFrom dplyr %>% mutate select rename filter left_join group_by ungroup arrange summarise desc
#' lag full_join
#' @importFrom tidyr spread gather complete
#' @importFrom quitte as.quitte
#' @importFrom stats weighted.mean
#' @importFrom utils packageVersion
toolSolarFunctionAggregate <- function(x, rel=NULL, weight = calcOutput("FE", aggregate = FALSE)[,"y2015","FE|Electricity (EJ/yr)"]){
# old part by Julian Oeser
# aggregate to regions
x <- toolAggregate(x,rel)
getSets(x)[1] <- "Region"
### split up pv in (needed?)
# PVall (install PV everywhere),
# PVcomp (install PV only where also csp could be installed) (FS: Why should one want to do this???),
# PVonly (install PV only where no csp can be installed)
# installable area for csp, pv in region
area.pv <- dimSums(x[,,"area"][,,"PV"][,,c("0-50", "50-100")], dim=c(3.4,3.3)) # sum over bins and distance classes
area.csp <- dimSums(x[,,"area"][,,"CSP"][,,c("0-50", "50-100")], dim=c(3.4, 3.3))
# share of area if PV installed only where no csp can be installed
if(packageVersion("magclass") < 6) {
area.only.pv.share <- ((area.pv+1)-(area.csp+1)) / (area.pv+1)
x <- add_columns(x, c("PVcomp", "PVonly"), 3.2)
x[,,"PVonly"] <- x[,,"PV"]*area.only.pv.share[,,"PV"]
x[,,"PVcomp"] <- x[,,"PV"]-(x[,,"PV"]*area.only.pv.share[,,"PV"])
} else {
area.only.pv.share <- collapseDim(((area.pv+1)-(area.csp+1)) / (area.pv+1))
xPVonly <- setItems(x[,,"PV"]*area.only.pv.share, dim = "Technology", value = "PVonly")
xPVcomp <- setItems(x[,,"PV"]-(x[,,"PV"]*area.only.pv.share), dim = "Technology", value = "PVcomp")
x <- mbind(x, xPVonly, xPVcomp)
}
### create new distance class: 1-100red (distance class where all between 1-100 are included and
# far away distanced classes are reduced in FLH which accounts for transmission losses)
bins.pv <- getNames(x[,,"PV"], dim = 4)
bins.csp <- getNames(x[,,"CSP"], dim = 4)
x1.pv <- x[,,"0-50"][,,"PV"][,,bins.pv]
x1.csp <- x[,,"0-50"][,,"CSP"][,,bins.csp]
x2.pv <- x[,,"50-100"][,,"PV"][,,bins.pv]
x2.csp <- x[,,"50-100"][,,"CSP"][,,bins.csp]
# equivalents of bins in the two distance classes
offset.pv <- 4
offset.csp <- 2
bins.pv.d2 <- c(rep(head(bins.pv, 1), offset.pv), bins.pv[1:(length(bins.pv)-offset.pv)])
bins.pv.agg <- cbind(bins.pv, bins.pv.d2)
colnames(bins.pv.agg) <- c("d1", "d2")
bins.csp.d2 <- c(rep(head(bins.csp, 1), offset.csp), bins.csp[1:(length(bins.csp)-offset.csp)])
bins.csp.agg <- cbind(bins.csp, bins.csp.d2)
colnames(bins.csp.agg) <- c("d1", "d2")
x2.pv <- toolAggregate(x2.pv, rel=bins.pv.agg, dim=3.4)
getSets(x2.pv) <- getSets(x1.pv)
x2.csp <- toolAggregate(x2.csp, rel=bins.csp.agg, dim=3.4)
getSets(x2.csp) <- getSets(x1.csp)
.missingBins <- function(x1,x2) {
bin1 <- getItems(x1, dim = "Bin")
bin2 <- getItems(x2, dim = "Bin")
return(bin1[which(!(bin1 %in% bin2))])
}
missing.bins.pv <- .missingBins(x1.pv, x2.pv)
missing.bins.csp <- .missingBins(x1.csp, x2.csp)
x2.pv <- add_columns(x2.pv, missing.bins.pv, 3.4)
x2.pv[,,missing.bins.pv] <- 0
x2.csp <- add_columns(x2.csp, missing.bins.csp, 3.4)
x2.csp[,,missing.bins.csp] <- 0
# do summation
x <- add_columns(x, "1-100red", 3.3)
x[,,"PV"][,,"1-100red"] <- x1.pv[,,"0-50"]+ x2.pv[,,"50-100"]
x[,,"CSP"][,,"1-100red"] <- x1.csp[,,"0-50"]+ x2.csp[,,"50-100"]
# emtpy declarations of variables used in dplyr operations
# (needed to make dplyr work within library)
region <- NULL
Type <- NULL
Technology <- NULL
Distance <- NULL
FLH <- NULL
value <- NULL
Bin <- NULL
capacity <- NULL
area <- NULL
maxprod <- NULL
production <- NULL
maxprod.norm <- NULL
TotalPot <- NULL
diff <- NULL
cumulative <- NULL
grade <- NULL
grade.seq <- NULL
maxprod.weight <- NULL
Seclast <- NULL
Max <- NULL
Min <- NULL
### FS: map and aggregate FLH bins from DLR data to REMIND grades
# method:
# 1. cumulative potential (maxprod) is calculated in descending order of FLHs
# 2. cumulative potential is normalized by current (2015) FE electricity consumption (IEA)
# 3. cumulative potential vs. FLH is interpolated between given data points step-wise and linearly
# to have at least one point in each REMIND grade
# 4. cumulative potential is mapped to REMIND grades by dividing it according to grade.breaks parameter (e.g. if 1. grade:
# 0-0.2, that means that the top 20% of locations are mapped into the first REMIND grade)
# 5. all potential that is above the last value of grade breaks are mapped into the last grade
# 6. an exception is made for regions/technologies where total potential is below the lower bound of the last grade s.t.
# last grade would be emtpy -> here the grades are determined by equally spaced bins of potential up to the total potential (e.g. for Japan)
# set parameters for bin to grade mapping
grade.breaks <- c(0.1,0.3,0.6,1,2,5,10,20) # grade breaks normalized by current production
n.intp <- 400 # number of interpolation points between given bins (to make sure at least one value is assigned to every REMIND grade)
thres.offset <- 30 # interpolation will be applied up to lower bound of last grade + offset (unit: normalized cumulative production)
# for assuring that enough data points are there for interpolation up to lower bound of last grade
techs <- c("CSP", "PV") # technologies to aggregate
dist <- "1-100red" # distance class to aggregate
# reference for grade distinction = 2015 production
# assign 0.01 EJ/yr as reference for grade distinction to countries with zero 2015 production
MaxProd.Norm <- toolAggregate(weight, rel)
MaxProd.Norm[MaxProd.Norm == 0] <- 0.01
# convert data to quitte format because more convenient for the following operations
df.x <- as.quitte(x[,,techs][,,dist]) %>%
# drop 0 and NA potentials
#filter(value > 0 , !is.na(value)) %>%
# rename Bin to Full load hours, convert to numerical
mutate(FLH = as.numeric(as.character(Bin))) %>%
select(region, Type, Technology, Distance, value, FLH)
# calculate maxprod (potential/maximum possible production in grade) in GWh by: capacity * FLH (Full Load Hours) / 1000
df.pot <- df.x %>%
spread(Type, value) %>%
mutate( maxprod = capacity * FLH / 1000) %>%
# remove those locations with NA or 0 area or potential
filter( maxprod > 0, area > 0) %>%
gather(Type, value, capacity, area, maxprod)
# calculate cumulated capacity, maxprod (potential) and area in descending grade order
# to obtain how much can be produced in total at this FLH or higher FLH
df.cuml <- df.pot %>%
group_by(region, Type, Technology, Distance) %>%
arrange(desc(FLH)) %>%
mutate( value = cumsum(value)) %>%
ungroup()
# process 2015 IEA electricity production
df.prod.2015 <- as.quitte(MaxProd.Norm) %>%
# FE 2015 in GWh/yr
mutate( production = value / 3.6 * 1e6) %>%
select(region, production)
# calculate cumulative normalized potential
# noramlized by current (2015) production
df.maxprod.norm <- df.cuml %>%
filter(Type == "maxprod") %>%
left_join( df.prod.2015) %>%
mutate(maxprod.norm = value/production) %>%
select(region, Technology, Distance, FLH, maxprod.norm)
## join normalized cumulative maxprod agin to df with cumulative maxprod, area, capacity
#to be able to interplolate all three measures
df.cuml <- df.cuml %>%
spread(Type, value) %>%
left_join(df.maxprod.norm)
# get total potential per region, technology, distance,
# needed for distinguishing mapping of fine grades to REMIND grades later
df.totalPot <- df.cuml %>%
group_by(region, Technology, Distance) %>%
summarise( TotalPot = max(maxprod.norm))
# define function needed for interpolation inbetween given points
# returns vector x appended by n equally spaced values between those points
# x: x coordinates of given (x,y) points
# y: difference between neighboring x coordinates
# n: number of points
seq.between <- function(x,y,n) {
fac <- 1:n/(n+1)
out <- x
for (i in 1:length(fac)) {
out <- append(out, x-y*fac[i])
}
return(out)
}
# interpolate cumulative normalized potential vs. FLH step-wise linearly between the given data points to obtain fine grades
df.interpolate <- df.cuml %>%
# filter for potential below last grade, the rest will go into last grade,
# add offset (thres.offset) to obtain values above the lower bound of the last grade
# to assure that values are interpolated at least up to this lower bound
filter( maxprod.norm < grade.breaks[length(grade.breaks)]+thres.offset) %>%
group_by(region, Technology, Distance) %>%
# order by descending FLH
arrange(desc(FLH)) %>%
# recompute potential as difference between normalized potential of neighboring FLH bins
mutate( diff = maxprod.norm - lag(maxprod.norm)) %>%
mutate( diff = ifelse(is.na(diff), maxprod.norm, diff)) %>%
# expand by n.intp equally spaced values between the given maxprod.norm values
complete(maxprod.norm = seq.between(maxprod.norm, diff, n.intp)) %>%
# interpolate maxprod, area, capacity linearly at new data points between given data points
mutate(FLH = zoo::na.approx(FLH, na.rm = F), maxprod = zoo::na.approx(maxprod, na.rm = F),
area = zoo::na.approx(area, na.rm = F), capacity = zoo::na.approx(capacity, na.rm = F)) %>%
# remove NAs that were appended above FLHs of first data point
filter( !is.na(FLH))
# transform cumulative values (maxprod, area, capacity) of fine grades to values per fine grade
# by performing gradient operation
df.fine.grades <- df.interpolate %>%
select(region, Technology, Distance, FLH, maxprod.norm, maxprod, capacity, area) %>%
gather(Type, value, maxprod, capacity, area) %>%
group_by(region, Type, Technology, Distance) %>%
arrange(desc(FLH)) %>%
# calculate value per fine grade by gradient of cumulative value
mutate( diff = value - lag(value)) %>%
# highest FLH has no predecessor, so take diff to 0, i.e. value itself
mutate( diff = ifelse(is.na(diff), value, diff)) %>%
ungroup() %>%
# relabel: value is now the potential in this fine grade,
# while the other is the cumulative value incl. all grades with higher FLH
rename( cumulative = value, value = diff)
## for regions/technologies with total potential WITHIN last grade:
# assign REMIND grades to interpolated FLH bins according to grade breaks of normalized potential defined above
df.map.REMIND <- df.fine.grades %>%
select(region, Technology, Distance, Type, maxprod.norm, FLH, value) %>%
mutate(grade = .bincode(maxprod.norm, c(0,grade.breaks))) %>%
# filter out NA grades (maxprod.norm greater than largest element in grade.breaks),
# this potential would be in last grade
filter(!is.na(grade))
## for regions/technologies with total potential BELOW last grade (so that last grade would be empty):
# map to equally sized grades up to total potential (so that each grade filled)
# determine region/technologies with first (highest FLH) data point above first grade
# and with total potential below last grade
df.OutsideBreaks <- df.cuml %>%
group_by(region, Technology, Distance) %>%
summarise( Min = min(maxprod.norm)) %>%
ungroup() %>%
full_join(df.totalPot) %>%
filter( Min > grade.breaks[1] | TotalPot < grade.breaks[length(grade.breaks)]) %>%
mutate( Max = ifelse(is.na(TotalPot), grade.breaks[length(grade.breaks)], TotalPot))
# for both (too large first grade and too small last grade): distribute grades equally spaced
# ugly loop soluation, I have not found a dplyr grouping solution
# for making .bincode() breaks argument depend on group
for (i in 1:nrow(df.OutsideBreaks)) {
# temporary dataframe for mapping small potentials to equally spaced grades
df.map.temp <- df.fine.grades %>%
select(region, Technology, Distance, Type, maxprod.norm, FLH, value) %>%
filter( region == as.character(df.OutsideBreaks$region[i]),
Technology == as.character(df.OutsideBreaks$Technology[i]),
Distance == as.character(df.OutsideBreaks$Distance[i]))
# sry: a bit messay the following case distinction...if there is ever time, could be structured better
# 1. for regions/technologies where highest FLH data point (DLR) is outisde first grade (s.t. first grade would be NA)
# create equally spaced grades up to the first element of grade.breaks that is within the original FLH data bins
if (df.OutsideBreaks$Min[i] > grade.breaks[1] & df.OutsideBreaks$Max[i] >= grade.breaks[8]) {
# if lowest DRL FLH grade is below maximum of grade breaks, distribute grades equally from minimum to maximum DLR data point
if (df.OutsideBreaks$Min[i] > grade.breaks[8]) {
new.grade.breaks <- seq(df.OutsideBreaks$Min[i]-1e-5,df.OutsideBreaks$Max[i]+1e-5,length.out = length(grade.breaks))
} else {
grades.to.shift <- grade.breaks[grade.breaks>df.OutsideBreaks$Min[i]]
new.grade.breaks <- seq(df.OutsideBreaks$Min[i]-1e-5,min(grades.to.shift), length.out = (9-length(grades.to.shift)+1))
new.grade.breaks <- c(new.grade.breaks, grades.to.shift[-1])
}
# 2. if highest FLH data point (DLR) outside first grade and (!)
# total potential below lower bound of last grade ->
# equally spaced grades up to the first element of grade.breaks that is within the original FLH data bins and (!)
# split last grade into multiple equally spaced grades
} else if (df.OutsideBreaks$Min[i] > grade.breaks[1] & df.OutsideBreaks$Max[i] < grade.breaks[8] & nrow(df.map.temp) > 0) {
grades.to.shift <- grade.breaks[grade.breaks>df.OutsideBreaks$Min[i]]
new.grade.breaks <- seq(df.OutsideBreaks$Min[i]-1e-5,min(grades.to.shift), length.out = (9-length(grades.to.shift)+1))
new.grade.breaks <- c(new.grade.breaks, grades.to.shift[-1])
grades.to.keep <- new.grade.breaks[new.grade.breaks < df.OutsideBreaks$Max[i]]
new.grade.breaks <- c(grades.to.keep[-length(grades.to.keep)], seq(grades.to.keep[length(grades.to.keep)],
df.OutsideBreaks$Max[i]+1e-2,
length.out = (9-length(grades.to.keep)+2)))
# 3. if total potential is below the current generation -> equally spaced grades up to total potential
} else if (df.OutsideBreaks$Max[i] < 1 ) {
new.grade.breaks <- c(seq(df.OutsideBreaks$Min[i]-1e-5,df.OutsideBreaks$Max[i],
length.out=(length(grade.breaks)+2)))
# 4. if total potential is above current generation but below lower bound of last grade
# -> split last grade into multiple equally spaced grades
} else if (df.OutsideBreaks$Max[i] > 1 & df.OutsideBreaks$Max[i] < grade.breaks[8] ) {
grades.to.keep <- grade.breaks[grade.breaks < df.OutsideBreaks$Max[i]]
new.grade.breaks <- c(0,grades.to.keep[-length(grades.to.keep)], seq(grades.to.keep[length(grades.to.keep)],
df.OutsideBreaks$Max[i]+1e-2,
length.out = (9-length(grades.to.keep)+1)))
}
df.map.temp <- df.map.temp %>%
mutate(grade.seq = .bincode(maxprod.norm, new.grade.breaks))
# bind rows of temporary datafrage to output dataframe of loop: df.map.small
if (i == 1) {
df.outside <- df.map.temp
} else {
df.outside <- rbind(df.outside, df.map.temp)
}
}
# replace potential values of small potential regions with the equally spaced grades from the loop to avoid emtpy grades
df.map.REMIND <- df.map.REMIND %>%
left_join(df.outside) %>%
mutate(grade = ifelse(is.na(grade.seq), grade, grade.seq)) %>%
select(-grade.seq)
# spread maxprod column to have weight for FLH aggregation in next step
df.map.REMIND <- df.map.REMIND %>%
spread(Type, value) %>%
mutate(maxprod.weight = maxprod) %>%
gather(Type, value, maxprod, capacity, area)
## aggregate fine grades to REMIND grades
df.aggregate.grades <- df.map.REMIND %>%
group_by(region, Type, Technology, Distance, grade) %>%
# aggregate fine grade to REMIND grades: FLH <- FLHs weighted by maxprod of fine grade
# potential, area, capacity <- sum over all values potential/area/capacity REMIND grade
summarise( FLH = weighted.mean(FLH, maxprod.weight), value = sum(value)) %>%
ungroup() %>%
# add FLH to Type column to make it another dimension like maxprod, area etc.
spread(Type, value) %>%
gather(Type, value, capacity, area, maxprod, FLH) %>%
select(region, Type, Technology, Distance, grade, value)
# get overlap of first data point in last grade with second last grade
# needs to be substracted from last grade later
# to make sure that we get the right totals over all grades
# last data point before lastgrade
df.seclast <- df.cuml %>%
filter(maxprod.norm < grade.breaks[length(grade.breaks)]) %>%
group_by(region, Technology, Distance) %>%
mutate( Max = max(maxprod.norm)) %>%
ungroup() %>%
filter( maxprod.norm == Max) %>%
select(-Max) %>%
gather(Type, Seclast, maxprod, area, capacity) %>%
select(region, Technology, Distance, Type, Seclast)
# calculate maxprod, area, capacity difference between last data point before last grade
# and lower bound of last grade, to be added removed from lastgrade later
df.lastgrade.overlap <- df.interpolate %>%
# filter for maxprod, area, capacity at lower bound of last grade (threshold)
filter(maxprod.norm < grade.breaks[length(grade.breaks)]) %>%
gather(Type, value, maxprod, area, capacity) %>%
group_by(region, Technology, Distance) %>%
mutate( Max = max(maxprod.norm)) %>%
ungroup() %>%
filter(maxprod.norm == Max) %>%
select(-Max) %>%
#join lsat data point before last grade
left_join(df.seclast) %>%
mutate( diff = value - Seclast) %>%
select(region, Technology, Distance, Type, diff)
# assign remaining potential to last grade for regions/technologies
# with total potential greater than last grade
df.lastgrade <- df.cuml %>%
# filter for regions/technologies... with total potential within last grade
filter( maxprod.norm >= grade.breaks[length(grade.breaks)]) %>%
select(region, Technology, FLH) %>%
left_join(df.pot) %>%
group_by(region, Technology, Distance, Type) %>%
# sum all data point above lower bound of last grade to get last grade
summarise( value = sum(value), FLH = mean(FLH)) %>%
ungroup() %>%
mutate( grade = length(grade.breaks)+1) %>%
spread(Type, value) %>%
gather(Type, value, capacity, area, maxprod, FLH) %>%
# join overlap of first data point in last grade with second last grade from last grade
left_join(df.lastgrade.overlap) %>%
# subtract overlap from maxprod, area, capacity of last grade
# because this overlap was already assgined to second last grade
mutate( value = ifelse(is.na(diff), value, value - diff)) %>%
select(region, Type, Technology, Distance, grade, value)
df.aggregate.grades <- df.lastgrade %>%
rbind(df.aggregate.grades) %>%
arrange(region, Technology, Distance, Type, grade, value)
# relabel dimensions, convert values to fit to desired REMIND input
df.out <- df.aggregate.grades %>%
quitte::revalue.levels(Type = c("area" = "limitGeopot", "FLH" = "nur"),
Technology = c("PV" = "spv", "CSP" = "csp")) %>%
# convert FLH to capacity factor
mutate( value = ifelse( Type=="nur", round(value/8760,4), value)) %>%
# convert maxprod from GWh to EJ
mutate( value = ifelse( Type == "maxprod", value * 3.6 * 1e-6, value)) %>%
arrange(region, Technology, Distance, Type, grade, value)
### create output magpie object
# convert to magpie object
out <- as.magpie(df.out, spatial = 1, temporal=NULL, datacol=6)
# remove additional dimensions (e.g. Distance class)
out <- collapseNames(out)
# calculate land use in MW/km2
out <- add_columns(out, addnm = c("luse"), dim = 3.1)
out[,,"luse.spv"] <- dimSums(out, dim = 3.3, na.rm=T)[,,"capacity"][,,"spv"]/ dimSums(out, dim = 3.3, na.rm=T)[,,"limitGeopot"][,,"spv"]
out[,,"luse.csp"] <- dimSums(out, dim = 3.3, na.rm=T)[,,"capacity"][,,"csp"]/ dimSums(out, dim = 3.3, na.rm=T)[,,"limitGeopot"][,,"csp"]
# remove capacity
out <- out[,,"capacity",invert=TRUE]
# relabel sets
getSets(out) <- c("region", "year", "type", "technology" , "rlf")
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.