R/aggregate_for_variable_by_nuts.R

Defines functions aggregate_for_variable_by_nuts

Documented in aggregate_for_variable_by_nuts

#' @title Data aggregation function for variable and NUTS IDs
#' @description Aggregates the data of one variable over a list of NUTS IDs,
#'   producing one aggregated value per year.
#' @details Only years for which every selected region has a value (no `NA`)
#'   are aggregated; all other years are omitted from the result.
#'
#'   If no data is left after filtering, an empty data frame is returned and a
#'   note is appended to the character vector `messages` in the global
#'   environment (it is created if it does not exist yet).
#' @param nuts_ids vector (or list) with character strings with the regional
#'   IDs that shall be aggregated.
#' @param VarName character string with the name of the variable.
#' @param aggregation_name ID of the aggregated variable, which will be used
#'   instead of the regional ID (NUTS_ID).
#' @param group_by "sum", "mean" or "median"; indicates whether the
#'   aggregation uses the sum, the mean or the median of the data. Any other
#'   value raises an error.
#' @param NUTS_level indicates whether the nuts_ids shall be filtered on a
#'   specific NUTS level ("NUTS3", "NUTS2", "NUTS1", "NUTS country"). Any other
#'   value (default "all") applies no filter.
#' @param data dataframe from which you want to extract your data. Default is
#'   the EntrancesData dataset.
#' @return dataframe with the aggregated (sum, mean or median) value of the
#'   variable for each year that has no NAs.
#' @export
#' @examples
#' # Define a list of NUTS IDs, a variable, the type of aggregation (sum, mean
#' # or median) and the name for the regional ID of the aggregation to get a
#' # data frame with the aggregated values for each year.
#' # Additionally you can filter your NUTS IDs for a specific level
#' # (NUTS3, NUTS2, NUTS1, NUTS country).
#' # You can define a data frame, if you don't want to use the EntrancesData
#' # data set from the package.
#' aggregate_for_variable_by_nuts(nuts_ids=c("ITG2B", "ITG2C"), VarName="GDP_EUR", aggregation_name="GDP_Sulcis_NUTS3", group_by="sum", NUTS_level="all", data=EntrancesData)
aggregate_for_variable_by_nuts <- function(nuts_ids, VarName, aggregation_name, group_by = "sum", NUTS_level = "all", data = EntrancesData) {
  # Resolve the aggregation function up front so that an invalid `group_by`
  # fails immediately instead of only when a complete column is encountered.
  aggregators <- list(sum = sum, mean = mean, median = stats::median)
  if (!is.character(group_by) || length(group_by) != 1L ||
        !group_by %in% names(aggregators)) {
    stop("Parameter for group_by not valid")
  }
  aggregate_values <- aggregators[[group_by]]

  # Restrict the data to the requested regions and variable.
  data <- get_data_for_regions(RegionIDs = unlist(nuts_ids), data = data)
  data <- get_data_for_variable(data = data, VarName = VarName)

  # Optional filter on a single NUTS level; any other value keeps all levels.
  if (NUTS_level == "NUTS3") {
    data <- get_data_NUTS3(data)
  } else if (NUTS_level == "NUTS2") {
    data <- get_data_NUTS2(data)
  } else if (NUTS_level == "NUTS1") {
    data <- get_data_NUTS1(data)
  } else if (NUTS_level == "NUTS country") {
    data <- get_data_NUTS_country(data)
  }

  # NOTE(review): the loop below treats column 1 of the timetable as a
  # non-year column and the remaining column names as years -- confirm
  # against get_timetable_for_variable().
  timetable <- get_timetable_for_variable(data = data, VarName = VarName)

  # Empty frame with the same columns as the input data.
  dataout <- data[0, , drop = FALSE]
  region <- "individual aggregation"
  no_data <- nrow(timetable) == 0

  if (!no_data) {
    # seq_len()[-1] is empty for a one-column timetable, unlike 2:ncol().
    for (col in seq_len(ncol(timetable))[-1]) {
      values <- timetable[, col]
      # Aggregate only years where every region reports a value.
      if (!anyNA(values)) {
        value <- aggregate_values(values)
        year <- as.numeric(colnames(timetable)[col])
        # NOTE(review): positional assignment assumes the data columns are
        # ordered (ID, year, variable, value, region) -- confirm.
        dataout[nrow(dataout) + 1, ] <- c(aggregation_name, year, VarName, value, region)
      }
    }
  } else {
    # Record the problem in the global `messages` vector.
    msg <- paste0("No data available for ", aggregation_name, " on this NUTS level.")
    if (exists("messages")) {
      assign("messages", c(messages, msg), envir = .GlobalEnv)
    } else {
      assign("messages", msg, envir = .GlobalEnv)
    }
  }

  # c() above coerces the row to character; restore the numeric columns.
  dataout$year <- as.numeric(dataout$year)
  dataout$value <- as.numeric(dataout$value)
  if (no_data && exists("messages")) {
    print("check display_messages() to see warning messages. Messages will be deleted after displaying.")
  }
  dataout
}
THartl1/EntrancesDataPackage documentation built on Dec. 18, 2021, 4:01 p.m.