R/plt_boxplot.R

Defines functions plt_boxplot

Documented in plt_boxplot

#' @title Plot distribution of a variable as boxplot from a data set
#' @description Plot the distribution of a variable from a data set. Default is the EntrancesData data set. The distribution can be plotted either for all observations or individually for each year or each region (e.g. NUTS3). The plot can be saved as png and printed in the RStudio plot window.
#'
#' @param VarName character string with the name of the variable which shall be plotted.
#' @param data data_frame containing the data you want to use. It must contain a column called year containing the years of the observation, a column called value containing the variable values and a column called NUTS_ID with regional IDs. Default is the EntrancesData dataset.
#' @param distribution_of "region", "year" or "all", determines if one box shows the distribution of the values for a region, for a year or for all values of the variable
#' @param years numeric (integer or whole-number double) vector containing all years for which the data should be plotted. Default is "all", selecting all years contained in the dataset
#' @param nuts_ids string vector with NUTS IDs which should be plotted. Default is "all", selecting all NUTS IDs in the dataset for plotting.
#' @param print_plot logical, if TRUE, the plot is returned visibly so that it is displayed in the Plot window when the function is called at top level
#' @param save_png logical, if TRUE, a png of the plot is saved.
#' @param legend_labels vector with legend labels, if set to "default", NUTS/region IDs are used. Legend labels can only be changed, when distribution_of is set to "region" and the number of regions is smaller than 12. Otherwise default legend labels are used.
#' @param folder_for_png string, name of the folder in the working directory where a picture of the plots should be saved. folder="wd" saves plots directly in the working directory. Default is the folder Plots in the working directory. If the folder doesn't exist, the function creates a new folder in the working directory.
#' @param NUTS_level string, plots data only for a specific NUTS level. default is "all", so all data is plotted. Other possible inputs: "NUTS3", "NUTS2","NUTS1","NUTS country". Any other value leaves the data unfiltered and raises a warning.
#' @return The ggplot object (returned visibly) if print_plot is TRUE, otherwise NULL (invisibly).
#' @export
#' @examples
#' plt_boxplot(VarName="unp_%", data=EntrancesData, distribution_of="year", years=c(2000:2020), nuts_ids=c("IT", "PL", "RO"), print_plot=TRUE, save_png=FALSE, legend_labels=c("Italy", "Poland", "Romania"), folder_for_png="Plots", NUTS_level="all")
plt_boxplot <- function(VarName, data=EntrancesData, distribution_of="all", years="all", nuts_ids="all", print_plot=TRUE, save_png=FALSE, legend_labels="default", folder_for_png="Plots", NUTS_level="all") {
  # ggplot2 is referenced through `ggplot2::` everywhere below instead of being
  # attached with library(), so calling this function does not alter the
  # caller's search path.

  # Reduce the data to the requested variable.
  data <- get_data_for_variable(data = data, VarName = VarName)

  # Restrict to the requested regions; warnings raised by the region filter
  # are muted here, exactly as in the previous implementation.
  if (nuts_ids[1] != "all") {
    data <- suppressWarnings(get_data_for_regions(RegionIDs = nuts_ids, data = data))
  }

  # Restrict to a single NUTS level if one was requested.
  if (NUTS_level != "all") {
    data <- switch(
      NUTS_level,
      "NUTS3" = get_data_NUTS3(data),
      "NUTS2" = get_data_NUTS2(data),
      "NUTS1" = get_data_NUTS1(data),
      "NUTS country" = get_data_NUTS_country(data),
      {
        # Unknown levels used to be ignored silently; keep plotting all data
        # but tell the caller that the filter had no effect.
        warning(
          "NUTS_level '", NUTS_level, "' is not recognised; data was not ",
          "filtered by NUTS level. See ?plt_boxplot",
          call. = FALSE
        )
        data
      }
    )
  }

  # Restrict to the requested years. Any numeric vector is accepted, so that
  # c(2000, 2001) (double) works the same way as 2000:2001 (integer).
  if (is.numeric(years)) {
    data <- data[data$year %in% years, , drop = FALSE]
  } else if (!isTRUE(years[1] == "all")) {
    stop("Variable years not correctly defined. years should be a vector containing years as integer or be set to default")
  }

  # Build the plot object for the requested kind of distribution.
  plt <- switch(
    distribution_of,
    region = {
      region_plot <- ggplot2::ggplot(data, ggplot2::aes(x = factor(NUTS_ID), y = value, col = NUTS_ID)) +
        ggplot2::geom_boxplot() +
        ggplot2::labs(title = VarName) +
        ggplot2::xlab("regional ID")

      n_labels <- length(legend_labels)
      if (legend_labels[1] != "default") {
        if (n_labels < 12) {
          # "Dark2" provides at most 8 colours, "Paired" up to 12.
          palette_name <- if (n_labels < 9) "Dark2" else "Paired"
          # brewer.pal() needs n >= 3; request at least 3 colours and keep the
          # first n_labels to avoid a spurious warning for 1 or 2 labels.
          label_colours <- RColorBrewer::brewer.pal(max(n_labels, 3), palette_name)[seq_len(n_labels)]
          region_plot <- region_plot +
            ggplot2::scale_color_manual(labels = legend_labels, values = label_colours)
        } else {
          warning(
            "Too many legend labels for color palette. legend_labels were set back to 'default'",
            call. = FALSE
          )
        }
      }
      region_plot
    },
    year = ggplot2::ggplot(data, ggplot2::aes(x = factor(year), y = value)) +
      ggplot2::geom_boxplot() +
      ggplot2::labs(title = VarName) +
      ggplot2::xlab("year") +
      ggplot2::theme(legend.position = "none"),
    all = ggplot2::ggplot(data, ggplot2::aes(x = VarName, y = value, col = VarName)) +
      ggplot2::geom_boxplot() +
      ggplot2::labs(title = VarName) +
      ggplot2::theme(legend.position = "none"),
    stop("distribution_of is not correctly defined. See ?plt_boxplot")
  )

  # Save the plot as "<VarName>.png". With folder_for_png = "wd" the file goes
  # directly into the working directory, otherwise into the named sub-folder,
  # which is created if it does not exist yet.
  if (isTRUE(as.logical(save_png))) {
    if (folder_for_png == "wd") {
      directory <- getwd()
    } else {
      directory <- file.path(getwd(), folder_for_png)
      if (!dir.exists(directory)) {
        dir.create(directory, recursive = TRUE)
        message("The folder ", folder_for_png, " was created in the directory")
      }
    }
    # file.path() guarantees a separator between directory and file name
    # (the "wd" case previously produced "<wd><VarName>.png").
    png_file <- file.path(directory, paste0(VarName, ".png"))
    tryCatch(
      # ggsave() reports the image size via message(); keep that quiet.
      suppressMessages(ggplot2::ggsave(png_file, plot = plt)),
      error = function(e) {
        # Saving is best effort: report the failure but still return the plot.
        warning(
          "Saving plot as png was not successful. Check variable name for ",
          "special characters. Check if the plot can be displayed in RStudio ",
          "if print_plot is set to FALSE. Original error: ", conditionMessage(e),
          call. = FALSE
        )
      }
    )
  }

  # Returning the plot visibly makes R auto-print it at top level.
  if (isTRUE(as.logical(print_plot))) plt else invisible(NULL)
}
THartl1/EntrancesDataPackage documentation built on Dec. 18, 2021, 4:01 p.m.