R/visualise_variables_xx.R

#' @title Visualise each Variable Pair in a given Dataset
#' 
#' @description Plots an appropriate visualisation for each pair of variables in a given dataset.
#' This function utilises ggplot2 to design and plot the Visualisations.
#' Plots a scatterplot for two numeric variables. 
#' Plots a bar chart for two factor variables.
#' Plots a boxplot for one numeric variable and one categorical variable.
#' The plots can be standardised, whereby the values lie on the interval 0 to 1.
#' By default, regular plots are visualised.
#' There is also an option to click through the plots one by one, or print them all simultaneously.
#' The plots are output as a list.
#' The plots can also be saved to a specified directory as pdfs.
#' Note that it is crucial that the variables are correctly defined as either numeric or categorical (factor).
#' 
#' @param dataset The dataset to be visualised. 
#' Note, appropriate data types must be defined for the columns.
#' 
#' @param y_index An integer value, indicating the column index of the response variable, the default is NULL.
#' 
#' @param y_name A character value, indicating the column name of the response variable, the default is NULL.
#' 
#' @param click A logical value, indicating whether to click through the plots one by one, the default is TRUE.
#' 
#' @param standardise A logical value, indicating whether the axes should be standardised to the interval 0 to 1, the default is FALSE.
#' 
#' @param directory A character object specifying the directory where the plots are to be saved as .pdf files, the default is NULL.
#' 
#' @return Outputs a variety of bar charts, scatterplots and boxplots.
#' 
#' @import ggplot2
#' 
#' @export
#'
#' @seealso \code{\link{visualise_qqplot}}, \code{\link{visualise_residuals}}, \code{\link{visualise_variables_x}}
#' 
#' @examples 
#' #-- Example 1: LungCap Data --#
#' 
#' # Regular Visualisations of the data frame
#' visualise_variables_xx(dataset = lungcap, click = TRUE)
#' 
#' # Regular visualisations of the data using y_name
#' visualise_variables_xx(dataset = lungcap, y_name = 'Gender')
#' 
#' # Standardised Visualisations of the dataset
#' visualise_variables_xx(dataset = lungcap, standardise = TRUE, click = FALSE)
#' 
#' # Standardised Visualisations of the data frame using y_index
#' visualise_variables_xx(dataset = lungcap, standardise = TRUE, y_index = 1)
#' 
#' #-- Example 2: Titanic Data --#
#' 
#' # Regular Visualisations with click option
#' visualise_variables_xx(dataset = titanic, click = TRUE)
#' 
#' # Regular visualisations of the data using y_name
#' visualise_variables_xx(dataset = titanic, y_name = 'Gender', click = TRUE)
#' 
#' # Standardised Visualisations of the dataset
#' visualise_variables_xx(dataset = titanic, standardise = TRUE, click = FALSE)
#' 
#' # Standardised Visualisations of the data frame using y_index
#' visualise_variables_xx(dataset = titanic, standardise = TRUE, y_index = 1)
#' 
visualise_variables_xx <- function(dataset, 
                                   y_index = NULL, 
                                   y_name = NULL,
                                   click = TRUE,
                                   standardise = FALSE, 
                                   directory = NULL) 
  {
  
  # set whether to click through the plots
  op = par(ask = click)
  
  #-------------------------------------------------------------------------------#
  # When y_index = NULL and y_name = NULL                                                           #
  #-------------------------------------------------------------------------------#
  
  if(is.null(y_index) & is.null(y_name)){
    
    # NOTE: ggplot2 automatically removes missing observations
    # Make sure the datset is converted to a data frame
    dataset <- as.data.frame(x = dataset)
    
    # create a plot list to hold the pictures
    plot_list <- list()
    
    # p is the picture index - tracks the number of pictures created
    p = 1
    
    #---------------------------------------------------------------------------#
    # Unstandardised Visualisations                                             #
    #---------------------------------------------------------------------------#
    
    # plot regular visualisations
    if(standardise == FALSE) {
      
      # use a for loop to loop through the columns of the dataset
      for (i in 1:(ncol(dataset))) {
        
        # create the jth column index
        j = i + 1
        
        # use a while loop to loop through the remaining columns of the dataset
        while (j <= ncol(dataset)) {
          
          # store the resonse variable names
          xiname <- colnames(dataset)[i]
          
          # store the predictor variable names
          xjname <- colnames(dataset)[j]
          
          #---------------------------------------------------------------------#
          # If the variable is categorical, plot a bar chart                    #
          #---------------------------------------------------------------------#
          
          if ((is.factor(dataset[,i]) & (is.factor(dataset[,j])))){
            
            # First it is neccesary to aggregate the raw data
            agg_data <- as.data.frame(x = table(dataset[,c(i,j)]), 
                                      col.names = c(xiname, xjname, "Count"))
            
            # Plot the aggregated data
            double_bar_chart <- ggplot(data = agg_data, 
                                       mapping = aes(x = agg_data[,1], 
                                                     y = agg_data[,3])) + 
                                       geom_bar(mapping = aes(fill = agg_data[,2]), 
                                                position = "dodge", 
                                                stat = "identity") + 
                                        # Give the labels to the plots
                                        labs(title = paste("Double Bar Chart of", xjname, "and", xiname, sep = " "), 
                                             x = xiname, 
                                             y = "Count") +
                                        # set the title of the legend
                                        guides(fill = guide_legend(title = yname)) +
                                        # Format the text of the plots 
              theme(axis.title.x = element_text(size = 20),
                    axis.title.y = element_text(size = 20),
                    plot.title = element_text(hjust = 0.5, size = 35),
                    axis.text.x = element_text(size = 20, face = "bold"),
                    axis.text.y = element_text(size = 20),
                    legend.title = element_text(size = 20),
                    legend.text = element_text(size = 30),
                    legend.position = "top")
            
            # save the plot to the list
            plot_list[[p]] <- double_bar_chart
            
            # print the plot
            print(double_bar_chart)
            
            # save the plot to the sepecifed directory
            if(!is.null(directory)) {
              
              # save the plot
              ggsave(filename = paste("Double_Bar_Chart_of_", xiname, "_and_", xjname, ".pdf", sep = ""), 
                     path = directory,
                     device = "pdf",
                     width = 8, 
                     height = 6, 
                     units = c("in"))
              
            }
            
            # print an update of the number of plots
            print(paste("Image", as.character(p), "Completed", sep = " "))
            
            # update plot index
            p = p + 1
            
            #-------------------------------------------------------------------#
            # If both variables are numeric, plot a scatterplot                 #
            #-------------------------------------------------------------------#
            
          } else if ((is.numeric(dataset[,i]) & (is.numeric(dataset[,j])))){
            
            # create the scatterplot
            scatterplot <- ggplot(data = dataset, 
                                  mapping = aes(x = dataset[,i], 
                                                y = dataset[,j])) + 
                                  geom_point() + 
                                  # Add a regression line in blue
                                  geom_smooth(method = "lm", 
                                              colour = "blue") +
                                  # Add a loess line in red
                                  geom_smooth(method = "loess", 
                                              colour = "red", 
                                              se = FALSE) +
                                  # Add labels to the plot
                                  labs(title = paste("Scatterplot of", xjname, 
                                                     "and", xiname, sep = " "), 
                                        x = xiname, 
                                        y = xjname) + 
                                  # Format the text of the plots 
              theme(axis.title.x = element_text(size = 20),
                    axis.title.y = element_text(size = 20),
                    plot.title = element_text(hjust = 0.5, size = 35),
                    axis.text.x = element_text(size = 20, face = "bold"),
                    axis.text.y = element_text(size = 20))
            
            # save the plot to the list
            plot_list[[p]] <- scatterplot
            
            # print the plot
            print(scatterplot)
            
            # save the plot to the sepecifed directory
            if(!is.null(directory)) {
              
              # save the plot
              ggsave(filename = paste("Scatterplot_of_", xjname, "_and_", xiname, ".pdf", sep = ""), 
                     path = directory,
                     device = "pdf",
                     width = 8, 
                     height = 6, 
                     units = c("in"))
              
            }
            
            # print an update of the number of plots
            print(paste("Image", as.character(p), "Completed", sep = " "))
            
            # update plot index
            p = p + 1
            
            #--------------------------------------------------------------------------------#
            # If the x variables is numeric and the y variable is categorical plot a boxplot #
            #--------------------------------------------------------------------------------#
            
          } else if((is.numeric(dataset[,i]) & (is.factor(dataset[,j])))){
            
            # create the boxplot
            boxplot <- ggplot(data = dataset, 
                              mapping = aes(x = dataset[,j], 
                                            y = dataset[,i])) + 
              geom_boxplot(outlier.colour = 'red') + 
              # Add labels to the plot
              labs(title = paste("Boxplot of", xiname, 
                                 "and", xjname, sep = " "), 
                   x = xjname, 
                   y = xiname) + 
              # Format the text of the plots 
              theme(axis.title.x = element_text(size = 20),
                    axis.title.y = element_text(size = 20),
                    plot.title = element_text(hjust = 0.5, size = 35),
                    axis.text.x = element_text(size = 20, face = "bold"),
                    axis.text.y = element_text(size = 20))
            
            # save the plot to the list
            plot_list[[p]] <- boxplot
            
            # print the plot
            print(boxplot)
            
            # save the plot to the sepecifed directory
            if(!is.null(directory)) {
              
              # save the plot
              ggsave(filename = paste("Boxplot_of_", xiname, "_and_", xjname, ".pdf", sep = ""), 
                     path = directory,
                     device = "pdf",
                     width = 8, 
                     height = 6, 
                     units = c("in"))
              
            }
            
            # print an update of the number of plots
            print(paste("Image", as.character(p), "Completed", sep = " "))
            
            # update plot index
            p = p + 1
            
            #--------------------------------------------------------------------------------#
            # If the y variables is numeric and the x variable is categorical plot a boxplot #
            #--------------------------------------------------------------------------------#
            
          } else if ((is.factor(dataset[,i]) & (is.numeric(dataset[,j])))){
            
            # create the box plot
            boxplot <- ggplot(data = dataset, 
                              mapping = aes(x = dataset[,i], 
                                            y = dataset[,j])) + 
              geom_boxplot(outlier.colour = 'red') + 
              coord_flip() +
              # Add labels to the plot
              labs(title = paste("Boxplot of", xjname, 
                                 "and", xiname, sep = " "), 
                   x = xiname, 
                   y = xjname) + 
              # Format the text of the plots 
              theme(axis.title.x = element_text(size = 20),
                    axis.title.y = element_text(size = 20),
                    plot.title = element_text(hjust = 0.5, size = 35),
                    axis.text.x = element_text(size = 20, face = "bold"),
                    axis.text.y = element_text(size = 20))
            
            # save the plot to the list
            plot_list[[p]] <- boxplot
            
            # print the plot
            print(boxplot)
            
            # save the plot to the sepecifed directory
            if(!is.null(directory)) {
              
              # save the plot
              ggsave(filename = paste("Boxplot_of_", xjname, "_and_", xiname, ".pdf", sep = ""), 
                     path = directory,
                     device = "pdf",
                     width = 8, 
                     height = 6, 
                     units = c("in"))
              
            }
            
            # print an update of the number of plots
            print(paste("Image", as.character(p), "Completed", sep = " "))
            
            # update plot index
            p = p + 1
            
          }
          
          # update the jth column index
          j = j + 1
          
        }
        
      }
      
      #---------------------------------------------------------------------------#
      # Standardised Visualisations                                               #
      #---------------------------------------------------------------------------#
      
      # plot standardised visualisations
    } else if(standardise == TRUE) {
      
      # use a for loop to loop through the columns of the dataset
      for (i in 1:(ncol(dataset))) {
        
        # create jth columnindex
        j = i + 1
        
        # use a while loop to loop through the remaining columns of the dataset
        while (j <= ncol(dataset)) {
          
          # store the predictor variable names
          xiname <- colnames(dataset)[i]
          
          # store the response variable name
          xjname <- colnames(dataset)[j]
          
          #---------------------------------------------------------------------#
          # If the variable is categorical, plot a bar chart                    #
          #---------------------------------------------------------------------#
          
          if ((is.factor(dataset[,i]) & (is.factor(dataset[,j])))){
            
            # First it is neccesary to aggregate the raw data
            agg_data <- as.data.frame(x = table(dataset[,c(i,j)]), 
                                      col.names = c(xiname, xjname, "Count"))
            
            # create the proportions
            Proportion <- agg_data[,3] / sum(agg_data[,3]) 
            
            # add the proportions to the aggragated dataset
            agg_data[,4] <- Proportion
            
            # Plot the aggregated data
            double_bar_chart<- ggplot(data = agg_data, 
                                      mapping = aes(x = agg_data[,1], 
                                                    y = agg_data[,4])) + 
                                      geom_bar(mapping = aes(fill = agg_data[,2]), 
                                               position = "dodge", 
                                               stat = "identity") + 
                                      # Give the labels to the plots
                                      labs(title = paste("Standardised Double Bar Chart of", xjname, 
                                                         "and", xiname, sep = " "), 
                                      x = xiname, 
                                      y = "Proportion") +
                                      # set the title of the legend
                                      guides(fill = guide_legend(title = yname)) +
                                      # Format the text of the plots 
              theme(axis.title.x = element_text(size = 20),
                    axis.title.y = element_text(size = 20),
                    plot.title = element_text(hjust = 0.5, size = 35),
                    axis.text.x = element_text(size = 20, face = "bold"),
                    axis.text.y = element_text(size = 20),
                    legend.title = element_text(size = 20),
                    legend.text = element_text(size = 30),
                    legend.position = "top")
            
            # save the plot to the list
            plot_list[[p]] <- double_bar_chart
            
            # print the plot
            print(double_bar_chart)
            
            # save the plot to the sepecifed directory
            if(!is.null(directory)) {
              
              # save the plot
              ggsave(filename = paste("Standardised_Double_Bar_Chart_of_", xiname, "_and_", xjname, ".pdf", sep = ""), 
                     path = directory,
                     device = "pdf",
                     width = 8, 
                     height = 6, 
                     units = c("in"))
              
            }
            
            # print an update of the number of plots
            print(paste("Image", as.character(p), "Completed", sep = " "))
            
            # update plot index
            p = p + 1
            
            #-------------------------------------------------------------------#
            # If both variables are numeric, plot a scatterplot                 #
            #-------------------------------------------------------------------#
            
          } else if ((is.numeric(dataset[,i]) & (is.numeric(dataset[,j])))){
            
            # standardise the numeric variables
            dataset <- standardise_variables(dataset = dataset, method = "range", lower_bound = 0, upper_bound = 1)
            
            # create the scatterplot
            scatterplot<- ggplot(data = dataset, 
                                 mapping = aes(x = dataset[,i], 
                                               y = dataset[,j])) + 
                                 geom_point() + 
                                 # Add a regression line in blue
                                 geom_smooth(method = "lm", 
                                             colour = "blue") +
                                 # Add a loess line in red
                                 geom_smooth(method = "loess", 
                                             colour = "red", 
                                             se = FALSE) +
                                 # Add labels to the plot
                                 labs(title = paste("Standardised Scatterplot of", xjname, 
                                                    "and", xiname, sep = " "), 
                                 x = xiname, 
                                 y = xjname) + 
                                 # Format the text of the plots 
              theme(axis.title.x = element_text(size = 20),
                    axis.title.y = element_text(size = 20),
                    plot.title = element_text(hjust = 0.5, size = 35),
                    axis.text.x = element_text(size = 20, face = "bold"),
                    axis.text.y = element_text(size = 20))
            
            # save the plot to the list
            plot_list[[p]] <- scatterplot
            
            # print the plot
            print(scatterplot)
            
            # save the plot to the sepecifed directory
            
            if(!is.null(directory)) {
              
              # save the plot
              ggsave(filename = paste("Standardised_Scatterplot_of_", xjname, 
                                      "_and_", xiname, ".pdf", sep = ""), 
                     path = directory,
                     device = "pdf",
                     width = 8, 
                     height = 6, 
                     units = c("in"))
              
            }
            
            # print an update of the number of plots
            print(paste("Image", as.character(p), "Completed", sep = " "))
            
            # update plot index
            p = p + 1
            
            #--------------------------------------------------------------------------------#
            # If the x variables is numeric and the y variable is categorical plot a boxplot #
            #--------------------------------------------------------------------------------#
            
          } else if((is.numeric(dataset[,i]) & (is.factor(dataset[,j])))){
            
            # standardise the numeric variables
            dataset <- standardise_variables(dataset = dataset, method = "range", lower_bound = 0, upper_bound = 1)
            
            # create the boxplot
            boxplot <- ggplot(data = dataset, 
                              mapping = aes(x = dataset[,j], 
                                            y = dataset[,i])) + 
              geom_boxplot(outlier.colour = 'red') + 
              # Add labels to the plot
              labs(title = paste("Standardised Boxplot of", xiname, 
                                 "and", xjname, sep = " "), 
                   x = xjname, 
                   y = xiname) + 
              # Format the text of the plots 
              theme(axis.title.x = element_text(size = 20),
                    axis.title.y = element_text(size = 20),
                    plot.title = element_text(hjust = 0.5, size = 35),
                    axis.text.x = element_text(size = 20, face = "bold"),
                    axis.text.y = element_text(size = 20))
            
            # save the plot to the list
            plot_list[[p]] <- boxplot
            
            # print the plot
            print(boxplot)
            
            # save the plot to the sepecifed directory
            if(!is.null(directory)) {
              
              # save the plot
              ggsave(filename = paste("Standardised_Boxplot_of_", xiname, "_and_", xjname, ".pdf", sep = ""), 
                     path = directory,
                     device = "pdf",
                     width = 8, 
                     height = 6, 
                     units = c("in"))
              
            }
            
            # print an update of the number of plots
            print(paste("Image", as.character(p), "Completed", sep = " "))
            
            # update plot index
            p = p + 1
            
            #--------------------------------------------------------------------------------#
            # If the y variables is numeric and the x variable is categorical plot a boxplot #
            #--------------------------------------------------------------------------------#
            
          } else if ((is.factor(dataset[,i]) & (is.numeric(dataset[,j])))){
            
            # standardise the numeric variables
            dataset <- standardise_variables(dataset = dataset, method = "range", lower_bound = 0, upper_bound = 1)
            
            # create the boxplot
            boxplot <- ggplot(data = dataset, 
                              mapping = aes(x = dataset[,i], 
                                            y = dataset[,j])) + 
              geom_boxplot(outlier.colour = 'red') + 
              coord_flip() +
              # Add labels to the plot
              labs(title = paste("Standardise Boxplot of", xjname, 
                                 "and", xiname, sep = " "), 
                   x = xiname, 
                   y = xjname) + 
              # Format the text of the plots 
              theme(axis.title.x = element_text(size = 20),
                    axis.title.y = element_text(size = 20),
                    plot.title = element_text(hjust = 0.5, size = 35),
                    axis.text.x = element_text(size = 20, face = "bold"),
                    axis.text.y = element_text(size = 20))
            
            # save the plot to the list
            plot_list[[p]] <- boxplot
            
            # print the plot
            print(boxplot)
            
            # save the plot to the sepecifed directory
            if(!is.null(directory)) {
              
              # save the plot
              ggsave(filename = paste("Standardised_Boxplot_of_", xjname, "_and_", xiname, ".pdf", sep = ""), 
                     path = directory,
                     device = "pdf",
                     width = 8, 
                     height = 6, 
                     units = c("in"))
              
            }
            
            # print an update of the number of plots
            print(paste("Image", as.character(p), "Completed", sep = " "))
            
            # update plot index
            p = p + 1
            
          }
          
          j = j + 1
          
        }
        
      }
      
    }
    
    #-------------------------------------------------------------------------------#
    # When y_index or y_name != NULL                                                #
    #-------------------------------------------------------------------------------#
    
  } else if(!is.null(y_index) | !is.null(y_name)){
    
    if(!is.null(y_name)){
      y_index = which(colnames(dataset) == y_name)
    }
    
    # NOTE: ggplot2 automatically removes missing observations
    # Make sure the datset is converted to a data frame
    dataset <- as.data.frame(x = dataset)
    
    # create a plot list to hold the pictures
    plot_list <- list()
    
    # p is the picture index - tracks the number of pictures created
    p = 1
    
    #---------------------------------------------------------------------------#
    # Unstandardised Visualisations                                             #
    #---------------------------------------------------------------------------#
    
    # plot regular visualisations
    if(standardise == FALSE) {
      
      # use a for loop to loop through the columns of the dataset
      for (i in 1:(ncol(dataset))) {
        
        # store the predictor variable names
        xiname <- colnames(dataset)[i]
        
        # store the response variable name
        yname <- colnames(dataset)[y_index]
        
        #-----------------------------------------------------------------------#
        # If the variable is categorical, plot a bar chart                      #
        #-----------------------------------------------------------------------#
        
        if ((is.factor(dataset[,i]) & (is.factor(dataset[,y_index])))){
          
          # First it is neccesary to aggregate the raw data
          agg_data <- as.data.frame(x = table(dataset[,c(i,y_index)]), 
                                    col.names = c(xiname, yname, "Count"))
          
          # Plot the aggregated data
          double_bar_chart <- ggplot(data = agg_data, 
                                     mapping = aes(x = agg_data[,1], 
                                                   y = agg_data[,3])) + 
                              geom_bar(mapping = aes(fill = agg_data[,2]), 
                                       position = "dodge", 
                                       stat = "identity") + 
                              # Give the labels to the plots
                              labs(title = paste("Double Bar Chart of", yname, 
                                                 "and", xiname, sep = " "), 
                                    x = xiname, 
                                    y = "Count") +
                              # set the title of the legend
                              guides(fill = guide_legend(title = yname)) +
                              # Format the text of the plots 
            theme(axis.title.x = element_text(size = 20),
                  axis.title.y = element_text(size = 20),
                  plot.title = element_text(hjust = 0.5, size = 35),
                  axis.text.x = element_text(size = 20, face = "bold"),
                  axis.text.y = element_text(size = 20),
                  legend.title = element_text(size = 20),
                  legend.text = element_text(size = 30),
                  legend.position = "top")
          
          # save the plot to the list
          plot_list[[p]] <- double_bar_chart
          
          # print the plot
          print(double_bar_chart)
          
          # save the plot to the sepecifed directory
          if(!is.null(directory)) {
            
            # save the plot
            ggsave(filename = paste("Double_Bar_Chart_of_", xiname, 
                                    "_and_", yname, ".pdf", sep = ""), 
                   path = directory,
                   device = "pdf",
                   width = 8, 
                   height = 6, 
                   units = c("in"))
            
          }
          
          # print an update of the number of plots
          print(paste("Image", as.character(p), "Completed", sep = " "))
          
          # update plot index
          p = p + 1
          
          #---------------------------------------------------------------------#
          # If both variables are numeric, plot a scatterplot                   #
          #---------------------------------------------------------------------#
          
        } else if ((is.numeric(dataset[,i]) & (is.numeric(dataset[,y_index])))){
          
          # create the scatterplot
          scatterplot <- ggplot(data = dataset, 
                                mapping = aes(x = dataset[,i], 
                                              y = dataset[,y_index])) + 
                        geom_point() + 
                        # Add a regression line in blue
                        geom_smooth(method = "lm", 
                                    colour = "blue") +
                        # Add a loess line in red
                        geom_smooth(method = "loess", 
                                    colour = "red", 
                                    se = FALSE) +
                        # Add labels to the plot
                        labs(title = paste("Scatterplot of", yname, 
                                           "and", xiname, sep = " "), 
                             x = xiname, 
                             y = yname) + 
                        # Format the text of the plots 
            theme(axis.title.x = element_text(size = 20),
                  axis.title.y = element_text(size = 20),
                  plot.title = element_text(hjust = 0.5, size = 35),
                  axis.text.x = element_text(size = 20, face = "bold"),
                  axis.text.y = element_text(size = 20))
          
          # save the plot to the list
          plot_list[[p]] <- scatterplot
          
          # print the plot
          print(scatterplot)
          
          # save the plot to the specified directory
          if(!is.null(directory)) {
            
            # save the plot
            ggsave(filename = paste("Scatterplot_of_", yname, 
                                    "_and_", xiname, ".pdf", sep = ""), 
                   path = directory,
                   device = "pdf",
                   width = 8, 
                   height = 6, 
                   units = c("in"))
            
          }
          
          # print an update of the number of plots
          print(paste("Image", as.character(p), "Completed", sep = " "))
          
          # update plot index
          p = p + 1
          
          #--------------------------------------------------------------------------------#
          # If the x variable is numeric and the y variable is categorical, plot a boxplot #
          #--------------------------------------------------------------------------------#
          
        } else if((is.numeric(dataset[,i]) & (is.factor(dataset[,y_index])))){
          
          # create the boxplot
          boxplot <- ggplot(data = dataset, 
                            mapping = aes(x = dataset[,y_index], 
                                          y = dataset[,i])) + 
                    geom_boxplot(outlier.colour = 'red') + 
                    # Add labels to the plot
                    labs(title = paste("Boxplot of", yname, 
                                       "and", xiname, sep = " "), 
                         x = yname, 
                         y = xiname) + 
                    # Format the text of the plots 
            theme(axis.title.x = element_text(size = 20),
                  axis.title.y = element_text(size = 20),
                  plot.title = element_text(hjust = 0.5, size = 35),
                  axis.text.x = element_text(size = 20, face = "bold"),
                  axis.text.y = element_text(size = 20))
          
          # save the plot to the list
          plot_list[[p]] <- boxplot
          
          # print the plot
          print(boxplot)
          
          # save the plot to the specified directory
          if(!is.null(directory)) {
            
            # save the plot
            ggsave(filename = paste("Boxplot_of_", yname, "_and_", xiname, ".pdf", sep = ""), 
                   path = directory,
                   device = "pdf",
                   width = 8, 
                   height = 6, 
                   units = c("in"))
            
          }
          
          # print an update of the number of plots
          print(paste("Image", as.character(p), "Completed", sep = " "))
          
          # update plot index
          p = p + 1
          
          #--------------------------------------------------------------------------------#
          # If the y variable is numeric and the x variable is categorical, plot a boxplot #
          #--------------------------------------------------------------------------------#
          
        } else if((is.factor(dataset[,i]) & (is.numeric(dataset[,y_index])))){
          
          # create the boxplot
          boxplot <- ggplot(data = dataset, 
                            mapping = aes(x = dataset[,i], 
                                          y = dataset[,y_index])) + 
                     geom_boxplot(outlier.colour = 'red') + 
                     coord_flip() +
                     # Add labels to the plot
                     labs(title = paste("Boxplot of", xiname, 
                                        "and", yname, sep = " "), 
                          x = xiname, 
                          y = yname) + 
                     # Format the text of the plots 
            theme(axis.title.x = element_text(size = 20),
                  axis.title.y = element_text(size = 20),
                  plot.title = element_text(hjust = 0.5, size = 35),
                  axis.text.x = element_text(size = 20, face = "bold"),
                  axis.text.y = element_text(size = 20))
          
          # save the plot to the list
          plot_list[[p]] <- boxplot
          
          # print the plot
          print(boxplot)
          
          # save the plot to the specified directory
          if(!is.null(directory)) {
            
            # save the plot
            ggsave(filename = paste("Boxplot_of_", xiname, "_and_", yname, ".pdf", sep = ""), 
                   path = directory,
                   device = "pdf",
                   width = 8, 
                   height = 6, 
                   units = c("in"))
            
          }
          
          # print an update of the number of plots
          print(paste("Image", as.character(p), "Completed", sep = " "))
          
          # update plot index
          p = p + 1
          
          
        }
        
      }
      
    
      #---------------------------------------------------------------------------#
      # Standardised Visualisations                                               #
      #---------------------------------------------------------------------------#
     
    # create standardised plots 
  } else if(standardise == TRUE) {
    
    # use a for loop to loop through the columns of the dataset
    for (i in 1:(ncol(dataset))) {
      
      # save the name of the predictor variable
      xiname <- colnames(dataset)[i]
      
      # save the name of the response variable
      yname <- colnames(dataset)[y_index]
        
      #-----------------------------------------------------------------------#
      # If both variables are categorical, plot a double bar chart            #
      #-----------------------------------------------------------------------#
        
      if ((is.factor(dataset[,i]) & (is.factor(dataset[,y_index])))){
          
        # First it is necessary to aggregate the raw data
        agg_data <- as.data.frame(x = table(dataset[,c(i,y_index)]), 
                                    col.names = c(xiname, yname, "Count"))
          
        # create the proportions
        Proportion <- agg_data[,3] / sum(agg_data[,3]) 
        
        # add the proportions to the aggregated data
        agg_data[,4] <- Proportion
          
        # Plot the aggregated data
        double_bar_chart<- ggplot(data = agg_data, 
                                    mapping = aes(x = agg_data[,1], 
                                                  y = agg_data[,4])) + 
                           geom_bar(mapping = aes(fill = agg_data[,2]), 
                                    position = "dodge", 
                                    stat = "identity") + 
                           # Give the labels to the plots
                           labs(title = paste("Standardised Double Bar Chart of", yname, 
                                              "and", xiname, sep = " "), 
                                x = xiname, 
                                y = "Proportion") +
                           # set the title of the legend
                           guides(fill = guide_legend(title = yname)) +
                           # Format the text of the plots 
          theme(axis.title.x = element_text(size = 20),
                axis.title.y = element_text(size = 20),
                plot.title = element_text(hjust = 0.5, size = 35),
                axis.text.x = element_text(size = 20, face = "bold"),
                axis.text.y = element_text(size = 20),
                legend.title = element_text(size = 20),
                legend.text = element_text(size = 30),
                legend.position = "top")
          
        # save the plot to the list
        plot_list[[p]] <- double_bar_chart
          
        # print the plot
        print(double_bar_chart)
          
        # save the plot to the specified directory
        if(!is.null(directory)) {
            
          # save the plot
          ggsave(filename = paste("Standardised_Double_Bar_Chart_of_", xiname, 
                                    "_and_", yname, ".pdf", sep = ""), 
                   path = directory,
                   device = "pdf",
                   width = 8, 
                   height = 6, 
                   units = c("in"))
          
        }
        
        # print an update of the number of plots
        print(paste("Image", as.character(p), "Completed", sep = " "))
        
        # update plot index
        p = p + 1
        
        #---------------------------------------------------------------------#
        # If both variables are numeric, plot a scatterplot                   #
        #---------------------------------------------------------------------#
        
        } else if ((is.numeric(dataset[,i]) & (is.numeric(dataset[,y_index])))){
          
          # standardise the numeric variables
          dataset <- standardise_variables(dataset = dataset, method = "range", lower_bound = 0, upper_bound = 1)
          
          # create the scatterplot
          scatterplot<- ggplot(data = dataset, 
                               mapping = aes(x = dataset[,i], 
                                             y = dataset[,y_index])) + 
                               geom_point() + 
                               # Add a regression line in blue
                               geom_smooth(method = "lm", 
                                           colour = "blue") +
                               # Add a loess line in red
                               geom_smooth(method = "loess", 
                                           colour = "red", 
                                           se = FALSE) +
                               # Add labels to the plot
                               labs(title = paste("Standardised Scatterplot of", yname, 
                                                  "and", xiname, sep = " "), 
                                    x = xiname, 
                                    y = yname) + 
                               # Format the text of the plots 
            theme(axis.title.x = element_text(size = 20),
                  axis.title.y = element_text(size = 20),
                  plot.title = element_text(hjust = 0.5, size = 35),
                  axis.text.x = element_text(size = 20, face = "bold"),
                  axis.text.y = element_text(size = 20))
          
          # save the plot to the list
          plot_list[[p]] <- scatterplot
          
          # print the plot
          print(scatterplot)
          
          # save the plot to the specified directory
          if(!is.null(directory)) {
            
            # save the plot
            ggsave(filename = paste("Standardised_Scatterplot_of_", yname, 
                                    "_and_", xiname, ".pdf", sep = ""), 
                   path = directory,
                   device = "pdf",
                   width = 8, 
                   height = 6, 
                   units = c("in"))
            
          }
          
          # print an update of the number of plots
          print(paste("Image", as.character(p), "Completed", sep = " "))
          
          # update plot index
          p = p + 1
          
          #--------------------------------------------------------------------------------#
          # If the x variable is numeric and the y variable is categorical, plot a boxplot #
          #--------------------------------------------------------------------------------#
          
        } else if((is.numeric(dataset[,i]) & (is.factor(dataset[,y_index])))){
          
          # standardise the numeric variables
          dataset <- standardise_variables(dataset = dataset, method = "range", lower_bound = 0, upper_bound = 1)
          
          # create the box plot
          boxplot <- ggplot(data = dataset, 
                            mapping = aes(x = dataset[,y_index], 
                                          y = dataset[,i])) + 
                            geom_boxplot(outlier.colour = 'red') + 
                            # Add labels to the plot
                            labs(title = paste("Standardised Boxplot of", yname, 
                                               "and", xiname, sep = " "), 
                                 x = yname, 
                                 y = xiname) + 
                            # Format the text of the plots 
            theme(axis.title.x = element_text(size = 20),
                  axis.title.y = element_text(size = 20),
                  plot.title = element_text(hjust = 0.5, size = 35),
                  axis.text.x = element_text(size = 20, face = "bold"),
                  axis.text.y = element_text(size = 20))
          
          # save the plot to the list
          plot_list[[p]] <- boxplot
          
          # print the plot
          print(boxplot)
          
          # save the plot to the specified directory
          if(!is.null(directory)) {
            
            # save the plot
            ggsave(filename = paste("Standardised_Boxplot_of_", yname, "_and_", xiname, ".pdf", sep = ""), 
                   path = directory,
                   device = "pdf",
                   width = 8, 
                   height = 6, 
                   units = c("in"))
            
          }
          
          # print an update of the number of plots
          print(paste("Image", as.character(p), "Completed", sep = " "))
          
          # update plot index
          p = p + 1
          
          #--------------------------------------------------------------------------------#
          # If the y variable is numeric and the x variable is categorical, plot a boxplot #
          #--------------------------------------------------------------------------------#
          
        } else if((is.factor(dataset[,i]) & (is.numeric(dataset[,y_index])))){
          
          # standardise the numeric variables
          dataset <- standardise_variables(dataset = dataset, method = "range", lower_bound = 0, upper_bound = 1)
          
          # create the boxplot
          boxplot <- ggplot(data = dataset, 
                            mapping = aes(x = dataset[,i], 
                                          y = dataset[,y_index])) + 
                     geom_boxplot(outlier.colour = 'red') + 
                     coord_flip() +
                     # Add labels to the plot
                     labs(title = paste("Standardised Boxplot of", xiname, 
                                        "and", yname, sep = " "), 
                          x = xiname, 
                          y = yname) + 
                     # Format the text of the plots 
            theme(axis.title.x = element_text(size = 20),
                  axis.title.y = element_text(size = 20),
                  plot.title = element_text(hjust = 0.5, size = 35),
                  axis.text.x = element_text(size = 20, face = "bold"),
                  axis.text.y = element_text(size = 20))
          
          # save the plot to the list
          plot_list[[p]] <- boxplot
          
          # print the plot
          print(boxplot)
          
          # save the plot to the specified directory
          if(!is.null(directory)) {
            
            # save the plot
            ggsave(filename = paste("Standardised_Boxplot_of_", xiname, "_and_", yname, ".pdf", sep = ""), 
                   path = directory,
                   device = "pdf",
                   width = 8, 
                   height = 6, 
                   units = c("in"))
            
          }
          
          # print an update of the number of plots
          print(paste("Image", as.character(p), "Completed", sep = " "))
          
          # update plot index
          p = p + 1
          
        }
      
      }
    
    }
    
  }
  
  # click through the plots
  par(op)
  
}
oislen/BuenaVista documentation built on May 16, 2019, 8:12 p.m.