# R/visualise_variables_x.R

#' @title Visualise each Variable in a given Dataset
#'
#' @description Plots an appropriate visualisation for each variable in a given dataset.
#' This function utilises ggplot2 to design and plot the visualisations.
#' Plots a histogram for numeric variables.
#' Plots a bar chart for factor variables.
#' Variables of any other type (e.g. character or logical) are skipped.
#' There is also an option to click through the plots one by one, or print them all simultaneously.
#' The plots are returned (invisibly) as a list.
#' The plots can also be saved as .pdf files to a specified directory.
#' Note, that it is crucial that the variables are correctly defined as either numeric or categorical (factor).
#'
#' @param dataset A data frame to be visualised. A single numeric vector or
#' factor is also accepted, in which case it is plotted under the placeholder
#' name "vname".
#'
#' @param click A boolean value, indicating whether to click through the plots one by one, default is TRUE.
#'
#' @param directory A character object specifying the directory where the plots are to be saved as .pdf files.
#' The default, NULL, means that no files are written.
#'
#' @return Invisibly returns a list of ggplot objects, one bar chart or histogram
#' for each plotted variable, named after the variables. The list is empty when
#' the input contains no numeric or factor variables.
#'
#' @import ggplot2
#'
#' @export
#'
#' @seealso \code{\link{visualise_qqplot}}, \code{\link{visualise_residuals}}, \code{\link{visualise_variables_xx}}
#'
#' @examples
#' #-- Example 1: LungCap Data --#
#'
#' # Regular visualisations of the data frame, with click option
#' visualise_variables_x(dataset = lungcap, click = TRUE)
#'
#' # Regular visualisations without the click option
#' visualise_variables_x(dataset = lungcap, click = FALSE)
#'
#' #-- Example 2: Titanic Data --#
#'
#' # Regular visualisations with click option
#' visualise_variables_x(dataset = titanic, click = TRUE)
#'
#' # Regular visualisations without click option
#' visualise_variables_x(dataset = titanic, click = FALSE)
#'
visualise_variables_x <- function(dataset,
                                  click = TRUE,
                                  directory = NULL) {

  #-------------------------------------------------------------------------#
  # Normalise the input                                                     #
  #-------------------------------------------------------------------------#

  # ggplot requires a data frame, so a lone vector / factor is wrapped in a
  # one-column data frame and then handled exactly like any other column.
  # (is.vector() is FALSE for factors, hence the is.atomic() test.)
  if (is.data.frame(dataset)) {
    dataset <- as.data.frame(x = dataset)
  } else if (is.atomic(dataset) && is.null(dim(dataset))) {
    dataset <- data.frame(vname = dataset, stringsAsFactors = FALSE)
  } else {
    # Unsupported input (list, matrix, ...): nothing to plot
    return(invisible(list()))
  }

  # Only factor and numeric columns can be plotted; is.numeric() already
  # covers both integer and double columns.
  col_names <- colnames(dataset)
  plottable <- vapply(X = dataset,
                      FUN = function(column) {
                        is.factor(column) || is.numeric(column)
                      },
                      FUN.VALUE = logical(1))
  plot_index <- unname(which(plottable))

  # Return before touching the graphics device when there is nothing to draw
  if (length(plot_index) == 0) {
    return(invisible(list()))
  }

  # Pre-allocate the list holding the pictures, one slot per plotted variable
  plot_list <- vector(mode = "list", length = length(plot_index))
  names(plot_list) <- col_names[plot_index]

  # Set whether to click through the plots, and make sure the previous
  # setting is restored even if plotting or saving fails part way through
  op <- par(ask = click)
  on.exit(par(op), add = TRUE)

  #-------------------------------------------------------------------------#
  # Plot each Variable                                                      #
  #-------------------------------------------------------------------------#

  # p is the picture index - tracks the number of pictures created
  for (p in seq_along(plot_index)) {

    i <- plot_index[p]
    vname <- col_names[i]
    values <- dataset[[i]]

    if (is.factor(values)) {
      # (1) Categorical variable: bar chart
      picture <- .vvx_bar_chart(values = values, vname = vname)
      prefix <- "Bar_Chart_of_"
    } else {
      # (2) Numeric variable: histogram
      picture <- .vvx_histogram(values = values, vname = vname)
      prefix <- "Histogram_of_"
    }

    # save the plot to the list
    plot_list[[p]] <- picture

    # print the plot
    print(picture)

    # save the plot to the specified directory; the plot is passed
    # explicitly rather than relying on ggplot2's "last plot" state
    if (!is.null(directory)) {
      ggsave(filename = paste0(prefix, vname, ".pdf"),
             plot = picture,
             path = directory,
             device = "pdf",
             width = 8,
             height = 6,
             units = "in")
    }

    # Print the picture index
    print(paste("Image", as.character(p), "Completed", sep = " "))

  }

  invisible(plot_list)

}

# Shared text formatting applied to every plot produced above.
.vvx_text_theme <- function() {
  theme(axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        plot.title = element_text(hjust = 0.5, size = 35),
        axis.text.x = element_text(size = 20, face = "bold"),
        axis.text.y = element_text(size = 20))
}

# Builds a bar chart of the level counts of the factor `values`,
# labelled with the variable name `vname`.
.vvx_bar_chart <- function(values, vname) {

  # First it is necessary to aggregate the raw data (missing values are
  # not counted by table()). Level order is preserved on the x axis.
  counts <- table(values)
  agg_data <- data.frame(level = factor(x = names(counts),
                                        levels = names(counts)),
                         count = as.vector(counts))

  # The columns are mapped through the .data pronoun, so the plot carries
  # its own data and can be re-drawn later from the returned list
  ggplot(data = agg_data,
         mapping = aes(x = .data[["level"]], y = .data[["count"]])) +
    geom_bar(stat = "identity",
             width = 0.5,
             position = position_dodge(),
             colour = "black",
             fill = "lightblue") +
    # Give the labels to the plots
    labs(title = paste("Bar Chart of", vname, sep = " "),
         x = vname,
         y = "Count") +
    # Format the text of the plots
    .vvx_text_theme()

}

# Builds a 30-bin histogram of the numeric vector `values`,
# labelled with the variable name `vname`.
.vvx_histogram <- function(values, vname) {

  plot_data <- data.frame(value = values)

  histogram <- ggplot(data = plot_data,
                      mapping = aes(x = .data[["value"]])) +
    geom_histogram(colour = "black",
                   fill = "lightblue",
                   bins = 30) +
    # Give the labels to the plots
    labs(title = paste("Histogram of", vname, sep = " "),
         x = vname,
         y = "Frequency")

  # Set the limits of the x axis to the observed range. This is skipped
  # when there are no finite observations, where min() / max() would
  # otherwise return Inf / -Inf with a warning.
  finite_values <- values[is.finite(values)]
  if (length(finite_values) > 0) {
    histogram <- histogram +
      coord_cartesian(xlim = range(finite_values), expand = TRUE)
  }

  # Format the text of the plots
  histogram + .vvx_text_theme()

}
# oislen/BuenaVista documentation built on May 16, 2019, 8:12 p.m.