#' @title Visualise each Variable in a given Dataset
#'
#' @description Plots an appropriate visualisation for each variable in a given dataset.
#' This function utilises ggplot2 to design and plot the visualisations.
#' Plots a histogram for numeric variables.
#' Plots a bar chart for factor variables.
#' Variables of any other type (e.g. character or logical) are skipped.
#' There is also an option to click through the plots one by one, or print them all simultaneously.
#' The plots are output as a list.
#' The plots can also be saved to a specified directory.
#' Note that it is crucial that the variables are correctly defined as either numeric or factor.
#'
#' @param dataset A data frame to be visualised. A single atomic vector or
#'   factor is also accepted and is plotted as a one-variable dataset, labelled
#'   with the expression that was passed in.
#'
#' @param click A boolean value, indicating whether to click through the plots one by one, default is TRUE.
#'
#' @param directory A character object specifying the directory where the plots are to be saved as .pdf files.
#'   The default, NULL, saves nothing.
#'
#' @return Invisibly, a list of ggplot objects (bar charts or histograms), one
#'   per plotted variable and named after that variable. The list is empty when
#'   no variable could be plotted. Each plot is also printed as a side effect.
#'
#' @import ggplot2
#'
#' @export
#'
#' @seealso \code{\link{visualise_qqplot}}, \code{\link{visualise_residuals}}, \code{\link{visualise_variables_xx}}
#'
#' @examples
#' #-- Example 1: LungCap Data --#
#'
#' # Regular visualisations of the data frame, with click option
#' visualise_variables_x(dataset = lungcap, click = TRUE)
#'
#' # Regular visualisations without the click option
#' visualise_variables_x(dataset = lungcap, click = FALSE)
#'
#' #-- Example 2: Titanic Data --#
#'
#' # Regular visualisations with click option
#' visualise_variables_x(dataset = titanic, click = TRUE)
#'
#' # Regular visualisations without click option
#' visualise_variables_x(dataset = titanic, click = FALSE)
#'
visualise_variables_x <- function(dataset,
                                  click = TRUE,
                                  directory = NULL)
{
  # Capture the caller's expression before `dataset` is reassigned below; it
  # labels the plot when a bare vector (which has no column name) is supplied.
  dataset_label <- deparse(substitute(dataset))[1L]

  #-------------------------------------------------------------------------#
  # Normalise the input to a data frame                                     #
  #-------------------------------------------------------------------------#
  if (is.data.frame(dataset)) {
    # Make sure the dataset is a plain data frame
    dataset <- as.data.frame(x = dataset)
  } else if (is.factor(dataset) ||
             (!is.null(dataset) && is.atomic(dataset) && is.null(dim(dataset)))) {
    # ggplot requires a data frame, so a single vector is wrapped in a
    # one-column data frame. Factors are tested explicitly because
    # is.vector() is FALSE for them.
    dataset <- data.frame(value = unname(dataset), stringsAsFactors = FALSE)
    names(dataset) <- dataset_label
  } else {
    warning("`dataset` must be a data frame, an atomic vector or a factor; ",
            "nothing was plotted.",
            call. = FALSE)
    return(invisible(list()))
  }

  # Only factor and numeric variables can be plotted; everything else is skipped
  plottable <- vapply(
    dataset,
    function(column) {
      is.null(dim(column)) && (is.factor(column) || is.numeric(column))
    },
    logical(1)
  )
  targets <- unname(which(plottable))

  # One slot per plot, named after the variable it shows
  plot_list <- stats::setNames(vector("list", length(targets)),
                               names(dataset)[targets])
  if (length(targets) == 0L) {
    return(invisible(plot_list))
  }

  # Set whether to click through the plots, and guarantee the previous
  # setting is restored even if plotting or saving fails part-way through
  op <- graphics::par(ask = click)
  on.exit(graphics::par(op), add = TRUE)

  # NOTE: ggplot2 automatically removes missing observations
  for (k in seq_along(targets)) {
    vname <- names(dataset)[targets[k]]
    values <- dataset[[targets[k]]]

    if (is.factor(values)) {
      # (1) If the variable is categorical, plot a bar chart
      current_plot <- .vvx_bar_chart(values = values, vname = vname)
      file_prefix <- "Bar_Chart_of_"
    } else {
      # (2) If the variable is numeric, plot a histogram
      current_plot <- .vvx_histogram(values = values, vname = vname)
      file_prefix <- "Histogram_of_"
    }

    # Save the plot to the list and print it
    plot_list[[k]] <- current_plot
    print(current_plot)

    # Save the plot to the specified directory
    if (!is.null(directory)) {
      .vvx_save(plot = current_plot,
                prefix = file_prefix,
                vname = vname,
                directory = directory)
    }

    # Print the picture index (counts only the pictures actually created)
    print(paste("Image", as.character(k), "Completed", sep = " "))
  }

  invisible(plot_list)
}

# Text formatting shared by every plot produced by visualise_variables_x().
.vvx_theme <- function() {
  theme(axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        plot.title = element_text(hjust = 0.5, size = 35),
        axis.text.x = element_text(size = 20, face = "bold"),
        axis.text.y = element_text(size = 20))
}

# Build a bar chart of the level counts of the factor `values`, titled and
# labelled with `vname`.
.vvx_bar_chart <- function(values, vname) {
  # Aggregate the raw data into one row per level
  agg_data <- as.data.frame(x = table(values),
                            responseName = "Count",
                            stringsAsFactors = TRUE)
  names(agg_data)[1L] <- "Level"

  # Columns are referenced through the plot's own data (not through variables
  # of the calling loop), so the plot still renders correctly when it is drawn
  # later from the returned list.
  ggplot(data = agg_data,
         mapping = aes(x = .data[["Level"]], y = .data[["Count"]])) +
    geom_bar(stat = "identity",
             width = 0.5,
             position = position_dodge(),
             colour = "black",
             fill = "lightblue") +
    labs(title = paste("Bar Chart of", vname, sep = " "),
         x = vname,
         y = "Count") +
    .vvx_theme()
}

# Build a 30-bin histogram of the numeric vector `values`, titled and labelled
# with `vname`.
.vvx_histogram <- function(values, vname) {
  plot_data <- data.frame(Value = values)

  histogram <- ggplot(data = plot_data,
                      mapping = aes(x = .data[["Value"]])) +
    geom_histogram(colour = "black",
                   fill = "lightblue",
                   bins = 30) +
    labs(title = paste("Histogram of", vname, sep = " "),
         x = vname,
         y = "Frequency")

  # Limit the x axis to the observed range. This is skipped when there is no
  # finite value, because min()/max() of an empty set would give infinite
  # limits.
  finite_values <- values[is.finite(values)]
  if (length(finite_values) > 0L) {
    histogram <- histogram +
      coord_cartesian(xlim = range(finite_values), expand = TRUE)
  }

  histogram + .vvx_theme()
}

# Save `plot` as "<prefix><vname>.pdf" (8 x 6 inches) inside `directory`.
.vvx_save <- function(plot, prefix, vname, directory) {
  # Replace characters that are not allowed in file names on common platforms;
  # names without such characters are used unchanged.
  safe_name <- chartr("/\\:*?\"<>|", "_________", vname)
  ggsave(filename = paste0(prefix, safe_name, ".pdf"),
         plot = plot,
         path = directory,
         device = "pdf",
         width = 8,
         height = 6,
         units = "in")
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.