#' @title error_bars
#' @description Bins the x column of a two-column data frame, summarises the
#'   y column within each bin (mean, min, max, standard deviation, standard
#'   error, count) and draws the per-bin mean of y with error bars.
#' @param df A data frame with exactly two columns, in the order (x, y).
#'   Subclasses of data.frame are accepted and converted with
#'   \code{as.data.frame()}. \code{NA} and \code{Inf} values are not allowed,
#'   and x must be numeric.
#' @param s Optional numeric vector of finite bin edges (e.g. generated by
#'   \code{seq()}), at least 3 values long. It is sorted and de-duplicated
#'   before use. Each x is assigned to the interval \code{(lower, upper]} it
#'   falls in and labelled with the interval's upper edge; values above the
#'   largest edge are labelled \code{Inf}. If \code{NULL} (the default), the
#'   range \code{floor(min(x))} to \code{ceiling(max(x))} is split into ten
#'   equal-width bins.
#' @param draw Which dispersion measure to draw on the plot: one of
#'   'standard error', 'standard deviation' or 'both' (the default).
#' @return A list of class 'error_bars.object' with elements (i) \code{plot},
#'   a ggplot object showing the per-bin mean of y with the requested error
#'   bars, (ii) \code{df.summary}, one row per bin with the summary
#'   variables, and (iii) \code{df}, a copy of the input with its columns
#'   renamed to \code{x} and \code{y} and the bin label appended as a third
#'   column \code{bin_x}.
#' @export
error_bars <- function(df, s = NULL, draw = 'both') {
  # ---- validate 's' ----
  # The default is NULL, so test with is.null() rather than missing(): an
  # explicit `s = NULL` must behave exactly like an omitted argument.
  if (!is.null(s)) {
    if (!is.numeric(s)) {
      stop("'s' must be a numeric vector")
    }
    if (length(s) < 3) {
      stop("'s' must be at least 3 values long")
    }
    # NA or infinite edges would make the breaks passed to cut() invalid or
    # duplicate the -Inf/Inf sentinels added below.
    if (any(!is.finite(s))) {
      stop("'s' must not contain NA or Inf values")
    }
  }

  # ---- validate 'df' ----
  # inherits() copes with multi-element class vectors (e.g. tibbles), where
  # comparing class(df) to a string yields a condition of length > 1.
  if (!inherits(df, "data.frame")) {
    stop("df must be a dataframe object and must have only two columns (x,y). If your input looks like a dataframe, check it by using class() and as.data.frame() functions ")
  }
  # Normalise subclasses to a plain data.frame.
  df <- as.data.frame(df)
  if (ncol(df) != 2) {
    stop("df must have only two columns: (x,y)")
  }
  # Check each column on its own; apply() would first coerce the whole data
  # frame to a matrix.
  has_bad_values <- vapply(
    df,
    function(column) any(is.na(column) | is.infinite(column)),
    logical(1)
  )
  if (any(has_bad_values)) {
    stop("the data frame contains either Infinite or NA values. These must be removed.")
  }

  # ---- validate 'draw' ----
  valid_draw <- c('standard error', 'standard deviation', 'both')
  if (length(draw) != 1 || !(draw %in% valid_draw)) {
    stop("'draw can only take the values 'standard error', 'standard deviation', or 'both'.")
  }

  x <- df[[1]]
  if (!is.numeric(x)) {
    stop("the first column of df (x) must be numeric")
  }

  # ---- dependencies ----
  # Use the packages through their namespaces instead of library(), so that
  # calling this function does not alter the caller's search path.
  for (pkg in c("ggplot2", "dplyr")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop(pkg, " must be installed to use error_bars()")
    }
  }

  # ---- bin edges ----
  if (is.null(s)) {
    if (length(x) == 0) {
      stop("'s' cannot be derived automatically from an empty data frame")
    }
    lower <- min(floor(x))
    upper <- max(ceiling(x))
    if (upper > lower) {
      # Ten equal-width bins; the first element (the lower bound) is dropped
      # because -Inf is used as the lowest break.
      s <- seq(lower, upper, by = (upper - lower) / 10)[-1]
    } else {
      # Every x is the same whole number: a single finite bin is enough.
      s <- upper
    }
  } else {
    # cut() sorts its breaks internally; sort the edges as well so that the
    # labels stay aligned with the intervals, and drop duplicate edges.
    s <- sort(unique(s))
  }

  # ---- assign bins ----
  df.copy <- df
  colnames(df.copy) <- c('x', 'y')
  # Each interval is labelled by its upper edge. Looking the edge up by
  # interval index avoids a lossy numeric -> character -> numeric round trip.
  upper_edges <- c(s, Inf)
  bin_index <- cut(x, breaks = c(-Inf, s, Inf), labels = FALSE)
  df.copy$bin_x <- upper_edges[bin_index]

  # ---- per-bin summary ----
  df.summary <- dplyr::summarise(
    dplyr::group_by(df.copy, bin_x),
    xmean = mean(x),
    ymin = min(y),
    ymax = max(y),
    ymean = mean(y),
    ysd = stats::sd(y),
    ymean_plus_sd = mean(y) + stats::sd(y),
    ymean_minus_sd = mean(y) - stats::sd(y),
    yse = stats::sd(y) / sqrt(length(y)),
    ymean_plus_se = mean(y) + stats::sd(y) / sqrt(length(y)),
    ymean_minus_se = mean(y) - stats::sd(y) / sqrt(length(y)),
    num = dplyr::n()
  )

  # ---- plot ----
  errorbars_plot <- ggplot2::ggplot(df.summary, ggplot2::aes(bin_x, ymean)) +
    ggplot2::geom_point(size = 2)
  # The colour is mapped inside aes() on purpose: that is what creates the
  # legend entry for each measure.
  if (draw %in% c('both', 'standard deviation')) {
    errorbars_plot <- errorbars_plot +
      ggplot2::geom_errorbar(
        ggplot2::aes(ymin = ymean_minus_sd, ymax = ymean_plus_sd,
                     color = 'Standard Deviation'),
        width = 0.75
      )
  }
  if (draw %in% c('both', 'standard error')) {
    errorbars_plot <- errorbars_plot +
      ggplot2::geom_errorbar(
        ggplot2::aes(ymin = ymean_minus_se, ymax = ymean_plus_se,
                     color = 'Standard Error'),
        width = 0.75
      )
  }
  errorbars_plot <- errorbars_plot +
    ggplot2::labs(x = 'x bin', y = 'y values') +
    ggplot2::ggtitle('Plot of error bars') +
    ggplot2::scale_colour_discrete(name = "Measures") +
    ggplot2::theme_bw()

  structure(
    list(plot = errorbars_plot,
         df.summary = df.summary,
         df = df.copy),
    class = 'error_bars.object'
  )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.