# returns a list of plots, one for each subgroup within the group
# TODO - are we still using the textual and numerical data - assume so for now.1
# TODO don't cleanup the data, remove it
# TODO figure out the question stuff
# Pass in the raw numerical data frame. No altering, don't get rid of the top rows
#' T
#'
#' Assumes df has been cleaned and converted.
#'
#'@param df
#'@param org
#'@param group_name
#'@param usePercent
#'@param getAxisLimits
#'@param getAxisLabels
#'@param getGraphLabels
#'@param text_df
#'@param label_df
#'
#'@importFrom rlang .data
#'
#'@return
#'@export
makeGraphs <- function(df, org, group_name, usePercent = TRUE, getXAxisLimits = getAxisLimits, getXAxisLabels = getAxisLabels, getGraphLabels = getLabelByGrouping, text_df = NULL, graph_label_df = NULL)
{
print("makeGraphs")
# get data for the group
group_data <- groupData(df, org, group_name)# 1 - filtering out all non-numerical data
if (is.null(group_data))
{
warning("makeGraphs: Call to groupData returned NULL. makeGraphs will return with NULL as well. ")
return(NULL)
}
scenario_plots <- list()
p_index = 1
#print(group_data)
# get the means and standard deviations for that scenario and store them in a new data frame
summ_mean <- dplyr::summarise_all(dplyr::select_if(dplyr::group_by(group_data, .data$category), is.numeric), list(~mean(., na.rm = TRUE)))
summ_all <- dplyr::summarise_all(dplyr::select_if(dplyr::group_by(group_data, .data$category), is.numeric), list(~sd(., na.rm = TRUE), ~mean(., na.rm = TRUE)))
print("Made it to here!")
# round the means and standard deviations
#summ_all <- dplyr::mutate_at(summ_all, dplyr::vars(dplyr::contains("sd")), ~round(., digits = 3))
#summ_mean <- dplyr::mutate_at(summ_mean, dplyr::vars(-dplyr::contains("category")), ~round(., digits = 3))
# TODO - future feature.
# get text versions for labels - https://stackoverflow.com/questions/34189531/subset-or-grep-for-selectively-pasting-using-geom-text-in-r
#summ_mean_text <- dplyr::mutate_all(summ_mean, funs(paste("mean: ", ., sep="")))
#summ_std_text <- dplyr::mutate_all(dplyr::select(summ_all, dplyr::contains("sd")), funs(paste("std: ", ., sep="")))
# graph the numerical columns only and iterate through them, creating a graph for each column
num_data <- dplyr::select_if(group_data, is.numeric)
for (name in colnames(num_data))
{
# create labels and limits for the x axis
labels <- getXAxisLabels(text_df, name)
limits <- getXAxisLimits(labels)
# Remove "X_" from subgroup name if previously added so we can have the plain subgroup name to put in the graph title
lbl_name <- stringr::str_replace(name,"X_","")# remove X_ from subgroup_name if we added it previously (We would do so if it started with a number)
# we want to drop all rows in the column name that are NA:
dropped_group_data <- tidyr::drop_na(group_data, dplyr::any_of(name))#dplyr::drop_na(group_data, dplyr::any_of(name))
# create mean label y positions vector
vec <- seq(0.7, 0.69+0.08*length(unique(summ_mean$category)), by = +0.08)
# short function to deal with NA's in mean when labeling
mean_handle <- function(name) {
if (is.na(name))
{
return("NA")
}
return(name)
}
if (usePercent)
{
# make the graph
p <- ggplot2::ggplot(data = dropped_group_data, ggplot2::aes_string(x=name), position = ggplot2::position_dodge(1.0)) +
# draw and label standard deviation
#ggplot2::geom_tile(data=dplyr::mutate_at(summ_all, dplyr::vars(dplyr::contains("sd")), ~.*2), ggplot2::aes_string(y = .5, x =paste(name,"mean", sep="_"), height = Inf, width = paste(name,"sd", sep="_"), fill = "category", group = "category"), alpha = 0.3, show.legend = FALSE) +
#ggplot2::geom_label(data=summ_all, y = vec, ggplot2::aes_string(x = name, label = paste("mean:",name,sep=" "), group = "category", fill = "category" ), show.legend = FALSE, nudge_y =.5) +
# draw the bars of the graph
ggplot2::geom_bar(data =dropped_group_data, mapping = ggplot2::aes_string(x = name, y = "..prop..", group = "category", fill = "category", color = "category"), stat = "count", position = ggplot2::position_dodge(1)) +
# set axis scales
ggplot2::scale_x_discrete(limits = limits, labels = labels) +#, labels = labels) +
ggplot2::expand_limits(x = 1) +
ggplot2::ylab("Percentage") +
ggplot2::xlab(getGraphLabels(labels = graph_label_df, question_name = getNameByOrg(org, group_name, name), group_name = group_name, subgroup_name = name, org = org)) +
ggplot2::ylim(c(0, 1)) +
# write question
ggplot2::ggtitle(label = paste("Group: ",group_name, " - Subgroup: ",lbl_name, sep="")) +#, subtitle = getGraphLabels(labels = graph_label_df, question_name = getNameByOrg(org, group_name, name), group_name = group_name, subgroup_name = name, org = org)) +
# create and label mean line
ggplot2::geom_vline(data=summ_mean, ggplot2::aes_string(xintercept=name, color = "category"), linetype =2, show.legend = FALSE) +
ggplot2::geom_label(data=summ_mean, y = vec, ggplot2::aes_string(x = name, label = name, group = "category", fill = "category" ), show.legend = FALSE, nudge_y =.5) +
# label each bar with count
ggplot2::geom_text(stat = "count", color = "black", ggplot2::aes_string(x = name, y = "..prop.. - 0.05", label= "..count..", group = "category"), position = ggplot2::position_dodge(1), show.legend = FALSE)
}
else # use count instead
{
# TODO
}
# add mean line labels
# --- add in the t- test ---
# make a list of unique categories
unique_cat <- unique(dropped_group_data$category)
# if there are only two unique categories:
if (length(unique_cat) == 2)
{
# group by the categories
grp1 <- stats::na.omit(dplyr::filter(dropped_group_data, .data$category == unique_cat[[1]]))
grp2 <- stats::na.omit(dplyr::filter(dropped_group_data, .data$category == unique_cat[[2]]))
ttest = tryCatch({
ttest <- stats::t.test(grp1[[name]], grp2[[name]], conf.level = 0.95)
n <- ttest$p.value
#TODO - not showing up on graph
p + ggplot2::geom_label(ggplot2::aes_string(label =ttest$p.value, x = -2.5, y = .9))
#cat("group:",group_name,"subgroup:",lbl_name,unique_cat[[1]],"vs",unique_cat[[2]],"p-value:",ttest$p.value,"\n" )
}, error = function(e) {
warning(paste("Running a ttest FAILED with error: ",e,sep=""))
})
}
# place the graph in the list
scenario_plots[[p_index]] = p
p_index = p_index + 1
}
return(scenario_plots)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.