#' Data Summary
#'
#' Summarises the data frame returned by \code{totaldata()} as a table of
#' counts, a plot, or a set of significance tests, depending on \code{type}.
#'
#' @param type A single string selecting the output. One of:
#' \describe{
#'   \item{\code{"count"}}{A one-column matrix stacking \code{summary()} of
#'     the \code{year}, \code{major}, \code{honors}, \code{degree} and
#'     \code{ethnicity} columns.}
#'   \item{\code{"timeline"}}{A line graph of the number of Jewish graduates
#'     in each degree/merit category per year.}
#'   \item{\code{"proportion"}}{A line graph of the share of each year's
#'     Jewish graduates that falls in each degree/merit category.}
#'   \item{\code{"comparison"}}{The same as \code{"proportion"}, with the
#'     non-Jewish proportions added so the two groups can be compared.}
#'   \item{\code{"differencetest"}}{For every degree/merit category, a
#'     two-sample \code{t.test()} (H0: difference is 0, HA: difference is
#'     not 0) comparing the yearly Jewish proportions with the yearly
#'     non-Jewish proportions.}
#' }
#'
#' @return For \code{"count"} a one-column matrix; for \code{"timeline"},
#'   \code{"proportion"} and \code{"comparison"} a \code{ggplot} object; for
#'   \code{"differencetest"} a list of \code{htest} objects named by degree
#'   category. An unrecognised \code{type} raises a warning and invisibly
#'   returns the warning message.
#'
#' @examples
#' \dontrun{
#' datasummary("count")
#' datasummary("comparison")
#' datasummary("differencetest")
#' }
#'
#' @import ggplot2
#' @export
datasummary <- function(type) {
  if (length(type) != 1L || is.na(type)) {
    stop("`type` must be a single, non-missing string.", call. = FALSE)
  }
  type <- as.character(type)

  known_types <- c("count", "timeline", "proportion", "comparison",
                   "differencetest")
  if (!type %in% known_types) {
    # warning() returns its message invisibly, so an unknown type yields that
    # string (plus the warning) rather than an error.
    return(warning(
      "The specified type does not exist. Enter ?datasummary or ",
      "help(datasummary) to view all the summary options available.",
      call. = FALSE
    ))
  }

  # Use totaldata() when it is already in scope; only fall back to sourcing
  # the development copy from its hard-coded location when it is not.
  if (!exists("totaldata", mode = "function")) {
    source("~/names/R/totaldata.R")
  }
  dat <- totaldata()

  if (type == "count") {
    return(cbind(c(
      summary(dat$year),
      summary(dat$major),
      summary(dat$honors),
      summary(dat$degree),
      summary(dat$ethnicity)
    )))
  }

  # Long-format counts: one row per (Jewish status, year, degree) combination.
  jew <- data.frame(table(dat$Jewstatus, dat$year, dat$degree))
  colnames(jew) <- c("jewstatus", "year", "degree", "freq")

  if (type == "timeline") {
    return(datasummary_plot(
      jew[jew$jewstatus == "jewish", ],
      ggplot2::aes(x = year, y = freq, group = degree, col = degree),
      y_label = "Number of Jews",
      title = "Merit Levels of Graduating Jews\n"
    ))
  }

  jewish <- datasummary_proportions(jew, dat, "jewish")

  if (type == "proportion") {
    return(datasummary_plot(
      jewish,
      ggplot2::aes(x = year, y = proportion, group = degree, col = degree),
      y_label = "Proportion of Jews",
      title = "Proportion of Merit Levels of Jews\n"
    ))
  }

  non_jewish <- datasummary_proportions(jew, dat, "not jewish")

  if (type == "comparison") {
    return(datasummary_plot(
      rbind(jewish, non_jewish),
      ggplot2::aes(
        x = year, y = proportion,
        group = interaction(degree, jewstatus),
        col = degree, shape = jewstatus
      ),
      y_label = "Proportion of Jews",
      title = "Proportion of Merit Levels of Jews Vs. Non-Jewish\n"
    ))
  }

  # type == "differencetest": one test per degree category, all of them
  # returned (named by category) instead of only the last one.
  degrees <- levels(jew$degree)
  tests <- lapply(degrees, function(degree_name) {
    stats::t.test(
      jewish$proportion[jewish$degree == degree_name],
      non_jewish$proportion[non_jewish$degree == degree_name]
    )
  })
  names(tests) <- degrees
  tests
}

#' Per-year proportions of each degree category for one group
#'
#' Keeps the rows of `jew` whose `jewstatus` equals `status` and adds a
#' `proportion` column: the row's `freq` divided by that group's total head
#' count in the same year. Denominators are looked up by year, so the result
#' does not depend on row order or on the number of degree categories.
#'
#' @param jew Data frame with columns `jewstatus`, `year`, `degree`, `freq`.
#' @param dat Data frame with `year` and `Jewstatus` columns.
#' @param status Value of `jewstatus` to keep.
#' @return The filtered rows of `jew` with an added `proportion` column.
#' @noRd
datasummary_proportions <- function(jew, dat, status) {
  totals <- data.frame(table(dat$year, dat$Jewstatus))
  totals <- totals[totals$Var2 == status, ]
  rows <- jew[jew$jewstatus == status, ]
  rows$proportion <- rows$freq / totals$Freq[match(rows$year, totals$Var1)]
  rows
}

#' Shared point-and-line plot used by the graphical summaries
#'
#' @param data Data frame to plot.
#' @param mapping Aesthetic mapping created with `ggplot2::aes()`.
#' @param y_label Label for the y axis.
#' @param title Plot title.
#' @return A `ggplot` object.
#' @noRd
datasummary_plot <- function(data, mapping, y_label, title) {
  ggplot2::ggplot(data = data, mapping = mapping) +
    ggplot2::geom_point(size = 3) +
    ggplot2::geom_line() +
    ggplot2::ylab(y_label) +
    ggplot2::xlab("Year") +
    ggplot2::ggtitle(title) +
    ggplot2::scale_color_discrete(name = "Legend") +
    ggplot2::theme(
      plot.title = ggplot2::element_text(
        size = 16, face = "bold", color = "purple"
      )
    )
}
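# NOTE(review): the two lines below appear to be web-page boilerplate rather
# than R source; they are kept as comments so the file parses.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.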