#' Make a butterfly plot
#'
#' @param sex Sex 1 = Male, 2 = Female
#' @param age Age at diagnosis
#' @param title Plot title
#' @param infoHeight Position (the center) of summary statistics (use age units)
#' @param infoWidth Position (the center) of summarry statistics (use density units)
#' @param ageLimit Where to crop the top of age distribution (85 or 110 years)
#'
#' @import ggplot2
#' @importFrom dplyr case_when
#' @importFrom ggthemes theme_few
#'
#' @return ggplot
#' @export
#'
#'
#' @examples
#' butterfly(malesFemales1910$sex, malesFemales1910$age, "Census 1910")
#' butterfly(malesFemales2010$sex, malesFemales2010$age, "Census 2010",
#' infoHeight = 25, ageLimit = 110)
butterfly <- function(sex, age, title = NULL, infoHeight = 10,
infoWidth = 0.037, ageLimit = 85) {
if (!(is.numeric(sex))) stop("Sex must be a number.")
if (any(!(sex %in% c(NA, 1, 2)))) stop("Sex must be 1 or 2.")
if (!(is.numeric(age))) stop("Age must be a number.")
if (any(age[!is.na(age)] < 0)) stop("Age needs to be greater than 0")
if (any(age[!is.na(age)] > 140)) stop("Age needs to be less than 140")
sex <- as.character(sex)
sex[sex =="1"] <- "Male"
sex[sex == "2"] <- "Female"
Males <- data.frame(Males = age[sex == "Male"])
Females <- data.frame(Females = age[sex == "Female"])
sex <- table(sex)
if( (nrow(Males) > 0)){
statsMales <- summary(Males$Males)
} else{
statsMales <- rep(0, 6)
sex <- c(sex, 0)
}
if( (nrow(Females) > 0)){
statsFemales <- summary(Females$Females)
} else{
statsFemales <- rep(0, 6)
sex <- c(0, sex)
}
femaleCount <-
case_when(
sex[1] == 0 ~ "0",
sex[1] > 0 & sex[1] < 10 ~ "< 10",
sex[1] < 50 ~ "< 50",
sex[1] < 100 ~ "< 100",
sex[1] < 250 ~ "< 250",
sex[1] < 500 ~ "< 500",
sex[1] < 1000 ~ "< 1000",
sex[1] < 2500 ~ "< 2500",
sex[1] < 5000 ~ "< 5000",
sex[1] >= 5000 ~ "5000+"
)
maleCount <-
case_when(
sex[2] == 0 ~ "0",
sex[2] > 0 & sex[2] < 10 ~ "< 10",
sex[2] < 50 ~ "< 50",
sex[2] < 100 ~ "< 100",
sex[2] < 250 ~ "< 250",
sex[2] < 500 ~ "< 500",
sex[2] < 1000 ~ "< 1000",
sex[2] < 2500 ~ "< 2500",
sex[2] < 5000 ~ "< 5000",
sex[2] >= 5000 ~ "5000+"
)
plot <-
ggplot() +
scale_fill_manual(limits = c(" Male", " Female"),
values = c("#89cff0", "#FFC0CB")) +
theme_few() +
theme(
axis.text = element_text(size = 18),
axis.title = element_text(size = 18,
face = "bold"),
panel.grid.major.x = element_line(colour = "lightgray"),
panel.grid.major.y = element_line(colour = "lightgray"),
legend.title = element_blank(),
legend.position = "none") +
scale_y_continuous(labels = NULL, name = "", limits = c(-.05, .05)) +
#scale_x_continuous(breaks = c(0, seq(30, 110, by = 10)),limits = c(0, 110)) + # 30 - 110
#scale_x_continuous(limits = c(0, 85), breaks = c(0, seq(20, 90, by = 10))) +# 20 - 90
labs(x = "Age\n" ) +
guides(fill = guide_legend(
keywidth = .4,
keyheight = .4,
default.unit = "inch")
) +
coord_flip() +
annotate("text", x = infoHeight, y = (infoWidth * -1), label = paste0(
"Male (", max(c(round(sex[2]/sum(sex), 2), 0), na.rm =T) * 100,
"%)\n People: ", sex[2], #maleCount,
"\nAverage: ", round(statsMales[4], 1),
"\nMiddle 50%: ", round(statsMales[2]), " to ", round(statsMales[5]))) +
annotate("text", x = infoHeight, y = infoWidth, label = paste0(
"Female (", max(c(round(sex[1]/sum(sex), 2), 0), na.rm =T) * 100,
"%)\n People: ", sex[1], #femaleCount,
"\nAverage: ", round(statsFemales[4], 1),
"\nMiddle 50%: ", round(statsFemales[2]), " to ", round(statsFemales[5]))) +
labs(title = title)
if (ageLimit == 110) {
plot <- plot +
scale_x_continuous(breaks = c(0, seq(30, 110, by = 10)), limits = c(0, 110)) # 30 - 110
} else {
plot <- plot +
scale_x_continuous(limits = c(0, 85), breaks = c(0, seq(20, 90, by = 10))) # 20 - 90
}
if (nrow(Males) > 1){
maleLabelHeight <- as.integer(round((statsMales[5] - statsMales[2])/2 +
statsMales[2]))
plot <- plot + geom_density(aes(x = Males, y = -(..density..), fill = " Male"),
alpha = .5, data = Males, na.rm =TRUE)
maleDensity <- ggplot_build(plot)$data[[3]]
plot <- plot +
geom_area(data= subset(maleDensity,
x > statsMales[2] & x < statsMales[5]),
aes(x = x, y = y), fill = "lightblue") +
annotate("text", y = (infoWidth * -1), x = maleLabelHeight,
label = "Typical Male Age Range \n (Middle 50%)")
}
if (nrow(Females) > 1){
femaleLabelHeight <- as.integer(round((statsFemales[5] - statsFemales[2])/2 +
statsFemales[2]))
plot <- plot + geom_density(aes(x = Females, fill = " Female"), alpha = .5,
data = Females, na.rm = TRUE)
if (nrow(Males) > 1){
femaleDensity <- ggplot_build(plot)$data[[6]]
} else {
femaleDensity <- ggplot_build(plot)$data[[3]]
}
plot <- plot +
geom_area(data= subset(femaleDensity,
x > statsFemales[2] & x < statsFemales[5]),
aes(x = x, y = y), fill = "pink", na.rm = TRUE) +
annotate("text", y = infoWidth, x = femaleLabelHeight,
label = "Typical Female Age Range \n (Middle 50%)")
}
plot <- plot +
geom_abline(slope = 0, intercept = 0)
plot
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.