R/butterfly.R

Defines functions butterfly

Documented in butterfly

#' Make a butterfly plot
#'
#' @param sex Sex 1 = Male, 2 = Female
#' @param age Age at diagnosis
#' @param title Plot title
#' @param infoHeight Position (the center) of summary statistics (use age units)
#' @param infoWidth Position (the center) of summarry statistics (use density units)
#' @param ageLimit Where to crop the top of age distribution (85 or 110 years)
#'
#' @import ggplot2
#' @importFrom dplyr case_when
#' @importFrom ggthemes theme_few
#'
#' @return ggplot
#' @export
#'
#'
#' @examples
#' butterfly(malesFemales1910$sex, malesFemales1910$age, "Census 1910")
#' butterfly(malesFemales2010$sex, malesFemales2010$age, "Census 2010", 
#'           infoHeight = 25, ageLimit = 110)


butterfly <- function(sex, age, title = NULL, infoHeight = 10, 
                      infoWidth =  0.037, ageLimit = 85) {

  if (!(is.numeric(sex))) stop("Sex must be a number.")  
  if (any(!(sex %in% c(NA, 1, 2)))) stop("Sex must be 1 or 2.")
  
  if (!(is.numeric(age))) stop("Age must be a number.")  
  if (any(age[!is.na(age)] < 0)) stop("Age needs to be greater than 0")    
  if (any(age[!is.na(age)] > 140)) stop("Age needs to be less than 140")

  sex <- as.character(sex)
  sex[sex =="1"] <- "Male"
  sex[sex == "2"] <- "Female"


  Males <- data.frame(Males = age[sex == "Male"])
  Females <- data.frame(Females = age[sex == "Female"])

  sex <- table(sex)

  if( (nrow(Males) > 0)){
    statsMales <- summary(Males$Males)
  } else{
    statsMales <- rep(0, 6)
    sex <- c(sex, 0)
  }

  if( (nrow(Females) > 0)){
    statsFemales <- summary(Females$Females)
  } else{
    statsFemales <- rep(0, 6)
    sex <- c(0, sex)
  }

  femaleCount <-
    case_when(
      sex[1] == 0 ~ "0",
      sex[1] > 0 & sex[1] < 10 ~ "< 10",
      sex[1] < 50 ~ "< 50",
      sex[1] < 100 ~ "< 100",
      sex[1] < 250 ~ "< 250",
      sex[1] < 500 ~ "< 500",
      sex[1] < 1000 ~ "< 1000",
      sex[1] < 2500 ~ "< 2500",
      sex[1] < 5000 ~ "< 5000",
      sex[1] >= 5000 ~ "5000+"
    )

  maleCount <-
    case_when(
      sex[2] == 0 ~ "0",
      sex[2] > 0 & sex[2] < 10 ~ "< 10",
      sex[2] < 50 ~ "< 50",
      sex[2] < 100 ~ "< 100",
      sex[2] < 250 ~ "< 250",
      sex[2] < 500 ~ "< 500",
      sex[2] < 1000 ~ "< 1000",
      sex[2] < 2500 ~ "< 2500",
      sex[2] < 5000 ~ "< 5000",
      sex[2] >= 5000 ~ "5000+"
    )

  plot <-
    ggplot() +

    scale_fill_manual(limits = c(" Male", " Female"),
                      values = c("#89cff0", "#FFC0CB")) +
    theme_few() +
    theme(
      axis.text = element_text(size = 18),
      axis.title = element_text(size = 18,
                                face = "bold"),
      panel.grid.major.x = element_line(colour = "lightgray"),
      panel.grid.major.y = element_line(colour = "lightgray"),
      legend.title = element_blank(),
      legend.position = "none") +
    scale_y_continuous(labels = NULL, name = "", limits = c(-.05, .05)) +

    #scale_x_continuous(breaks = c(0, seq(30, 110, by = 10)),limits = c(0, 110)) + # 30 - 110
    #scale_x_continuous(limits = c(0, 85), breaks = c(0, seq(20, 90, by = 10))) +# 20 - 90
    
    labs(x = "Age\n" ) +
    guides(fill = guide_legend(
      keywidth = .4,
      keyheight = .4,
      default.unit = "inch")
    ) +
    coord_flip()  +
    annotate("text", x = infoHeight, y = (infoWidth * -1), label = paste0(
      "Male (", max(c(round(sex[2]/sum(sex), 2), 0), na.rm =T) * 100,
      "%)\n People: ", sex[2], #maleCount,
      "\nAverage: ", round(statsMales[4], 1),
      "\nMiddle 50%: ", round(statsMales[2]), " to ", round(statsMales[5]))) +
    annotate("text", x = infoHeight, y =  infoWidth, label = paste0(
      "Female (", max(c(round(sex[1]/sum(sex), 2), 0), na.rm =T) * 100,
      "%)\n People: ", sex[1], #femaleCount,
      "\nAverage: ", round(statsFemales[4], 1),
      "\nMiddle 50%: ", round(statsFemales[2]), " to ", round(statsFemales[5]))) +
    labs(title = title)

  
  if (ageLimit == 110) {
   plot <- plot + 
     scale_x_continuous(breaks = c(0, seq(30, 110, by = 10)), limits = c(0, 110)) # 30 - 110
  } else {
    plot <- plot +
        scale_x_continuous(limits = c(0, 85), breaks = c(0, seq(20, 90, by = 10))) # 20 - 90
  }

  if (nrow(Males) > 1){
    maleLabelHeight <- as.integer(round((statsMales[5] - statsMales[2])/2 +
                                          statsMales[2]))

    plot <- plot + geom_density(aes(x = Males, y = -(..density..), fill = " Male"), 
                                alpha = .5, data = Males, na.rm =TRUE)
    maleDensity <- ggplot_build(plot)$data[[3]]

    plot <- plot +
      geom_area(data= subset(maleDensity,
                             x > statsMales[2] & x < statsMales[5]),
                aes(x = x, y = y), fill = "lightblue") +
      annotate("text", y = (infoWidth * -1), x = maleLabelHeight,
               label = "Typical Male Age Range \n (Middle 50%)")

  }

  if (nrow(Females) > 1){

    femaleLabelHeight <- as.integer(round((statsFemales[5] - statsFemales[2])/2 +
                                            statsFemales[2]))

    plot <- plot + geom_density(aes(x = Females, fill = " Female"), alpha = .5, 
                                data = Females, na.rm = TRUE)

    if (nrow(Males) > 1){
      femaleDensity <- ggplot_build(plot)$data[[6]]
    } else {
      femaleDensity <- ggplot_build(plot)$data[[3]]
    }

    plot <- plot +
      geom_area(data= subset(femaleDensity,
                             x > statsFemales[2] & x < statsFemales[5]),
                aes(x = x, y = y), fill = "pink", na.rm = TRUE) +
        annotate("text", y = infoWidth, x = femaleLabelHeight,
                 label = "Typical Female Age Range \n (Middle 50%)")
  }

  plot <- plot +
    geom_abline(slope = 0, intercept = 0)
  plot

}
RaymondBalise/butterfly documentation built on Dec. 27, 2019, 2:16 a.m.