# R/plot_rawadjusted.R

# Defines functions plot_rawadjusted

# Documented in plot_rawadjusted

#' Plot of HIV prevalence over time
#'
#' Plots the raw and/or fully adjusted HIV prevalence over time
#'
#' This function has been developed to plot HIV prevalence over time.  Both the raw and fully adjusted HIV prevalence (obtained using the \link[ANCRTAdjust]{data_clean}, \link[ANCRTAdjust]{mt_adjust}
#' and \link[ANCRTAdjust]{impcov_adjust} functions) can be plotted and compared, or just the fully adjusted prevalence can be plotted.  The time unit can be set to
#' either yearly or by reporting period.
#'
#' An error is raised if \code{time_unit} or \code{hiv_raw} is not one of the accepted values, or if \code{data} lacks one of the columns needed for the requested plot.
#'
#' @param data The dataframe output by \link[ANCRTAdjust]{impcov_adjust}. The following variables must be present:
#'  \itemize{
#'   \item \code{snu1}: The subnational unit 1.  If plots are to be generated for the different snu1s, the parameter by_snu1 = TRUE must
#'   have been specified when running \link[ANCRTAdjust]{impcov_adjust}.
#'   \item \code{time}: The year and/or period (i.e., quarters) that the data was collected. If plots are to be generated by reporting period, the parameter
#'   by_period = TRUE must have been specified when running \link[ANCRTAdjust]{impcov_adjust}. If plots are to be generated by year, the parameter
#'   by_year = TRUE must have been specified when running \link[ANCRTAdjust]{impcov_adjust}.
#'   \item \code{adjusted_prv}: The HIV prevalence adjusted for imperfect testing coverage and all previous adjustments (i.e. data cleaning, adjustment
#'   for multiple testing and/or adjustment for missing reporting periods, if performed) (generated using the \link[ANCRTAdjust]{impcov_adjust} function).
#'   \item \code{hiv_raw}: The HIV prevalence using the raw data (generated using the \link[ANCRTAdjust]{impcov_adjust} function). Only required when
#'   the raw prevalence is plotted.
#'  }
#' @param snu1 The snu1 for which the plot is generated. "all" indicates that the plot includes the entire country data (i.e. not snu1-specific). Default = "all".
#' @param time_unit "period" indicates that the results be plotted by reporting period. "year" indicates that the results be plotted by year. Default = "period".
#' @param hiv_raw \code{TRUE} (or "TRUE") indicates that both the fully adjusted and raw HIV prevalences be plotted.  \code{FALSE} (or "FALSE") indicates that only the fully
#' adjusted HIV prevalences be plotted. Default = \code{TRUE}.
#' @param y_lim The y-axis upper limit. Default = 40.
#'
#' @import ggplot2
#'
#' @author Mathieu Maheu-Giroux
#' @author Brittany Blouin
#'
#' @return A plot of HIV prevalence over time.
#'
#' @export
plot_rawadjusted <- function(data, snu1 = "all", time_unit = "period", hiv_raw = TRUE, y_lim = 40) {
  if (length(time_unit) != 1L || !(time_unit %in% c("period", "year"))) {
    stop("time_unit must be either 'period' or 'year'")
  }

  # The flag is documented with quoted values, so accept both logical values
  # and their string spellings. Anything else is rejected up front instead of
  # falling through and returning NULL.
  show_raw <- as.logical(hiv_raw)
  if (length(show_raw) != 1L || is.na(show_raw)) {
    stop("hiv_raw must be TRUE or FALSE")
  }

  # The `hiv_raw` argument shares its name with the `hiv_raw` column used in
  # aes() below; checking the columns here guarantees the column (not the
  # flag) is what gets plotted.
  required_cols <- c("snu1", "time", "adjusted_prv", if (show_raw) "hiv_raw")
  missing_cols <- setdiff(required_cols, names(data))
  if (length(missing_cols) > 0) {
    stop("data is missing required column(s): ", paste(missing_cols, collapse = ", "))
  }

  # Declared locally so R CMD check does not flag the aes() column reference
  # as an undefined global variable.
  adjusted_prv <- NULL

  # as.numeric() on a factor returns the level codes, not the labels, so go
  # through character first.
  if (is.factor(data$time)) {
    data$time <- as.character(data$time)
  }
  data$time <- as.numeric(data$time)

  if (time_unit == "period") {
    # Rows with time below 9000 are the ones plotted by reporting period.
    keep <- data$time < 9000 & data$snu1 == snu1
  } else {
    # When any time value exceeds 9999, only those rows are kept and their
    # first four digits are used as the year. The check runs over all snu1s,
    # not just the one being plotted.
    if (any(data$time > 9999, na.rm = TRUE)) {
      data <- data[which(data$time > 9999), , drop = FALSE]
      # sprintf() avoids the scientific notation as.character() can produce
      # for round values (e.g. 20000000 -> "2e+07"), which would otherwise
      # be truncated to a bogus year.
      data$time <- as.numeric(substr(sprintf("%.0f", trunc(data$time)), 1, 4))
    }
    keep <- data$time < 9999 & data$snu1 == snu1
  }
  # which() drops rows whose filter evaluates to NA instead of turning them
  # into all-NA rows.
  plot_data <- data[which(keep), , drop = FALSE]

  colour_map <- c("Adjusted HIV prevalence" = "blue")
  if (show_raw) {
    colour_map <- c(colour_map, "Raw HIV prevalence" = "red")
  }

  # NOTE(review): `size` on geom_line() is kept for compatibility with older
  # ggplot2 releases; newer releases prefer `linewidth` - confirm the minimum
  # supported ggplot2 version before switching.
  prevalence_plot <- ggplot(plot_data, aes(time))
  if (show_raw) {
    prevalence_plot <- prevalence_plot +
      geom_line(aes(y = hiv_raw, color = "Raw HIV prevalence"), size = 1) +
      geom_point(aes(y = hiv_raw, color = "Raw HIV prevalence"))
  }
  prevalence_plot <- prevalence_plot +
    geom_line(aes(y = adjusted_prv, color = "Adjusted HIV prevalence"), size = 1) +
    geom_point(aes(y = adjusted_prv, color = "Adjusted HIV prevalence")) +
    xlab("Time") +
    ylab("HIV Prevalence (%)") +
    ylim(c(0, y_lim)) +
    theme(
      axis.text = element_text(size = 7),
      axis.title = element_text(size = 7),
      # The legend is only useful when two series are drawn.
      legend.position = if (show_raw) "right" else "none"
    ) +
    scale_colour_manual(name = "", values = colour_map)

  prevalence_plot
}
# brittanyblouin/ANCRTAdjust documentation built on Oct. 28, 2019, 4:53 a.m.