R/Plot_Labels.R

Defines functions factor_wrap get_caption get_legend rename_value get_label add_preassigned_scales date_x_year_breaks prepare_labels_and_colors get_column_key

Documented in add_preassigned_scales date_x_year_breaks factor_wrap get_caption get_column_key get_label get_legend prepare_labels_and_colors rename_value

#' Get Column Key based on the names in a data frame
#'
#' @param data A data frame
#' @param path The path or url for the column key.  By default, checks
#' the CSISdefense Github lookups repository at CSISdefense/Lookup-Tables/master/style/
#'
#' @return A data frame of the column names from data joined up to the column key
#'
#' @details Warning: This function should be used in data processing only,
#' not in a live app.  It reads an external file from GitHub,
#' which will slow down an app substantially if done repeatedly. Works best
#' when standardize_names has already been run on the data frame in question.
#'
#' @examples
#'
#' FullData <- read_csv("2017_SP_CompetitionVendorSizeHistoryBucketPlatformSubCustomer.csv",
#'   col_names = TRUE, col_types = "cccccccccc",na=c("NA","NULL"))
#' PrepareLabelsAndColors(Coloration,FullData,"Customer")
#'
#' @export
get_column_key <- function(
    data,
    path="https://raw.githubusercontent.com/CSISdefense/Lookup-Tables/master/style/"
){
  column_key<-colnames(data)
  column_key<-as.data.frame(column_key)
  colnames(column_key)[1]<-"column"

  if(!file.exists(file.path(path,"Lookup_Coloration.csv")) || path=="offline")
    path<-file.path(get_local_lookup_path(),"style//")

  #Join up the files
  column_key<-read_and_join_experiment(column_key,
                                       "Lookup_Column_Key.csv",
                                       path=path,
                                       directory="",
                                       by="column",
                                       new_var_checked=FALSE,
                                       case_sensitive = FALSE
  )

  #Set empty string coloration.keys equal to na
  column_key$coloration.key[column_key$coloration.key==""]<-NA
  return(column_key)
}


#' Prepare Labels And Colors
#'
#' @param data the data frame to be joined
#' @param var the variables (column names) for which to prepare colors; by default all will be done
#' @param na_replaced if TRUE, replace NA values with var before adding colors
#' @param path The location of the lookup file
#'
#' @return A new data frame built on the variable var. It will
#' include colors, and order, and proper name labels.
#'
#' @details This function applies standard colors and orders to a
#' column var in a data frame data. Colors and order are drawn from pre-existing lookup tables.
#' When values are missing or wrong, these tables must be manually updated.
#' This function is badly optimized, reading in multiple csvs every time.
#' It is intend for use in data preparation source code and not to be used in a
#' real time web environment.
#'
#' @examples FullData<-standardize_variable_names(Path,
#'   FullData)
#'
#' @export
prepare_labels_and_colors<-function(data
                                    ,var=NULL
                                    ,na_replaced=TRUE
                                    ,path="https://raw.githubusercontent.com/CSISdefense/Lookup-Tables/master/style/"
                                    #                                  ,VAR.override.coloration=NA
                                    ,missing_allowed=FALSE
)
{


  data<-as.data.frame(data)


  #Confirm that the category is even available in the data set.
  if(!is.null(var)){
    if(!var %in% names(data)){
      stop(paste(var,"column is not found in data."))
    }
  }

  if(!file.exists(file.path(path,"Lookup_Coloration.csv")) || path=="offline")
    path<-file.path(get_local_lookup_path(),"style//")
  #Read in coloration
  coloration<-read.csv(

    paste(path,"Lookup_Coloration.csv",sep=""),
    header=TRUE, sep=",", na.strings="", dec=".", strip.white=TRUE,
    stringsAsFactors=FALSE,encoding="UTF-8"
  )
  if(min(nchar(coloration$RGB[!is.na(coloration$RGB)]))!=7) stop("Malformed hex code in RGB")

  if(ncol(coloration)!=12) stop("Mismatched number of columns in coloration.txt.")

  #Fix oddities involving coloration text, and handle accented characters.

  coloration$variable <- gsub("\\\\n","\n",coloration$variable,useBytes = TRUE)#iconv, from="UTF-8", to="LATIN1")
  coloration$Label <- gsub("\\\\n","\n",coloration$Label,useBytes = TRUE)#incov, from="UTF-8", to="LATIN1")

  #Translate the category name into the appropriate coloration.key
  #This is used because we have more category names than coloration.key
  column_key<-get_column_key(data,path=path)

  #If a column has been passed
  if(!is.null(var)){
    column_key<-subset(column_key, column==var)

    #Should adjust this to give proper errors for multiple vars
    #when only one is missing
    if(any(is.na(column_key$coloration.key))){
      if(missing_allowed)
        return(NA)
      else
        stop(paste(var,"is missing from Lookup_column_key.csv"))
    }

  }
  else {
    column_key<-subset(column_key, !is.na(coloration.key) & coloration.key!="Exclude")
  }

  if(nrow(column_key)==0) stop("No matching columns")

  names.data<-NULL
  for(v in (1:nrow(column_key))){
    if(na_replaced==TRUE){
      data<-replace_nas_with_unlabeled(data,column_key$column[v])
    }

    #Limit the lookup table to those series that match the variable
    labels_category_data<-subset(coloration, coloration.key==
                                   column_key$coloration.key[v] )

    if(nrow(labels_category_data)==0) stop(paste("No matching levels for:",column_key$column[v]))

    #Error checking for duplicates in lookup_coloration.csv
    if(anyDuplicated(labels_category_data$variable)>0){
      print(labels_category_data$variable[
        duplicated(labels_category_data$variable)])
      stop(paste("Lookup_Coloration.csv has"
                 ,sum(duplicated(labels_category_data$variable))
                 ,"duplicate value(s) for category="
                 ,column_key$coloration.key[v], ". See above for a list of missing labels")
      )
    }
    c<-as.character(column_key$column[v])
    k<-as.character(column_key$coloration.key[v])
    #Check for any values in the current field that are not assigned a color.
    values<-unique(data.frame(data)[,c])


    #Handle any case discrepancies.
    #Certain type of quote.
    # labels_category_data$variable<-gsub("\u0092","’",labels_category_data$variable)

    if(k=="Country"){
      #Manual fix in case excel is breaking this.
      labels_category_data$variable[labels_category_data$variable=="ÌÉland Islands"]<-"Åland Islands"
    }
    case_mismatch<-    toupper(labels_category_data$variable) %in% toupper(values)&
      !labels_category_data$variable %in% values
    labels_category_data$variable[case_mismatch]
    for(i in labels_category_data$variable[case_mismatch]){
      labels_category_data$variable[labels_category_data$variable==i]<-
        as.character(values[toupper(values)==toupper(labels_category_data$variable[labels_category_data$variable==i])])
    }





    NA.labels<-values[!values %in% labels_category_data$variable]



    if (length(NA.labels)>0){
      #Unlabeled is highly standardized, adding automatically
      if(length(NA.labels)==1 &
         !is.na(NA.labels[1])&
         NA.labels[1]=="Unlabeled")
      {

        labels_category_data<-rbind(labels_category_data,
                                    data.frame(coloration.key=column_key$coloration.key[v],
                                               variable="Unlabeled",
                                               Label="Unlabeled",
                                               Display.Order= 999,
                                               Color="reddish gray",
                                               RGB= "#967878",
                                               shape=NA,
                                               size=NA,
                                               alpha=NA,
                                               text.color="default grey",
                                               text.RGB="#554449",
                                               abbreviation="Unlabeled"
                                    ))
      }
      else{
        if(missing_allowed)
          return(NA)
        else{
          #Otherwise create an error.
          print(as.character(NA.labels))
          stop(paste("Lookup_Coloration.csv is missing "
                     ,length(NA.labels)
                     ," label(s) for column="
                     ,c," & key=",k, ". See above for a list of missing labels.", sep="")

          )
        }
      }
    }

    labels_category_data<-subset(labels_category_data
                                 , variable %in% unique(data[,c]))


    #Order the names.data and then pass on the same order to the actual data in data
    labels_category_data$Display.Order<-as.numeric(as.character(labels_category_data$Display.Order))
    labels_category_data<-labels_category_data[order(labels_category_data$Display.Order),]
    labels_category_data$column<-c
    warning(c)
    names.data<-rbind(names.data,labels_category_data)
  }
  names.data
}

#' Quickly assign yearly breaks to a chart
#'
#' @param start First year of break sequence
#' @param stop Last year of break sequence
#' @by Frequency of data breaks, e.g. 1 for every year, 5 for every 5 years
#' @fiscal_year A placeholder for future tuning by fiscal vs. calendar year
#' @partial_year If one year of incomplete data is included, specify it with this variable
#' @partial_label If one year of incomplete data is included, specify it with this variable
#'
#' @return A plot with added color and fill scales for the column passed
#'
#' @details Add year breaks at specified intervals for date data
#'
#' @examples date_x_year_breaks(2000,2023,2)
#'
#' @export
date_x_year_breaks<-function(start,stop,by,fiscal_year=TRUE,partial_year=NULL,partial_label="\nYTD"){
  if(is.null(partial_year))
    return(scale_x_date(breaks = as.Date(paste(seq(start,stop, by=by),"01","01",sep="-")),
                        date_labels = "'%y"))
  else {
    #List dates in sequence and the partial_year
    b<-as.Date(paste(c(seq(start,stop, by=by),partial_year),"01","01",sep="-"))
    #If the partial year shows up twice, remove it the duplicate
    b<-b[!duplicated(b)]
    l<-as.character(b)
    py<-l==paste(partial_year,"01","01",sep="-")
    l[!py]<-format(as.Date(l[!py]),"'%y")
    l[py]<-paste(format(as.Date(l[py]),"'%y"),partial_label,sep="")
    return(scale_x_date(breaks = b,labels=l)
    )

  }

  ToplinePricing+scale_x_continuous(breaks=c(seq(2000,2020, by=6),2023),
                                    labels=c(paste("'",substr(seq(2000,2020, by=6),3,4)),"'23\n(Q1-Q2)"))
}

#' Take existing data frame and associate colors with values
#'
#' @param plot The existing ggplot, needed to add more than one scale
#' @param labels_and_colors A csis360 lookup data.frame with factor information
#' @param var The name of the column, default to none for generic colors
#'
#' @return A plot with added color and fill scales for the column passed
#'
#' @details labels_and_colors is a data.frame produced by
#' the csis360 package, see prepare_labels_and_colors drawing
#' from lookup table that preassign labels, order, and color
#' to the expected values of a column.
#'
#' @examples plot<-add_preassigned_scales(plot,labels_and_colors,"pricing.mechanism.sum")
#'
#' @import ggplot2
#' @export
add_preassigned_scales<-function(
  plot,
  labels_and_colors,
  var="None"
  # reverse_color=FALSE #' @param reverse_color If True reverse the order of the factor
){

  #If a column name is passed and it is in labels_and_colors
  if(var!="None" & any(labels_and_colors$column==var)){
    if(is.character(plot$data[,var])) plot$data[,var]<-factor(plot$data[,var])

    #Rename factor to use label values
    oldname<-subset(labels_and_colors,column==var & (!Label %in% levels(plot$data[,var])&variable %in% levels(plot$data[,var])))$variable
    newname<-subset(labels_and_colors,column==var & (!Label %in% levels(plot$data[,var])&variable %in% levels(plot$data[,var])))$Label
    plot$data[,var]<-plyr::mapvalues(plot$data[,var],oldname,newname)
    var_label<-subset(labels_and_colors,column==var & (Label %in% levels(plot$data[,var])|variable %in% levels(plot$data[,var])))
    var_label$Display.Order<-as.numeric(var_label$Display.Order)
    var_label<-var_label[order(var_label$Display.Order),]
    if (any(duplicated(var_label$Label))) stop("Duplicated label")
    # if(reverse_color) var_label<-var_label[order(-var_label$Display.Order),]
    plot$data[,var]<-factor(plot$data[,var],levels=var_label$Label)
    plot<-plot+scale_color_manual(
      values = subset(labels_and_colors,column==var & (Label %in% levels(plot$data[,var])|variable %in% levels(plot$data[,var])))$RGB,
      limits=c(subset(labels_and_colors,column==var & (Label %in% levels(plot$data[,var])|variable %in% levels(plot$data[,var])))$Label)
    )+scale_fill_manual(
      values = subset(labels_and_colors,column==var & (Label %in% levels(plot$data[,var])|variable %in% levels(plot$data[,var])))$RGB,
      # limits=c(subset(labels_and_colors,column==var)$variable),
      limits=c(subset(labels_and_colors,column==var & (Label %in% levels(plot$data[,var])|variable %in% levels(plot$data[,var])))$Label)
    )
  }
  else{
    #Drawing from the color blind palette from
    #http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/
    cbPalette <- c( "#e58846", "#66c6cb", "#4c9361",
                    "#eec260", "#0054A4", "#BB4243", "#9eadd8","#999999")

    # BarPalette <- scale_fill_manual(
    #   values =  c( "#e58846", "#66c6cb", "#4c9361",
    #                "#eec260", "#0054A4", "#BB4243",
    #                "#9eadd8","#999999"))
    # c(
    # "#004165",
    # "#0065a4",
    # "#0095AB",
    # "#66c6cb",
    # "#75c596",
    # "#0faa91",
    # "#51746d",
    # "#607a81",
    # "#252d3a",
    # "#353535",
    # "#797979"))


    # LinePalette <- scale_color_manual(
    #   values =  c( "#e58846", "#66c6cb", "#4c9361",
    #                "#eec260", "#0054A4", "#BB4243",
    #                "#9eadd8","#999999"))

    #Palettte for 12 colors http://mkweb.bcgsc.ca/colorblind/palettes/12.color.blindness.palette.txt
    cbPalette12 <- scale_color_manual(
      values =  c( "#9F0162", "#009F81", "#FF5AAF ",
                   "#00FCCF", "#8400CD","#008DF9   ",
                   "#00C2F9","#FFB2FD ","#A40122 ",
                   "#E20134 ","#FF6E3A","#FFC33B"))

    # values = c(
    #   "#004165",
    #   "#75c596",
    #   "#b24f94",
    #   "#0095ab",
    #   "#0a8672",
    #   "#e22129",
    #   "#66c6cb",
    #   "#51746d",
    #   "#797979",
    #   "#788ca8",
    #   "#353535"))

    if(var!="None"){
      warning(paste(var,"not found in labels_and_colors"))
      if(length(levels(factor(plot$data[,var])))<=8)
        plot<-plot+scale_color_manual(
          values = cbPalette
        )+scale_fill_manual(
          values = cbPalette
        )
    }
    # else if(length(levels(factor(plot$data[,var])))<=12){
    #   plot<-plot+scale_color_manual(
    #     values = cbPalette12
    #   )+scale_fill_manual(
    #     values = cbPalette12
    #   )
    #   warning("Too many levels to apply DIIG color blind palette")
    # }
    else(warning("Too many levels to apply color blind palette"))
  }
  return(plot)
}



#' Get Label
#'
#' @return character string with the label corresponding to a column
#'
#' @param var The names of the column, default of none for generic colors
#' @param column_key A csis360 lookup data.frame with column information
#' @param share If TRUE, calculates the share
#'
#'
#'
#'
#'
#' @export
get_label <- function(
  var,
  column_key,
  share = FALSE
){
  if(is.null(var)) stop("Null var passed to get_label.")
  if(var=="None") label<-""
  else{
    if(share==TRUE){
      title<-subset(column_key,column==var)$share.title
    }
    else
      title<-subset(column_key,column==var)$title
    label<-if_else(is.na(title),var,title)
  }
  return(label)

}




#' Renames a factor level to user-specified name, in the passed data frame
#'
#' @param data The data frame in which to rename the value
#' @param input Shiny input object
#'
#' @return A data frame with the factor level renamed
#'
#'
#'
#'
#' @export
rename_value <- function(
  data,    # the data frame in which to rename the value
  input    # shiny input object
){
  levels(data[[input$edit_var]])[levels(data[[
    input$edit_var]]) == input$edit_value] <- input$rename_value_txt

  return(data)
}


#' Extract a legend https://stackoverflow.com/questions/43366616/ggplot2-legend-only-in-a-plot
#' Alternate unused approach From https://github.com/tidyverse/ggplot2/wiki/Share-a-legend-between-two-ggplot2-graphs
#'
#' @param a.gplot a ggplot
#'
#' @return Returns a legend
#'
#'
#'
#'
#'
#' @import ggplot2
#' @export
get_legend<-function(a.gplot){
  tmp <- ggplot_gtable(ggplot_build(a.gplot))
  leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
  legend <- tmp$grobs[[leg]]
  return(legend)}



#' Return default caption text.
#'
#' @return A label that can be added to a graph.
#'
#' @export
get_caption<-function(
){
  c<-labs(caption="Source: FPDS; CSIS analysis.")
  return(c)
}

#' Add line breaks to a string in a factor
#'
#' @param string A string
#' @param nwrap The number of characters between returns
#'
#' @return String with carriage breaks
#'
#' @export
# Helper function for string wrapping.
# Default 20 character target width.
factor_wrap <- function(f, nwrap=20) {
  # https://stackoverflow.com/questions/37174316/how-to-fit-long-text-into-ggplot2-facet-titles
  string_wrap<- function(string, nwrap=20) {
    paste(strwrap(string, width=nwrap), collapse="\n")
  }
  string_wrap <- Vectorize(string_wrap)
  if(!is.factor(f)){
    warning("Converting f to a factor")
    f<-factor(f)
  }
  levels(f)<-string_wrap(levels(f))
  f
}
CSISdefense/csis360 documentation built on April 25, 2024, 12:01 a.m.