R/lorenz.R

Defines functions lorenz

Documented in lorenz

#' @import DescTools
#' @import ggplot2
NULL

#' Plots the (generalized) Lorenz Curve.
#'
#' This function plots a Lorenz curve. Following 'Bernasco, W. and W.
#' Steenbeek (2017). More places than crimes: Implications for evaluating the
#' law of crime concentration at place. Journal of Quantitative Criminology.
#' https://doi.org/10.1007/s10940-016-9324-7', if # events < # units of analysis,
#' vertical lines are added to the plot to indicate the percentage of units of
#' analysis where events could occur under maximum equality.
#'
#' @param mydata     a vector of N units, with counts of events per unit. The
#' vector should contain at least non-negative elements. The result will be NA,
#' if the vector contains negative elements.
#' @param prop         logical. Should the axes show proportions from 0 - 1
#' (default = TRUE), or percentages 0 - 100?
#' @param rescale      logical. Should the plot be rescaled so that 'c/n' is
#' moved to the end of the x-axis and the plot is stretched out accordingly?
#' @return             returns a Lorenz curve plot (generated by ggplot2)
#' @examples
#' # Plot a Lorenz curve of a non-sparse data situation:
#' # 10 units of analysis and 100 events
#' crimes <- c(22,18,17,12,11,8,6,3,2,1)
#' lorenz(crimes)
#' # Because sum(crimes) > length(crimes), it is the normal Lorenz curve.
#'
#' # Plot a Lorenz curve of a sparse data situation:
#' # 10 units of analysis and 5 events
#' crimes <- c(3,1,1,rep(0,7))
#' lorenz(crimes)
#'
#' # You can rescale the plot so that 'c/n' is moved to the end of the x-axis
#' # (and the graph is stretched to the right accordingly)
#' lorenz(crimes, rescale = TRUE)
#'
#' # Rescaling the graph is easier with very sparse data situation, as the
#' # adjusted line of maximal equality can become almost vertical from origin:
#' crimes <- c(3,1,1,rep(0,150))
#' lorenz(crimes)
#'
#' @export
lorenz <- function(mydata, prop = TRUE, rescale = FALSE){

  # from DescTools::Gini
  mydata <- as.numeric(mydata)
  if (any(is.na(mydata)) || any(mydata < 0)) return(NA_real_)

  mydata <- sort(mydata, decreasing = TRUE)

  if(rescale) {
    if(sum(mydata) < length(mydata)) mydata <- mydata[1:sum(mydata)]
  }

  mydata <- data.frame(events = mydata, idnr = seq_along(mydata))

  denom <- ifelse(prop, 100, 1)

  # fill data.frame
  mydata$ones <- 1
  mydata$percEvents <- mydata$events / sum(mydata$events) * 100
  mydata$cumpercEvents <- cumsum(mydata$percEvents)
  mydata$cumpercUnit <- cumsum(mydata$ones) / nrow(mydata) * 100

  Nunits <- nrow(mydata)
  Nevents <- sum(mydata$events)
  NunitsWithEvents <- nrow(mydata[mydata$events>0,])

  # This block is to create alternative variables with which to create adjusted plot
  mydata$cumpercUnit.new <- cumsum(mydata$ones) / Nevents * 100
  mydata$cumpercUnit.new[mydata$cumpercUnit.new>100] <- 100

  # take subset of data, to easily draw lines for generalized Gini later
  mydata_gen <- mydata[1:min(Nevents, nrow(mydata)),]
  # add first line with zero's, to make origin of curves start at 0,0
  mydata_gen <- rbind(c(rep(NA,4),0,0,0,0,0), mydata_gen)
  # where to draw the 'c/n' line?
  xaxis.point <- max(mydata_gen$cumpercUnit)

  # add first line with zero's, to make origin of curves start at 0,0
  mydata <- rbind(c(rep(NA,4),0,0,0), mydata)

  # prepare plot
  myplot <- ggplot2::ggplot(data=mydata, aes(x=cumpercUnit/denom, y=cumpercEvents/denom)) +
    ylab(paste("Cum.", ifelse(prop, "Prop.", "Percent.") , "Events")) + xlab(paste("Cum.", ifelse(prop, "Prop.", "Percent.") , "Units of Analysis")) +
    theme_light() +
    theme(legend.position="none") +
    scale_y_continuous(breaks=seq(0,100,20)/denom) +
    scale_x_continuous(breaks=seq(0,100,20)/denom) +
    geom_segment(aes(x=0, xend=100/denom, y=0, yend=100/denom), linetype="longdash") +
    geom_line()

  # add new line of maximal equality if Nunits > Nevents
  if (Nunits > Nevents){
    myplot <- myplot +
      geom_segment(data = mydata_gen, aes(x=0, xend=max(cumpercUnit/denom), y=0, yend=100/denom), linetype=3) +
      geom_segment(data = mydata_gen, aes(x=max(cumpercUnit/denom), xend=max(cumpercUnit/denom), y=2/denom, yend=100/denom), linetype=3) +
      annotate("text", x = xaxis.point/denom, y = 0, label = "c/n", size=4)
  }

  return(myplot)
}
wsteenbeek/lorenzgini documentation built on Oct. 11, 2020, 6:56 p.m.