R/plot.R

Defines functions plot_dummy get_data condistr_plot

library(tidyverse)
library(ggplot2)
library(QuantifQuantile)
library(ggrepel)


# ggplot(data = sample, aes(x=x,y=y)) + geom_point(aes(color = as.factor(by)))

condistr_plot <- function(data, x,y, by = NULL){
  if (is.null(by)){
    data <- data %>% add_column(by = 1)
    by <- "by"
  }
  qtiles <- (1:99/100)

  results <- as_tibble()
  groups <- unique(pull(data[,by]))
  for (i in groups){
    print(paste0("i = ", i))

    qreg.model <- QuantifQuantile(X = pull(data[data[,by] == i, x]),
                                  Y = pull(data[data[,by] == i,y]),
                                  alpha = qtiles)

    new_results <- fitted.values(qreg.model) %>%
      t() %>% as_tibble() %>% add_column(x = qreg.model$x) %>%
      pivot_longer(cols = starts_with("V"), names_to = "pctile", values_to = "y") %>%
      add_column(by = i) %>%
      mutate(ptile = as.numeric(sub("V","",pctile)),
             weights = ((50-abs(50-ptile))/50)^4,
             pairs = abs(50-ptile),
             label = sub("V","P",pctile),
             label = ifelse((x==max(x) | x ==min(x)),label, NA),
             label = ifelse(ptile == 50, "Median", label),
             label = ifelse(x==max(x) | pairs == 25, label, NA),
             label = ifelse(x==min(x) | ptile == 50, label, NA)
             )

    results <- results %>% bind_rows(new_results)

  }


  results
  split_two <- results %>% filter(ptile >= 50) %>% arrange(by,ptile,x)
  split_one <- results %>% filter(ptile < 50) %>% arrange(by,ptile,desc(x))
  sorted <- bind_rows(split_one, split_two)

  p <- ggplot(data = sorted,aes(x=x,y=y)) +
    geom_polygon(aes(fill=factor(by),group=100*by+pairs),alpha = .015) +
    geom_line(data = to_plot[to_plot$ptile==50,], aes(color=factor(by),group=by, linetype = "Median")) +
    geom_line(data = to_plot[to_plot$ptile==25 | to_plot$ptile==75 ,],
              aes(color=factor(by),
                  group=100*by+ptile,
                  linetype = "IQR"),alpha =.6)+
    guides(color = FALSE) +
    scale_linetype_manual("",values=c("IQR"=2,"Median"=1))
}

get_data <-function(){
  x <- runif(10000)
  by <- sample(c(0,1), 10000, replace= TRUE)
  y <- 2*x+2*by*x^.5 + rnorm(10000)

  data <- data.frame(x,y,by) %>% as_tibble()

  data
}

plot_dummy <- function(){
  data <- get_data()

  p <- condistr_plot(data, x= "x", y= "y", by = "by")


  p + theme_classic() +
    theme(panel.grid.major=element_blank(),
          panel.grid.minor=element_blank(),
          legend.position = c(.2,.9),
          legend.box = "horizontal")+
    scale_fill_discrete(labels = c("female",'male')) +
    guides(fill = guide_legend(title = "",order=1,override.aes = list(alpha=1)))

}
delliotthart/condistR documentation built on May 14, 2021, 6:01 p.m.