D-vine quantile regression with discrete variables: analysis of bike rental

knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)

Required packages

library(vinereg)
require(ggplot2)
require(dplyr)
require(purrr)
require(scales)
require(quantreg)

Plot function for marginal effects

plot_marginal_effects <- function(covs, preds) {
    cbind(covs, preds) %>%
        tidyr::gather(alpha, prediction, -seq_len(NCOL(covs))) %>%
        dplyr::mutate(prediction = as.numeric(prediction)) %>%
        tidyr::gather(variable, value, -(alpha:prediction)) %>%
        dplyr::mutate(value = as.numeric(value)) %>%
        ggplot(aes(value, prediction, color = alpha)) +
        geom_point(alpha = 0.15) + 
        geom_smooth(span = 0.5, se = FALSE) + 
        facet_wrap(~ variable, scale = "free_x") +
        theme(legend.position = "none") +
        theme(plot.margin = unit(c(0, 0, 0, 0), "mm")) +
        xlab("")
}

Data preparation

Load data

bikedata <- read.csv("day.csv")
bikedata[, 2] <- as.Date(bikedata[, 2])
head(bikedata)

Rename variables

bikedata  <- bikedata %>%
    rename(
        temperature = atemp, 
        month = mnth,
        weathersituation = weathersit,
        humidity = hum,
        count = cnt
    )

Un-normalize variables

See variable description on UCI web page.

bikedata <- bikedata %>%
    mutate(
        temperature = 66 * temperature + 16,
        windspeed = 67 * windspeed,
        humidity = 100 * humidity
    )

Show trend

ggplot(bikedata, aes(dteday, count)) +
    geom_line() + 
    scale_x_date(labels = scales::date_format("%b %y")) + 
    xlab("date") + 
    ylab("rental count") + 
    stat_smooth(method = "lm", se = FALSE, linetype = "dashed") + 
    theme(plot.title = element_text(lineheight = 0.8, size = 20)) +
    theme(text = element_text(size = 18))

Remove trend

lm_trend <- lm(count ~ instant, data = bikedata)
trend <- predict(lm_trend)
bikedata <- mutate(bikedata, count = count / trend)
ggplot(bikedata, aes(dteday, count)) + 
    geom_line() + 
    scale_x_date(labels = scales::date_format("%b %y")) + 
    xlab("date") + 
    ylab("detrended rental count") + 
    theme(plot.title = element_text(lineheight = 0.8, size = 20)) + 
    theme(text = element_text(size = 18))

Drop useless variables

bikedata <- bikedata %>%
    select(-instant, -dteday, -yr) %>%  # time indices
    select(-casual, -registered) %>%    # casual + registered = count
    select(-holiday) %>%                # we use 'workingday' instead
    select(-temp)                       # we use 'temperature' (feeling temperature)

Declare discrete variables as ordered

disc_vars <- c("season", "month", "weekday", "workingday", "weathersituation")
bikedata <- bikedata %>%
    mutate(weekday = ifelse(weekday == 0, 7, weekday)) %>%  # sun at end of week
    purrr::modify_at(disc_vars, as.ordered)

D-vine regression model

Fit model

fit <- vinereg(
  count ~ ., 
  data = bikedata, 
  family = c("onepar", "tll"), 
  selcrit = "aic"
)
fit
summary(fit)

In-sample predictions

alpha_vec <- c(0.1, 0.5, 0.9)
pred <- fitted(fit, alpha_vec)

Marginal effects

plot_marginal_effects(
    covs = select(bikedata, temperature), 
    preds = pred
)
plot_marginal_effects(covs = select(bikedata, humidity), preds = pred) +
    xlim(c(25, 100))
plot_marginal_effects(covs = select(bikedata, windspeed), preds = pred) 
month_labs <- c("Jan","", "Mar", "", "May", "", "Jul", "", "Sep", "", "Nov", "")
plot_marginal_effects(covs = select(bikedata, month), preds = pred) +
        scale_x_discrete(limits = 1:12, labels = month_labs)
plot_marginal_effects(covs = select(bikedata, weathersituation), 
                      preds = pred) +
    scale_x_discrete(limits = 1:3,labels = c("good", "medium", "bad"))
weekday_labs <- c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
plot_marginal_effects(covs = select(bikedata, weekday), preds = pred) +
    scale_x_discrete(limits = 1:7, labels = weekday_labs)
plot_marginal_effects(covs = select(bikedata, workingday), preds = pred) +
    scale_x_discrete(limits = 0:1, labels = c("no", "yes")) +
    geom_smooth(method = "lm", se = FALSE) +
    xlim(c(0, 1))
season_labs <- c("spring", "summer", "fall", "winter")
plot_marginal_effects(covs = select(bikedata, season), preds = pred) +
    scale_x_discrete(limits = 1:4, labels = season_labs)


Try the vinereg package in your browser

Any scripts or data that you put into this service are public.

vinereg documentation built on Nov. 2, 2023, 5:51 p.m.