inst/doc/Airports.R

## ---- include = FALSE----------------------------------------------------
knitr::opts_chunk$set(
  echo = TRUE, 
  message = FALSE, 
  warning = FALSE, 
  fig.align = 'center',
  fig.width = 7,
  fig.height = 5
)

## ----setup---------------------------------------------------------------
library(biclustermd)

## ------------------------------------------------------------------------
# install.packages("nycflights13")
library(nycflights13)
data("flights")

## ------------------------------------------------------------------------
library(dplyr)
flights <- flights %>%
  select(month, dest, arr_delay)

## ------------------------------------------------------------------------
library(tidyr)
flights <- flights %>%
  group_by(month, dest) %>%
  summarise(mean_arr_delay = mean(arr_delay, na.rm = TRUE)) %>%
  spread(dest, mean_arr_delay) %>% 
  as.data.frame()

## ------------------------------------------------------------------------
rownames(flights) <- month.name[flights$month]
flights <- as.matrix(flights[, -1])
flights[1:5, 1:5]

## ------------------------------------------------------------------------
library(biclustermd)
bc <- biclustermd(data = flights, col_clusters = 6, row_clusters = 4)
bc

## ------------------------------------------------------------------------
library(ggplot2)
?autoplot.biclustermd_sse
autoplot(bc$SSE)

## ------------------------------------------------------------------------
?autoplot.biclustermd_sim
autoplot(bc$Similarities)

## ------------------------------------------------------------------------
?autoplot.biclustermd
autoplot(bc) +
  scale_fill_viridis_c(na.value = "white") +
  labs(x = "Destination Airport", y = "Month", fill = "Average Delay")

## ------------------------------------------------------------------------
autoplot(bc, transform_colors = TRUE, c = 1/10) +
  scale_fill_viridis_c(na.value = "white", limits = c(0, 1)) +
  labs(x = "Destination Airport", y = "Month", fill = "Average Delay")

## ------------------------------------------------------------------------
autoplot(bc, transform_colors = TRUE, c = 1/10, reorder = TRUE) +
  scale_fill_viridis_c(na.value = "white", limits = c(0, 1)) +
  labs(x = "Destination Airport", y = "Month", fill = "Average Delay")

## ------------------------------------------------------------------------
repeated_bc <- rep_biclustermd(flights, nrep = 100, col_clusters = 6, row_clusters = 4)
repeated_bc

## ------------------------------------------------------------------------
repeated_bc$runtime

## ------------------------------------------------------------------------
plot(cummin(repeated_bc$rep_sse), type = 'o', ylab = 'Cumulative Minimum', xlab = 'Repeat Number')

## ------------------------------------------------------------------------
training <- gather(repeated_bc$best_bc)
training %>% head()

## ------------------------------------------------------------------------
autoplot(repeated_bc$best_bc, row_clusts = 1, col_clusts = 4) +
  scale_fill_viridis_c(na.value = "white") +
  labs(x = "Destination Airport", y = "Month", fill = "Average Delay")

model <- training %>% 
  filter(row_cluster == 1, col_cluster == 4) %>% 
  lm(value ~ row_name + col_name, data = .)
summary(model)
sqrt(mean(resid(model) ^ 2))

Try the biclustermd package in your browser

Any scripts or data that you put into this service are public.

biclustermd documentation built on June 17, 2021, 5:11 p.m.