Nothing
## ----knitr-setup, include = FALSE---------------------------------------------
knitr::opts_chunk$set(fig.align = "center",
fig.width = 5,
fig.height = 4,
dpi = 100)
## ----vis-dat, echo = TRUE-----------------------------------------------------
library(visdat)
vis_dat(airquality)
## ----vis-miss-cluster-vanilla-------------------------------------------------
vis_miss(airquality)
## ----example-geom-point, echo = TRUE, fig.width = 5, fig.height=3-------------
library(ggplot2)
ggplot(airquality,
aes(x = Solar.R,
y = Ozone)) +
geom_point()
## ----geom-miss-point, echo = TRUE, out.width = "50%", fig.show='hold'---------
ggplot(airquality,
aes(x = Solar.R,
y = Ozone)) +
geom_point()
library(naniar)
ggplot(airquality,
aes(x = Solar.R,
y = Ozone)) +
geom_miss_point()
## ----ggmissing-facet, echo = TRUE, out.width = "70%"--------------------------
ggplot(airquality,
aes(x = Solar.R,
y = Ozone)) +
geom_miss_point() +
facet_wrap(~Month)
## ----ggmissing-theme, echo = TRUE, out.width = "70%"--------------------------
ggplot(airquality,
aes(x = Solar.R,
y = Ozone)) +
geom_miss_point() +
facet_wrap(~Month) +
theme_dark()
## ----gg-miss-var--------------------------------------------------------------
gg_miss_var(airquality)
## ----gg-miss-var-theme--------------------------------------------------------
gg_miss_var(airquality) + theme_bw()
gg_miss_var(airquality) + labs(y = "Look at all the missing ones")
## ----gg-miss-var-facet--------------------------------------------------------
gg_miss_var(airquality, facet = Month)
## ----mds-fig, out.width = "800px", echo = F-----------------------------------
knitr::include_graphics("missingness-data-structures.png")
## ----as-shadow----------------------------------------------------------------
as_shadow(airquality)
## ----bind-shadow, echo = TRUE-------------------------------------------------
aq_shadow <- bind_shadow(airquality)
aq_nab <- nabular(airquality)
library(dplyr)
glimpse(aq_shadow)
glimpse(aq_nab)
all.equal(aq_shadow, aq_nab)
## ----bind-shadow-pattern------------------------------------------------------
airquality %>%
bind_shadow() %>%
group_by(Ozone_NA) %>%
summarise_at(.vars = "Solar.R",
.funs = c("mean", "sd", "var", "min", "max"),
na.rm = TRUE)
## ----shadow-plot, echo = TRUE, out.width = "60%"------------------------------
ggplot(aq_shadow,
aes(x = Temp,
colour = Ozone_NA)) +
geom_density()
## ----temp-humidity-explore----------------------------------------------------
# what if we explore the value of air temperature and humidity based on
# the missingness of each
oceanbuoys %>%
bind_shadow() %>%
ggplot(aes(x = air_temp_c,
fill = humidity_NA)) +
geom_histogram()
oceanbuoys %>%
bind_shadow() %>%
ggplot(aes(x = humidity,
fill = air_temp_c_NA)) +
geom_histogram()
## ----simpute-invisible, echo = TRUE, fig.height = 3---------------------------
library(simputation)
library(dplyr)
airquality %>%
impute_lm(Ozone ~ Temp + Wind) %>%
ggplot(aes(x = Temp,
y = Ozone)) +
geom_point()
## ----simpute-visible, echo = TRUE, fig.height = 3-----------------------------
aq_shadow %>%
as.data.frame() %>%
impute_lm(Ozone ~ Temp + Wind) %>%
ggplot(aes(x = Temp,
y = Ozone,
colour = Ozone_NA)) +
geom_point()
## ----helpers-n-miss, echo = TRUE----------------------------------------------
dplyr::n_distinct(airquality)
dplyr::n_distinct(airquality$Ozone)
n_miss(airquality)
n_miss(airquality$Ozone)
n_complete(airquality)
n_complete(airquality$Ozone)
## ----prop-miss-case-----------------------------------------------------------
prop_miss_case(airquality)
pct_miss_case(airquality)
## ----miss-case-summary--------------------------------------------------------
miss_case_summary(airquality)
## ----miss-case-table----------------------------------------------------------
miss_case_table(airquality)
## ----prop-miss-var------------------------------------------------------------
prop_miss_var(airquality)
pct_miss_var(airquality)
## ----miss-var-summary---------------------------------------------------------
miss_var_summary(airquality)
## ----miss-var-table-----------------------------------------------------------
miss_var_table(airquality)
## ----miss-var-run-------------------------------------------------------------
miss_var_run(pedestrian,
hourly_counts)
## ----ped-miss-var-span--------------------------------------------------------
miss_var_span(pedestrian,
hourly_counts,
span_every = 100)
## ----ped-miss-var-sum---------------------------------------------------------
pedestrian %>% miss_var_summary()
## ----ped-group-by-miss-var-sum------------------------------------------------
pedestrian %>%
group_by(month) %>%
miss_var_summary() %>%
filter(variable == "hourly_counts")
## ----add-prop-miss, echo = TRUE-----------------------------------------------
airquality %>%
add_prop_miss() %>%
head()
## ----rpart-miss, echo = TRUE--------------------------------------------------
library(rpart)
library(rpart.plot)
airquality %>%
add_prop_miss() %>%
rpart(prop_miss_all ~ ., data = .) %>%
prp(type = 4, extra = 101, prefix = "Prop. Miss = ")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.