# Generating the various charts used for documentaion (e.g. for NCAP page)
document_anomaly <- function(){
# Download meas
library(tidyverse)
library(ggrepel)
library(glue)
location_id <- rcrea::cities(name=c("Delhi", "Mumbai", "Pune", "Kolkata"))$id
month_yoy <- "2024-07-01"
weather_file <- "tmp/weather_documentation.RDS"
weather_file_fire <- "tmp/weather_documentation_fire.RDS"
# file.remove(weather_file)
# file.remove(weather_file_fire)
deweathered_trend <- creadeweather::deweather(
location_id = location_id,
deweather_process_id = "default_trend",
poll="pm25",
source="cpcb",
upload_results = F,
save_weather_filename = weather_file,
read_weather_filename = weather_file,
weather_update_era5 = F,
ntrainings = 5
)
deweathered_trend %>%
select(location_id, result) %>%
tidyr::unnest(result) %>%
filter(grepl("trend", variable)) %>%
ggplot(aes(date, value)) +
geom_line(data=function(x)filter(x, variable=="trend")) +
geom_ribbon(inherit.aes = F,
data=function(x) filter(x, variable %in% c('trend_p975','trend_p025')) %>%
tidyr::spread(variable, value), aes(x=date, ymin=trend_p025, ymax=trend_p975)) +
facet_wrap(~location_id)
# Check yoy
deweathered_trend %>%
select(location_id, result) %>%
tidyr::unnest(result) %>%
filter(variable=="trend") %>%
group_by(location_id, year=year(date), month=month(date)) %>%
summarise(value=mean(value, na.rm=T)) %>%
group_by(location_id, month) %>%
arrange(year) %>%
mutate(yoy = (value - lag(value)) / lag(value)) %>%
ungroup() %>%
mutate(date = as.Date(glue("{year}-{month}-01"))) %>%
ggplot(aes(date, yoy)) +
geom_line() +
facet_wrap(~location_id) +
geom_hline(yintercept=0)
# deweathered_trend_fire <- creadeweather::deweather(
# location_id = "delhi_ind.25_1_in",
# deweather_process_id = "default_trend_fire_96h",
# poll="pm25",
# source="cpcb",
# upload_results = F,
# save_weather_filename = weather_file_fire,
# read_weather_filename = weather_file_fire,
# weather_update_era5 = F,
# use_weather_cache = T
# )
# deweathered <- creadeweather::deweather(
# location_id = location_id,
# deweather_process_id = "default_anomaly_2018_2099",
# poll="pm25",
# source="cpcb",
# upload_results = F,
# save_weather_filename = weather_file,
# read_weather_filename = weather_file,
# weather_update_era5 = F
# )
#
#
# deweathered_yoy <- creadeweather::deweather_yoy(
# location_id = location_id,
# months = month_yoy,
# deweather_process_id = "default_anomaly_2018_2099",
# poll="pm25",
# source="cpcb",
# upload_results = F,
# keep_nonyoy_results = T,
# save_weather_filename = weather_file,
# read_weather_filename = weather_file,
# weather_update_era5 = F
# )
# deweathered_yoy_fire <- creadeweather::deweather_yoy(
# location_id = location_id,
# months = month_yoy,
# deweather_process_id = "default_anomaly_2018_2099_fire",
# poll="pm25",
# source="cpcb",
# upload_results = F,
# keep_nonyoy_results = T,
# save_weather_filename = weather_file_fire,
# read_weather_filename = weather_file_fire,
# weather_update_era5 = F
# )
# Plot trend
deweathered_trend %>%
filter(source=="cpcb") %>%
tidyr::unnest(result) %>%
filter(date >= "2018-01-01") %>%
filter(variable %in% c("trend", "observed")) %>%
select(location_id, date, variable, value) %>%
rcrea::utils.running_average(30, min_values = 20) %>%
ggplot(aes(x=date, y=value)) +
geom_hline(yintercept=0, color="grey80") +
geom_line(aes(color=variable)) +
# geom_text_repel(
# data = deweathered$result[[1]] %>% filter(date==max(date)),
# aes(label=variable, color=variable),
# nudge_x = 1,
# nudge_y = 1,
# size=3
# ) +
facet_wrap(~variable, ncol=1) +
rcrea::theme_crea() +
theme(strip.placement = "top") +
labs(
title="PM2.5 deweathered trend in Delhi and Mumbai",
subtitle="30-day running average",
y="PM2.5 (µg/m³)",
x=NULL
) +
scale_color_manual(values=c("trend"=rcrea::pal_crea[["Dark.red"]], "observed"=rcrea::pal_crea[["Blue"]])) +
facet_wrap(~location_id) +
rcrea::scale_y_crea_zero()
ggsave("doc/figures/deweathering_ts_trend.png", width=10, height=6, scale=1.5)
# Plot with cut dates
# deweathered_yoy %>%
# filter(source=="cpcb") %>%
# select(location_id, result) %>%
# tidyr::unnest(result) %>%
# rcrea::utils.running_average(30, min_values=20) %>%
# filter(variable %in% c("anomaly", "observed", "predicted")) -> plt_data
#
# ggplot(plt_data) +
# geom_hline(yintercept=0, color="grey80") +
# geom_line(data=plt_data, aes(x=date, y=value, color=variable)) +
# geom_rect(
# data=function(x) filter(x, date==max(date), variable %in% c("anomaly", "observed")),
# aes(xmin=as.Date(month_yoy), xmax=as.Date(month_yoy) + lubridate::days_in_month(month_yoy),
# ymin=-Inf, ymax=Inf, fill="Excluded from training"),
# alpha=0.2) +
# # Add same one a year before
# geom_rect(
# data=function(x) filter(x, date==max(date), variable %in% c("anomaly", "observed")),
# aes(xmin=as.Date(month_yoy) - lubridate::years(1), xmax=as.Date(month_yoy) + lubridate::days_in_month(month_yoy) - lubridate::years(1),
# ymin=-Inf, ymax=Inf, fill="Excluded from training"),
# alpha=0.2) +
# # scale_x_date(date_breaks = "1 month", date_labels = "%b %Y")
# # geom_text_repel(
# # data = deweathered_yoy$result[[1]] %>% filter(date==max(date)),
# # aes(x=date, y=value, label=variable, color=variable),
# # nudge_x = 1,
# # nudge_y = 1,
# # size=3
# # ) +
# facet_wrap(case_when(variable=="anomaly"~ "Anomaly", T ~ "Observed and Predicted") ~ location_id, ncol=2) +
# rcrea::theme_crea() +
# theme(strip.placement = "top") +
# labs(
# title="Weather-based prediction of PM2.5 in Delhi",
# subtitle="Anomaly = Observed - Predicted | 30-day running average",
# y="PM2.5 (µg/m³)",
# x=NULL,
# fill=NULL,
# color=NULL
# ) +
# rcrea::scale_color_crea_d()
#
#
# ggsave("doc/figures/deweathering_ts_yoy.png", width=10, height=6, scale=1.5)
#
# Plot yoy change
# deweathered_yoy %>%
# filter(source=="cpcb") %>%
# select(location_id, result) %>%
# tidyr::unnest(result) %>%
# filter(grepl("yoy.*_rel", variable)) %>%
# mutate(variable_str=case_when(
# grepl("total", variable) ~ "Observed change yoy",
# grepl("weather", variable) ~ "Influence of weather",
# grepl("emission", variable) ~ "Influence of emissions"
# )) %>%
# ggplot(aes(y=value, x=1)) +
# # limit width of column
# geom_col(data=function(x) filter(x, variable != "yoy_total_rel"),
# aes(fill=variable_str),
# width=0.5) +
# geom_point(data=function(x) filter(x, variable=="yoy_total_rel"), size=4) +
# geom_text(
# # data=function(x) filter(x, variable=="yoy_total_rel"),
# aes(label=glue("{variable_str}: {paste0(ifelse(value>0,'+',''), round(value, 2)*100,'%')}"),
# y=value,
# vjust=ifelse(value>0,-1,1)*1.5),
# # nudge_x = 0.0,
# # nudge_y = 0.05,
# hjust=0.5,
# size=4
# ) +
# geom_hline(yintercept=0, color="grey80") +
# rcrea::theme_crea() +
# rcrea::scale_fill_crea_d() +
# labs(
# title="July 2024 year-over-year change in PM2.5 in Delhi, India",
# subtitle="Influence of weather and emissions",
# x=NULL, y="µg/m3",
# fill=NULL
# ) +
# scale_x_continuous(limits=c(-0,2)) +
# # hide x axis
# theme(axis.text.x = element_blank(),
# axis.ticks.x = element_blank()) +
# facet_wrap(~location_id)
#
#
# ggsave("doc/figures/deweathering_bar_yoy.png", width=10, height=6, scale=1.5)
# Compare the two appraoches: trend and anomaly
yoy_based_on_trend <- deweathered_trend %>%
filter(source=="cpcb") %>%
select(location_id, result) %>%
tidyr::unnest(result) %>%
filter(floor_date(date, "month") %in% c(
month_yoy %>% as.Date(),
month_yoy %>% as.Date() - lubridate::years(1)
)) %>%
group_by(location_id, year=year(date), variable) %>%
summarise(value=mean(value, na.rm=T)) %>%
arrange(year) %>%
group_by(across(-c(year, value))) %>%
mutate(delta = value - lag(value)) %>%
# Add observed value of first year
ungroup() %>%
group_by(location_id) %>%
mutate(observed_prev = value[year == min(year) & variable == "observed"]) %>%
ungroup() %>%
filter(!is.na(delta)) %>%
select(-c(value)) %>%
tidyr::pivot_wider(
names_from = "variable",
values_from = "delta",
names_prefix = "yoy_"
) %>%
rename(
yoy_total = yoy_observed,
yoy_emission = yoy_trend,
) %>%
mutate(
yoy_weather = yoy_total - yoy_emission,
yoy_total_rel = yoy_total / observed_prev,
yoy_residual = yoy_anomaly / observed_prev,
yoy_weather_rel = yoy_weather / observed_prev,
yoy_emission_rel = yoy_emission / observed_prev
) %>%
pivot_longer(
cols = -c(year, observed_prev, location_id),
names_to = "variable",
values_to = "value",
) %>%
filter(grepl("yoy.*_rel", variable)) %>%
select(location_id, variable, value) %>%
mutate(variable_str=case_when(
grepl("total", variable) ~ "Observed change yoy",
grepl("weather", variable) ~ "Influence of weather",
grepl("emission", variable) ~ "Influence of emissions"
))
yoy_based_on_trend %>%
filter(grepl("delhi|mumbai", location_id)) %>%
ggplot(aes(y=value, x=1)) +
# limit width of column
geom_col(data=function(x) filter(x, variable != "yoy_total_rel"),
aes(fill=variable_str),
width=0.5) +
geom_point(data=function(x) filter(x, variable=="yoy_total_rel"), size=4) +
geom_text(
# data=function(x) filter(x, variable=="yoy_total_rel"),
aes(label=glue("{variable_str}: {paste0(ifelse(value>0,'+',''), round(value, 2)*100,'%')}"),
y=value,
vjust=ifelse(value>0,-1,1)*1.5),
# nudge_x = 0.0,
# nudge_y = 0.05,
hjust=0.5,
size=4
) +
geom_hline(yintercept=0, color="grey80") +
rcrea::theme_crea() +
rcrea::scale_fill_crea_d() +
labs(
title="July 2024 year-over-year change in PM2.5 in Delhi and Mumbai, India",
subtitle="Influence of weather and emissions",
x=NULL, y="µg/m3",
fill=NULL
) +
scale_x_continuous(limits=c(-0,2)) +
# hide x axis
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank()) +
facet_wrap(~location_id)
ggsave("doc/figures/deweathering_bar_yoy.png", width=10, height=6, scale=1.5)
# yoy_based_on_anomaly <- deweathered_yoy %>%
# filter(source=="cpcb") %>%
# select(location_id, result) %>%
# tidyr::unnest(result) %>%
# filter(grepl("yoy.*_rel", variable))
#
#
# bind_rows(
# yoy_based_on_trend %>% mutate(approach="Trend"),
# yoy_based_on_anomaly %>% mutate(approach="Anomaly")
# ) %>%
# mutate(variable_str=case_when(
# grepl("total", variable) ~ "Observed change yoy",
# grepl("weather", variable) ~ "Influence of weather",
# grepl("emission", variable) ~ "Influence of emissions"
# )) %>%
# ggplot(aes(y=value, x=approach)) +
# geom_col(data=function(x) filter(x, variable != "yoy_total_rel"),
# aes(fill=variable_str),
# width=0.5) +
# geom_point(data=function(x) filter(x, variable=="yoy_total_rel"), size=4) +
# geom_text(
# # data=function(x) filter(x, variable=="yoy_total_rel"),
# aes(label=glue("{variable_str}: {paste0(ifelse(value>0,'+',''), round(value, 2)*100,'%')}"),
# y=value,
# vjust=ifelse(value>0,-1,1)*1.5),
# # nudge_x = 0.0,
# # nudge_y = 0.05,
# hjust=0.5,
# size=4
# ) +
# geom_hline(yintercept=0, color="grey80") +
# rcrea::theme_crea() +
# rcrea::scale_fill_crea_d() +
# labs(
# title="July 2024 year-over-year change in PM2.5 in Delhi, India",
# subtitle="Influence of weather and emissions",
# x=NULL, y="µg/m3",
# fill=NULL
# ) +
# # scale_x_continuous(limits=c(-0,2)) +
# # hide x axis
# # theme(axis.text.x = element_blank(),
# # axis.ticks.x = element_blank()) +
# facet_wrap(~location_id)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.