# Set the default knitr chunk option `echo = TRUE`, so the source of each
# chunk is shown in the rendered document.
knitr::opts_chunk$set(echo = TRUE)

This Rmd reads in the daily average AQI and calculates the percentile of each daily value relative to the other values for that month. It writes the percentile values to the CSV file `aqi_percentile_month.csv`.

library(tidyverse)
library(here)
library(janitor)
library(lubridate)

R Markdown

# Read the daily AQI table from data/nswgov_daily.csv, clean the column
# names, and parse the `date` column as day-month-year.
aqi <- here::here("data", "nswgov_daily.csv") %>%
  read_csv() %>%
  clean_names() %>%
  mutate(date = dmy(date))

# Quick look at the columns and their types.
glimpse(aqi)

Make it long

# Keep the first 27 columns, then stack every column except the first into
# `site` (former column name) / `aqi` (value) pairs.
aqi_long <- aqi %>%
  select(1:27) %>%
  pivot_longer(cols = -1, names_to = "site", values_to = "aqi")

select just sydney

# Keep only the rows for the `sydney_central_east_raqi_24_hour_index` site.
sydney <- filter(aqi_long, site == "sydney_central_east_raqi_24_hour_index")

Just Sydney_2019

This is not what we want: it gives percentiles within 2019, not the percentile of that date across years.

# Rows dated after 31 December 2018.
# NOTE(review): there is no upper bound, so any rows dated after 2019 are
# kept as well - confirm this matches the intended "2019" subset.
sydney2019 <- sydney %>%
  filter(date > as.Date("2018-12-31"))

# Percentile rank of each AQI value within that subset: `percentile` is the
# 0-1 rank and `percent_100` is the same rank scaled to 0-100.
percentile <- sydney2019 %>%
  mutate(
    percentile = percent_rank(aqi),
    percent_100 = percentile * 100
  )

Percentiles

Use the `yday()` function from lubridate to get a day-of-year number.

# Derive day-of-year, month and year columns from `date`; the month is
# stored as a factor.
sydney <- sydney %>%
  mutate(
    day = yday(date),
    month = as.factor(month(date)),
    year = year(date)
  )

By day number

This adds a percentile by day number, but there are only 5 values for each day, so it may be better to look at today relative to the month.

# Percentile (0-100) of each AQI value among the values that share the same
# day-of-year number.
day_percent <- sydney %>%
  group_by(day) %>%
  mutate(percent = percent_rank(aqi) * 100) %>%
  # Drop the grouping so later verbs on `day_percent` are not silently
  # applied per day.
  ungroup()

By month

# Percentile (0-100) of each AQI value among the values that fall in the
# same month (months are pooled across years).
month_percent <- sydney %>%
  group_by(month) %>%
  mutate(percent = percent_rank(aqi) * 100) %>%
  # Drop the grouping so later verbs on `month_percent` are not silently
  # applied per month.
  ungroup()

# Write to csv

# Save the month-relative percentile table to data/aqi_percentile_month.csv.
write_csv(month_percent, here::here("data", "aqi_percentile_month.csv"))


ropenscilabs/smoky documentation built on May 17, 2022, 11:57 a.m.