knitr::opts_chunk$set(echo = TRUE)
library(tidyverse) library(sf) library(rnaturalearth) # own functions my_funs <- list.files(path = "./R/", pattern = "\\.R", full.names = TRUE) purrr::walk(my_funs, .f = source) # local data storage pth <- "../__DATA/OSN-COVID-ECTRL/"
read in data - should have processed before!
# icao country mapping icao <- readr::read_csv("./data/ICAO-ISO-RQ.csv") # osn data with soil/TW/EEZ #ds <- fst::read_fst(path = paste0(pth, "OSN_2021_02_clean")) %>% tibble() #ds <- fst::read_fst(path = paste0(pth, "OSN_2019_clean")) %>% tibble() #ds <- fst::read_fst(path = paste0(pth, "OSN_2020_clean")) %>% tibble() fn <- list.files(path = pth, pattern = "OSN_2021_.*_clean", full.names = TRUE) if(length(fn) > 1) warning("!!!! more than 1 target file") else{ ds <- fst::read_fst(fn) %>% tibble() print(meta_dataset_description(ds, "now")) }
colSums(is.na(ds)) we have more destinations associated than origins
rewritten with function
# Utility functions # icao_singles <- c("C","K","Y","Z") # icao_non_russia <- c("UA","UB","UC","UD","UG","UK","UM","UT") icao_to_ctry_reg <- . %>% mutate( ADEP_CTRY = stringr::str_sub(origin, 1,2) ,ADEP_REG = stringr::str_sub(origin, 1,1) ,ADES_CTRY = stringr::str_sub(destination, 1, 2) ,ADES_REG = stringr::str_sub(destination, 1, 1) ) clean_icao_ctry <- function( .ds ,.icao_singles = c("C","K","Y","Z") # single letter countries ,.icao_non_russia = c("UA","UB","UC","UD","UG","UK","UM","UT") # non Russia ){ df <- .ds %>% mutate( ADEP_CTRY = case_when( #------------ ADEP correction --------------- ADEP_REG %in% .icao_singles ~ str_sub(origin, 1,1) ,ADEP_REG == "U" & !(ADEP_CTRY %in% .icao_non_russia) ~ "U" , TRUE ~ ADEP_CTRY ) ,ADES_CTRY = case_when( #------------ ADES correction --------------- ADES_REG %in% .icao_singles ~ str_sub(origin, 1,1) ,ADES_REG == "U" & !(ADES_CTRY %in% .icao_non_russia) ~ "U" , TRUE ~ ADES_CTRY ) ) # end mutate ------------- return(df) }
ADEP and ADES given, i.e. origin and destination To-do: what to do with the wild ones 00NY, etc
crunch_adep_ades_known <- function(.ds) { tmp1 <- .ds %>% filter(!is.na(origin) & !is.na(destination)) tmp1 <- tmp1 %>% icao_to_ctry_reg() %>% clean_icao_ctry() %>% rename(ADEP = origin, ADES = destination) tmp1 <- tmp1 %>% group_by(day, ADEP, ADES) %>% summarise(N_DEP = n(), .groups = "drop") } tmp1 <- ds %>% crunch_adep_ades_known()
2019: 7448256 2020: 7224151
ADEP known hist(adep_xxxx$altitude_2, breaks = 200)
todo - china midland not seen by 12TW; id 84 NOS946
# helper function establishing icao iso3 mapping iso3_icao_map <- function(.icao) { # combine with icao countries ## trim icao to a single hit (first occurrence) iso3_icao <- .icao %>% filter(!is.na(ISO_3)) %>% group_by(ISO_3) %>% slice(1) %>% select(ICAO_CTRY, ISO_3) } crunch_adep_known <- function(.ds, .icao = icao) { df <- .ds %>% filter( !is.na(origin) & is.na(destination) ) iso3_icao <- iso3_icao_map(.icao = .icao) df <- df %>% left_join(iso3_icao, by = c("POS2_TW" = "ISO_3") ) %>% rename(POS2_ICAO2 = ICAO_CTRY ) %>% left_join(iso3_icao, by = c("POS2_EEZ" = "ISO_3") ) %>% rename(POS2a_ICAO2 = ICAO_CTRY ) %>% mutate(ADES = case_when( !is.na(POS2_ICAO2) & altitude_2 <= 8000 ~ paste0(POS2_ICAO2,"xx") , is.na(POS2_ICAO2) & !is.na(POS2a_ICAO2) & altitude_2 <= 8000 ~ paste0(POS2a_ICAO2,"xx") ,!is.na(POS2_ICAO2) & altitude_2 > 8000 ~ paste0(POS2_ICAO2,"hi") , is.na(POS2_ICAO2) & !is.na(POS2a_ICAO2) & altitude_2 > 8000 ~ paste0(POS2a_ICAO2,"hi") ,TRUE ~ "idnk" ) ) }
2021-02: 467319 2020: 622755 2019: 1000868
tmp2 <- ds %>% crunch_adep_known() %>% rename(ADEP = origin) %>% group_by(day, ADEP, ADES) %>% summarise(N_DEP = n(), .groups = "drop")
departure unkown
crunch_ades_known <- function(.ds, .icao = icao) { df <- .ds %>% filter( is.na(origin) & !is.na(destination) ) iso3_icao <- iso3_icao_map(.icao = .icao) df <- df %>% left_join(iso3_icao, by = c("POS1_TW" = "ISO_3") ) %>% rename(POS1_ICAO2 = ICAO_CTRY ) %>% left_join(iso3_icao, by = c("POS1_EEZ" = "ISO_3") ) %>% rename(POS1a_ICAO2 = ICAO_CTRY ) %>% mutate(ADEP = case_when( !is.na(POS1_ICAO2) & altitude_1 <= 8000 ~ paste0(POS1_ICAO2,"xx") , is.na(POS1_ICAO2) & !is.na(POS1a_ICAO2) & altitude_1 <= 8000 ~ paste0(POS1a_ICAO2,"xx") ,!is.na(POS1_ICAO2) & altitude_1 > 8000 ~ paste0(POS1_ICAO2,"hi") , is.na(POS1_ICAO2) & !is.na(POS1a_ICAO2) & altitude_1 > 8000 ~ paste0(POS1a_ICAO2,"hi") ,TRUE ~ "idnk" ) ) }
tmp3 <- ds %>% crunch_ades_known() %>% rename(ADES = destination) %>% group_by(day, ADEP, ADES) %>% summarise(N_DEP = n(), .groups = "drop")
2019: 1516180 2020: 1076855
nothing known
crunch_neither_known <- function(.ds, .icao = icao) { df <- .ds %>% filter( is.na(origin) & is.na(destination) ) iso3_icao <- iso3_icao_map(.icao = .icao) # --------- ADEP -------------------------------------------------------- rq <- df %>% select(id, POS1_S, POS1_TW, POS1_EEZ) %>% mutate(POS1_MAJ = purrr::pmap_chr(.[-1], .f = ~calculate_mode(c(...)))) df <- df %>% left_join(rq %>% select(id, POS1_MAJ), by = "id") %>% left_join(iso3_icao, by = c("POS1_MAJ" = "ISO_3") ) %>% mutate(ADEP = case_when( !is.na(POS1_MAJ) & altitude_1 <= 8000 ~ paste0(ICAO_CTRY,"xx") ,!is.na(POS1_MAJ) & altitude_1 > 8000 ~ paste0(ICAO_CTRY,"hi") ,TRUE ~ "idnk" )) # --------------- ADES ------------------------------------------------- rq <- df %>% select(id, POS2_S, POS2_TW, POS2_EEZ) %>% mutate(POS2_MAJ = purrr::pmap_chr(.[-1], .f = ~calculate_mode(c(...)))) df <- df %>% left_join(rq %>% select(id, POS2_MAJ), by = "id") %>% left_join(iso3_icao, by = c("POS2_MAJ" = "ISO_3") ) %>% mutate(ADES = case_when( !is.na(POS2_MAJ) & altitude_2 <= 8000 ~ paste0(ICAO_CTRY.y,"xx") ,!is.na(POS2_MAJ) & altitude_1 > 8000 ~ paste0(ICAO_CTRY.y,"hi") ,TRUE ~ "idnk" )) }
outsourced to cleaning step
world <- ne_countries(scale = "medium", returnclass = "sf") world <- world %>% select(iso_a3)
# --------- ADEP -------------------------------------------------------- rq <- df %>% pts_latlon_to_sf(latitude_1, longitude_1, FALSE) %>% sf::st_join(world) %>% sf::st_drop_geometry() %>% rename(POS1_S = iso_a3) %>% select(id, POS1_S)
df <- df %>% left_join(rq, by = "id")
# --------------- ADES ------------------------------------------------- rq <- df %>% filter(!is.na(latitude_2) | !is.na(longitude_2)) %>% pts_latlon_to_sf(latitude_2, longitude_2, FALSE) %>% sf::st_join(world) %>% sf::st_drop_geometry() %>% rename(POS2_S = iso_a3) %>% select(id, POS2_S)
df <- df %>% left_join(rq, by = "id")
tmp4 <- ds %>% crunch_neither_known() %>% group_by(day, ADEP, ADES) %>% summarise(N_DEP = n(), .groups = "drop")
2019: 206,792 2020: 100,952
Saving results for further processing
# CHECK THAT FILE NAME IS CORRECT!!! bind_rows(tmp1, tmp2, tmp3, tmp4) %>% readr::write_csv(paste0(pth, "OSN_2020_ADEP_ADES_count.csv"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.