# Global knitr chunk options for this document.
knitr::opts_chunk$set(
  collapse = TRUE,
  # Chunks are displayed but not evaluated when the document is knitted.
  # Spelled FALSE rather than F: F is an ordinary variable that can be
  # reassigned, FALSE is a reserved word.
  eval = FALSE,
  comment = "#>"
)

GNAF is distributed as a set of relational tables designed to be loaded into a database. The code below joins a subset of those tables into a single, simple flat table.

The data were obtained from https://data.gov.au/dataset/ds-dga-19432f89-dc3a-4ef3-b943-5326ef1dbecc/details

Archive used: g-naf_feb22_allstates_gda2020_psv_105.zip

The zip archive is about 1.5 GB (~3 GB unzipped); the data for each state are in separate files.

library(dplyr)

# Directory the NSW files were moved to ('...' is a placeholder for that path).
# `files` becomes a named list: each element is a full file path and its name
# is the bare file name, so individual tables can be looked up by file name.
files <- list.files('...', full.names = TRUE)
names(files) <- basename(files)
files <- as.list(files)

# Read the NSW tables needed for the flat table (pipe-delimited files).
#
# Files are looked up with `[[` and their complete file name. `$` partially
# matches list names, so a truncated or mistyped name can silently resolve to
# an unintended element; `[[` only matches exactly.
read_psv <- function(file_name) {
  readr::read_delim(files[[file_name]], delim = "|")
}

street_number_postcode <- read_psv("NSW_ADDRESS_DETAIL_psv.psv")
street_name <- read_psv("NSW_STREET_LOCALITY_psv.psv")
locality_name <- read_psv("NSW_LOCALITY_psv.psv")
geocode <- read_psv("NSW_ADDRESS_DEFAULT_GEOCODE_psv.psv")
meshblock_2016 <- read_psv("NSW_ADDRESS_MESH_BLOCK_2016_psv.psv")
meshblock_2016_code <- read_psv("NSW_MB_2016_psv.psv")

# Clean + free memory: keep only the columns used below and overwrite the
# full address table with the reduced one.
# 4,921,334 to 2,801,406 rows ... for each distinct combination of street
# number, street, locality and postcode only the first row is kept, and
# `.keep_all = TRUE` retains that row's ADDRESS_DETAIL_PID for the later joins.
street_number_postcode <-
  street_number_postcode %>%
  select(ADDRESS_DETAIL_PID, NUMBER_FIRST, STREET_LOCALITY_PID, LOCALITY_PID, POSTCODE) %>%
  distinct(NUMBER_FIRST, STREET_LOCALITY_PID, LOCALITY_PID, POSTCODE, .keep_all = TRUE)

# Build the flat table. Starting from the de-duplicated addresses, attach in
# turn: street name and type, locality name, coordinates, the 2016 mesh block
# id, and finally the mesh block code for that id.
# All joins are left joins, so every address row is kept and unmatched
# lookups are filled with NA.
gnaf_flat <-
  street_number_postcode %>%
  left_join(
    select(street_name, STREET_LOCALITY_PID, STREET_NAME, STREET_TYPE_CODE),
    by = "STREET_LOCALITY_PID"
  ) %>%
  left_join(
    select(locality_name, LOCALITY_PID, LOCALITY_NAME),
    by = "LOCALITY_PID"
  ) %>%
  left_join(
    select(geocode, ADDRESS_DETAIL_PID, LONGITUDE, LATITUDE),
    by = "ADDRESS_DETAIL_PID"
  ) %>%
  left_join(
    select(meshblock_2016, ADDRESS_DETAIL_PID, MB_2016_PID),
    by = "ADDRESS_DETAIL_PID"
  ) %>%
  left_join(
    select(meshblock_2016_code, MB_2016_PID, MB_2016_CODE),
    by = "MB_2016_PID"
  )

# Combine street name and street type into a single STREET_NAME column, then
# keep only the columns that make up the final flat table.
#
# The combination is NA-aware: paste() would coerce a missing value to the
# text "NA", so missing parts are handled explicitly instead of being pasted
# and stripped afterwards with a regular expression.
gnaf_flat <-
  gnaf_flat %>%
  mutate(
    STREET_NAME = case_when(
      # No street name at all: stay missing rather than become the text "NA".
      is.na(STREET_NAME) ~ NA_character_,
      # Street without a type: use the name on its own.
      is.na(STREET_TYPE_CODE) ~ STREET_NAME,
      TRUE ~ paste(STREET_NAME, STREET_TYPE_CODE)
    )
  ) %>%
  select(NUMBER_FIRST, STREET_NAME, LOCALITY_NAME, POSTCODE, LONGITUDE, LATITUDE, MB_2016_CODE)

# Order the rows by postcode, street, locality and street number, then fix
# the column types: street number and postcode as integers, mesh block code
# as text. Sorting is done before the type conversion.
gnaf_flat <-
  gnaf_flat %>%
  arrange(POSTCODE, STREET_NAME, LOCALITY_NAME, NUMBER_FIRST) %>%
  mutate(
    NUMBER_FIRST = as.integer(NUMBER_FIRST),
    POSTCODE = as.integer(POSTCODE),
    MB_2016_CODE = as.character(MB_2016_CODE)
  )

# The dataset is saved under the name `gnaf`.
gnaf <- gnaf_flat

# use_data() comes from usethis, which this script never attaches (only dplyr
# is loaded), so it is called with an explicit namespace.
usethis::use_data(gnaf, overwrite = TRUE)


Shaunson26/futureScenario documentation built on June 15, 2022, 2:12 p.m.