data-raw/census/src/abs2021-CED.R

## This script creates the abs2021 dataframe from the ABS Census 2021 Datapacks
## Metrics are consistent with Census from 2001 and 2006.
## The individual csv files must be held locally. They come from a zip file and can be downloaded from:
## https://www.abs.gov.au/census/find-census-data/datapacks
## Select: 2021 Census Datapacks, General Community Profile, Commonwealth Electoral Divisons
## Download for all of Australia
## Ensure tidy_CED21.csv is in the working directory
## Set your own working directory (at start) and export directory for write_rds (at end)

library(tidyverse)
library(readxl)

G1_Main <- read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G01_AUST_CED.csv")
G02_Medians <- read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G02_AUST_CED.csv")
G06_Marriage <- read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G06_AUST_CED.csv")
G09_BornOverseas <- 
  cbind(read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G09A_AUST_CED.csv"), 
        read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G09B_AUST_CED.csv"), 
        read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G09C_AUST_CED.csv"), 
        read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G09D_AUST_CED.csv"), 
        read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G09E_AUST_CED.csv"), 
        read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G09F_AUST_CED.csv"), 
        read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G09G_AUST_CED.csv"), 
        read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G09H_AUST_CED.csv"))


G13E_LanguageHome <- read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G13E_AUST_CED.csv")
G14_Religion <- read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G14_AUST_CED.csv")
G15_Study <- read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G15_AUST_CED.csv")
G16A_HighSchool <- read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G16A_AUST_CED.csv")
G16B_HighSchool <- read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G16B_AUST_CED.csv")
G17A_Income <- read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G17A_AUST_CED.csv")
G17B_Income <- read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G17B_AUST_CED.csv")
G17C_Income <- read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G17C_AUST_CED.csv")

G23_Volunteer <- read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G23_AUST_CED.csv")

G29_Family <- read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G29_AUST_CED.csv")
G32_FamilyIncome <- read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G32_AUST_CED.csv")
G33_HouseholdIncome <- read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G33_AUST_CED.csv")
G35_NumberInHouse <- read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G35_AUST_CED.csv")
G37_Tenure <- read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G37_AUST_CED.csv")
G40_Rent <- read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G40_AUST_CED.csv")
# G37_Internet <- read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G37_AUST_CED.csv")
G43_Employ <- read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G43_AUST_CED.csv")
G45_Address <- read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G45_AUST_CED.csv")
G49B_Uni <- read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G49B_AUST_CED.csv")
G54_Industry <- 
  cbind(read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G54A_AUST_CED.csv"),
        read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G54B_AUST_CED.csv"),
        read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G54C_AUST_CED.csv"),
        read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G54D_AUST_CED.csv"))
G60_Occupation <- 
  cbind(read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G60A_AUST_CED.csv"),
        read_csv("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_G60B_AUST_CED.csv"))

Area_sqkm <- read_excel("data-raw/census/data/2021 Census GCP Commonwealth Electoral Division for AUS/2021Census_geog_desc_1st_and_2nd_release.xlsx", 
                        sheet='2021_ASGS_Non_ABS_Structures') %>% 
  filter(ASGS_Structure == 'CED') %>% 
  rename(Area_sqkm = `Area sqkm`)


new <- G1_Main %>% mutate(
  ID = substr(CED_CODE_2021, 4, 6),
  
  #G01
  Population = Tot_P_P,
  Age00_04 = (Age_0_4_yr_P/Tot_P_P)*100, 
  Age05_14 = (Age_5_14_yr_P/Tot_P_P)*100,
  Age15_19 = (Age_15_19_yr_P/Tot_P_P)*100,
  Age20_24 = (Age_20_24_yr_P/Tot_P_P)*100,
  Age25_34 = (Age_25_34_yr_P/Tot_P_P)*100,
  Age35_44 = (Age_35_44_yr_P/Tot_P_P)*100,
  Age45_54 = (Age_45_54_yr_P/Tot_P_P)*100,
  Age55_64 = (Age_55_64_yr_P/Tot_P_P)*100,
  Age65_74 = (Age_65_74_yr_P/Tot_P_P)*100,
  Age75_84 = (Age_75_84_yr_P/Tot_P_P)*100,
  Age85plus = (Age_85ov_P/Tot_P_P)*100,
  Age15plus = Tot_P_P - Age_0_4_yr_P - Age_5_14_yr_P,
  AusCitizen = (Australian_citizen_P / Tot_P_P)*100,
  BornOverseas = (Birthplace_Elsewhere_P/ (Birthplace_Elsewhere_P + Birthplace_Australia_P))*100,
  Indigenous = (Indigenous_P_Tot_P/Tot_P_P)*100,
  OtherLanguageHome = (Lang_used_home_Oth_Lang_P/ (Lang_used_home_Oth_Lang_P + Lang_used_home_Eng_only_P))*100,
  EnglishOnly = 100 - OtherLanguageHome,
  
  #G02
  AverageHouseholdSize = G02_Medians$Average_household_size,
  MedianHouseholdIncome = G02_Medians$Median_tot_hhd_inc_weekly,
  MedianFamilyIncome = G02_Medians$Median_tot_fam_inc_weekly,
  MedianRent = G02_Medians$Median_rent_weekly,
  MedianLoanPay = G02_Medians$Median_mortgage_repay_monthly,
  MedianAge = G02_Medians$Median_age_persons,
  MedianPersonalIncome = G02_Medians$Median_tot_prsnl_inc_weekly,
  
  #G06
  Married = (G06_Marriage$P_Tot_Marrd_reg_marrge/G06_Marriage$P_Tot_Total)*100,
  DeFacto = (G06_Marriage$P_Tot_Married_de_facto/G06_Marriage$P_Tot_Total)*100,
  
  #G09
  BornOverseas_NS = (G09_BornOverseas$P_COB_NS_Tot / G09_BornOverseas$P_Tot_Tot)*100,

  Born_UK = ((G09_BornOverseas$P_England_Tot + G09_BornOverseas$P_Wales_Tot + G09_BornOverseas$P_Scotland_Tot) / (G09_BornOverseas$P_Tot_Tot)) * 100,

  Born_MidEast = ((G09_BornOverseas$P_Afghanistan_Tot + G09_BornOverseas$P_Egypt_Tot + G09_BornOverseas$P_Iran_Tot + G09_BornOverseas$P_Lebanon_Tot + G09_BornOverseas$P_Turkey_Tot) / (G09_BornOverseas$P_Tot_Tot)) * 100,
                            
  Born_SE_Europe = ((G09_BornOverseas$P_Bosnia_Herzegov_Tot + G09_BornOverseas$P_Croatia_Tot + G09_BornOverseas$P_Greece_Tot) / (G09_BornOverseas$P_Tot_Tot)) * 100,
  
  Born_Asia = ((G09_BornOverseas$P_Bangladesh_Tot + G09_BornOverseas$P_Cambodia_Tot + G09_BornOverseas$P_China_Tot + G09_BornOverseas$P_Hong_Kong_SAR_Ch_Tot + G09_BornOverseas$P_Indonesia_Tot + G09_BornOverseas$P_Indonesia_Tot + G09_BornOverseas$P_Japan_Tot + G09_BornOverseas$P_Korea_South_Tot + G09_BornOverseas$P_Malaysia_Tot + G09_BornOverseas$P_Nepal_Tot + G09_BornOverseas$P_Pakistan_Tot + G09_BornOverseas$P_Philippines_Tot + G09_BornOverseas$P_Singapore_Tot + G09_BornOverseas$P_Sri_Lanka_Tot + G09_BornOverseas$P_Thailand_Tot + G09_BornOverseas$P_Taiwan_Tot + G09_BornOverseas$P_Vietnam_Tot) / (G09_BornOverseas$P_Tot_Tot)) * 100,
  
  #G13
  Language_NS = (G13E_LanguageHome$P_Tot_NS / G13E_LanguageHome$P_Tot_Tot)*100,
  
  #G14
  Christianity = (G14_Religion$Christianity_Tot_P/(G14_Religion$Tot_P))*100,
  Anglican = (G14_Religion$Christianity_Anglican_P/(G14_Religion$Tot_P))*100,
  Catholic = (G14_Religion$Christianity_Catholic_P/(G14_Religion$Tot_P))*100,
  Buddhism = (G14_Religion$Buddhism_P/(G14_Religion$Tot_P))*100,
  Islam = (G14_Religion$Islam_P/(G14_Religion$Tot_P))*100,
  Judaism = (G14_Religion$Judaism_P/(G14_Religion$Tot_P))*100,
  OtherChrist = ((G14_Religion$Christianity_Tot_P - G14_Religion$Christianity_Anglican_P - G14_Religion$Christianity_Catholic_P)/(G14_Religion$Tot_P))*100,
  NoReligion = (G14_Religion$SB_OSB_NRA_NR_P/(G14_Religion$Tot_P))*100,
  Religion_NS = (G14_Religion$Religious_affiliation_ns_P/G14_Religion$Tot_P)*100,
  Other_NonChrist = 100 - NoReligion - Christianity,
  Other_NonChrist = (1- ((G14_Religion$Christianity_Tot_P - G14_Religion$SB_OSB_NRA_NR_P)/(G14_Religion$Tot_P)))*100,
  
  #G15
  CurrentlyStudying = (G15_Study$Tot_P / Tot_P_P)*100, #of total population
  
  #G16
  HighSchool = (G16A_HighSchool$P_Y12e_Tot/(G16B_HighSchool$P_Tot_Tot))*100,
  HighSchool_NS = (G16B_HighSchool$P_Hghst_yr_schl_ns_Tot/G16B_HighSchool$P_Tot_Tot)*100, #of age appropriate population
  
  #G17C
  PersonalIncome_NS = (G17C_Income$P_PI_NS_ns_Tot / G17C_Income$P_Tot_Tot)*100,
  
  #G23
  Volunteer = (G23_Volunteer$P_Tot_Volunteer / (G23_Volunteer$P_Tot_Tot))*100,
  Volunteer_NS = (G23_Volunteer$P_Tot_Voluntary_work_ns / G23_Volunteer$P_Tot_Tot)*100,

  #G29
  FamilyRatio = G29_Family$Total_P/G29_Family$Total_F, #average number of people per family
  Couple_NoChild_House = (G29_Family$CF_no_children_F/G29_Family$Total_F)*100,
  Couple_WChild_House = (G29_Family$CF_Total_F/G29_Family$Total_F)*100,
  OneParent_House = (G29_Family$OPF_Total_F/G29_Family$Total_F)*100,
  
  #G32
  FamilyIncome_NS = ((G32_FamilyIncome$Partial_income_stated_Tot + G32_FamilyIncome$All_incomes_ns_Tot) / G32_FamilyIncome$Tot_Tot)*100,
  #includes no income and partial income entries
  
  #G33
  HouseholdIncome_NS = ((G33_HouseholdIncome$Partial_income_stated_Tot + G33_HouseholdIncome$All_incomes_not_stated_Tot) / G33_HouseholdIncome$Tot_Tot)*100,
  #includes no income and partial income entries
    
  #G35
  SP_House = (G35_NumberInHouse$Num_Psns_UR_1_Total/G35_NumberInHouse$Total_Total)*100,
  
  #G37
  Tenure_NS = (G37_Tenure$Ten_type_NS_Total / G37_Tenure$Total_Total)*100,
  Owned = (G37_Tenure$O_OR_Total/(G37_Tenure$Total_Total))*100,
  Mortgage = ((G37_Tenure$O_MTG_Total)/(G37_Tenure$Total_Total))*100,
  Renting = (G37_Tenure$R_Tot_Total/(G37_Tenure$Total_Total))*100,
  PublicHousing = (G37_Tenure$R_ST_h_auth_Total/(G37_Tenure$Total_Total))*100,
  
  #G40
  Rent_NS = (G40_Rent$Rent_ns_Tot / G40_Rent$Tot_Tot)*100,
  
  # #G37
  # InternetAccess = (G37_Internet$IA_Total/(G37_Internet$Total_Total))*100,
  # InternetAccess_NS = (G37_Internet$IC_not_stated_Total / G37_Internet$Total_Total)*100,
  
  #G43
  Unemployed = G43_Employ$Percent_Unem_loyment_P,
  LFParticipation = G43_Employ$Percnt_LabForc_prticipation_P,
  
  #G45
  DiffAddress = (1 - (G45_Address$Sme_Usl_ad_5_yr_ago_as_2021_P/(G45_Address$Tot_P)))*100,
  
  #G49B
  BachelorAbv = ((G49B_Uni$P_BachDeg_Total + G49B_Uni$P_PGrad_Deg_Total + G49B_Uni$P_GradDip_and_GradCert_Total)/ (Population*(100 - Age00_04 - Age05_14)/100))*100,
  DipCert = ((G49B_Uni$P_AdvDip_and_Dip_Total + G49B_Uni$P_Cert_Lev_Tot_Total) /(Population*(100 - Age00_04 - Age05_14)/100))*100,
  University_NS = ((G49B_Uni$P_Lev_Edu_IDes_Total + G49B_Uni$P_Lev_Edu_NS_Total) / (Population*(100 - Age00_04 - Age05_14)/100))*100,
  #Using the total population 15+ in the denominator, instead of the total given in the response
  
  #G54
  Extractive = ((G54_Industry$P_Ag_For_Fshg_Tot + G54_Industry$P_Mining_Tot + G54_Industry$P_El_Gas_Wt_Waste_Tot) / (G54_Industry$P_Tot_Tot)) * 100,
  Transformative = ((G54_Industry$P_Constru_Tot + G54_Industry$P_Manufact_Tot) / (G54_Industry$P_Tot_Tot)) * 100,
  Distributive = ((G54_Industry$P_WhlesaleTde_Tot + G54_Industry$P_RetTde_Tot + G54_Industry$P_Trans_post_wrehsg_Tot) / (G54_Industry$P_Tot_Tot)) * 100,
  Finance = (G54_Industry$P_Fin_Insur_Tot / (G54_Industry$P_Tot_Tot)) * 100,
  SocialServ = ((G54_Industry$P_Educ_trng_Tot + G54_Industry$P_HlthCare_SocAs_Tot + G54_Industry$P_Art_recn_Tot) / (G54_Industry$P_Tot_Tot)) * 100,
  
  #G60
  ManagerAdminClericalSales = ((G60_Occupation$P_Tot_Managers + G60_Occupation$P_Tot_ClericalAdminis_W + G60_Occupation$P_Tot_Sales_W) / (G60_Occupation$P_Tot_Tot))*100,
  Professional = (G60_Occupation$P_Tot_Professionals / (G60_Occupation$P_Tot_Tot))*100,
  Tradesperson = (G60_Occupation$P_Tot_TechnicTrades_W / (G60_Occupation$P_Tot_Tot))*100,
  Laborer = (G60_Occupation$P_Tot_Labourers / (G60_Occupation$P_Tot_Tot))*100,
  
  #Area
  Area = as.numeric(Area_sqkm$Area_sqkm),
  
  #Other
  EmuneratedElsewhere = 0,
  InternetUse = 0,
  InternetUse_NS = 0
)  %>% # Fill any nulls as zero
  mutate(
    across(everything(), ~replace_na(.x, 0))
  )

# Join electorate names and areas
CED <- read_excel("data-raw/supplement/tidy_CED21.xlsx")

new <- new %>% 
  left_join(CED %>% mutate(ID = as.character(CED)) %>% select(-c("...1", CED)), by = "ID")

# Remove no usual address and offshore rows, and only 
abs2021 <- new %>% 
  filter(!grepl("No usual address", Electorate)) %>% 
  filter(!grepl("Migratory", Electorate)) %>% 
  select(-c(ends_with("_P"), ends_with("_M"), ends_with("_F"), CED_CODE_2021, Age15plus))

# Inflation
# Using: https://www.rba.gov.au/calculator/annualDecimal.html
inflation_rates <- c(1.1514, 1.3303, 1.4613, 1.6003)
abs2021 <- abs2021 %>%
  mutate(MedianFamilyIncome = MedianFamilyIncome/inflation_rates[4],
         MedianHouseholdIncome = MedianHouseholdIncome/inflation_rates[4],
         MedianLoanPay = MedianLoanPay/inflation_rates[4],
         MedianPersonalIncome = MedianPersonalIncome/inflation_rates[4],
         MedianRent = MedianRent/inflation_rates[4])

# Order by electorate, upper case names, rename electorate column and reorder columns, change BornOverseas to BornElsewhere
abs2021 <- abs2021 %>% 
  arrange(Electorate) %>% 
  rename(DivisionNm = Electorate) %>% 
  mutate(DivisionNm = toupper(DivisionNm)) %>% 
  mutate(BornElsewhere = BornOverseas - Born_MidEast - Born_SE_Europe - Born_UK) %>%
  select(-BornOverseas)

abs2021 <- abs2021 %>% 
  select(order(colnames(.))) %>% 
  select(ID, DivisionNm, State, Population, Area, everything()) %>% 
  mutate(ID = as.numeric(ID))


# Save
save(abs2021, file = "data-raw/census/data/abs2021.rda")

# Remove objects from environment
objs = ls()[grepl("^G[0-9]{1-2}", ls())]
for (ob in objs) {remove(list=ob)}
remove(objs)
ropenscilabs/eechidna documentation built on May 4, 2023, 6:51 a.m.