# Global knitr chunk options for the README: source and output are collapsed
# into one block, output lines are prefixed with "#>", and figures are written
# under man/figures/ with a "README-" file prefix.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "man/figures/README-",
  out.width = "100%",
  dpi = 300
)
This is a minimal package to help with the compilation and analysis of tax and economic data in South Africa. The package only contains seven main sets of data, three functions and the personal income tax tables from 1995/96 to 2024/25.
The data includes:
The three functions and the personal income tax tables are intended to help with calculating tax liabilities, particularly when used with the administrative data from SARS.
You can install the package from GitHub with:
# The remotes package is required; uncomment the next line if it is missing.
# install.packages("remotes")
remotes::install_github("chrisaxelson/tax4sa")
The data can be accessed by entering the name of a dataset, e.g. `NT_Budget_revenue`, `NT_S32_revenue`, `SARB_Quarterly_Bulletin`, `STATSSA_P0441_GDP`, `DMRE_fuel`, etc., and is in a tidy format to ease analysis within R. The package needs to be reinstalled to update the data. If you would like to load all the data into your environment to check what is available, you can run:
# Attach the package, then load all of its datasets into the environment
library(tax4sa)
load_tax4sa_data()
The tax revenue data is split by three revenue classifications in columns `T1`, `T2` and `T3`, and all figures are in thousands of rand (R'000).
library(dplyr)
library(knitr)
library(kableExtra)

# Annual revenue: first five rows for fiscal year 2025, rounded to whole numbers
NT_Budget_revenue %>%
  filter(Fiscal_year == 2025) %>%
  select(T1:T3, Year, Revenue) %>%
  mutate(Revenue = round(Revenue)) %>%
  head(5) %>%
  kable(
    format.args = list(big.mark = ","),
    caption = "Annual tax revenue (R'000s)"
  )

# Monthly revenue: last five rows for the health promotion levy.
# Year columns are cast to character so big.mark does not format them as 2,024.
NT_S32_revenue %>%
  filter(T3 == "Health promotion levy") %>%
  select(Tax = T3, Month, Quarter, Year, Fiscal_year, Revenue) %>%
  mutate(across(c(Year, Fiscal_year), as.character)) %>%
  tail(5) %>%
  kable(
    format.args = list(big.mark = ","),
    caption = "Monthly health promotion levy revenue (R'000s)"
  )
Or you can download the annual and monthly data in one spreadsheet.
# This saves it in your current working directory (uncomment to run).
# mode = "wb" downloads in binary mode so the .xlsx is not corrupted on Windows.
# download.file("https://raw.githubusercontent.com/chrisaxelson/tax4sa/master/data-raw/NT/Revenue.xlsx",
#               "Revenue.xlsx", mode = "wb")
Similarly, you can access the forecasts that were made in the annual Budget Reviews below.
# Gross tax revenue forecasts from the 2023 publication year.
# Publication_year is cast to character so big.mark does not format it as 2,023.
NT_Budget_forecasts %>%
  filter(Publication_year == 2023) %>%
  filter(Category == "Gross tax revenue") %>%
  mutate(Publication_year = as.character(Publication_year)) %>%
  kable(
    format.args = list(big.mark = ","),
    caption = "Tax revenue forecasts (R million)"
  )
Or you can download the forecasts in one spreadsheet.
# This saves it in your current working directory (uncomment to run).
# mode = "wb" downloads in binary mode so the .xlsx is not corrupted on Windows.
# download.file("https://raw.githubusercontent.com/chrisaxelson/tax4sa/master/data-raw/NT/Forecasts.xlsx",
#               "Forecasts.xlsx", mode = "wb")
The line-by-line trade data from the South African Revenue Service is too large to be included directly in the package, but can be downloaded separately per year of data and type of trade (imports or exports). Each file is around 20MB.
The following code downloads the data into your working directory using the piggyback package. The data is likely to be too large to be loaded into R on most computers, so it is saved as individual parquet files where you can use duckdb to query all the data without moving it into RAM. Any subsequent runs of the code will only download updated data files.
library(piggyback)
#
# Download individual files - can adjust imports to exports and the year
# pb_download("SARS_imports_2022.parquet",
#             repo = "chrisaxelson/tax4sa")
#
# Or download ALL the trade data from Github - about 600MB
# If run again, will only download updated data
# pb_download(repo = "chrisaxelson/tax4sa")

# Quick example of how to access the data
library(duckdb)

# Create connection to temporary database in memory
con <- dbConnect(duckdb())

# Reference all the parquet files in the working directory and pull a
# five-row sample into memory; only this sample is materialised in R.
trade_sample <- tbl(con, "SARS_*.parquet") %>%
  head(5) %>%
  mutate(YearMonth = as.character(YearMonth)) %>%
  select(TradeType,
         District = DistrictOfficeName,
         Origin = CountryOfOriginName,
         Destination = CountryOfDestinationName,
         Unit = StatisticalUnit,
         YearMonth,
         ChapterAndDescription,
         Quantity = StatisticalQuantity,
         Value_ZAR = CustomsValue) %>%
  collect()

# Close the connection and shut the in-memory database down once the sample
# has been collected, so the DuckDB instance is not left open.
dbDisconnect(con, shutdown = TRUE)

kable(trade_sample,
      format.args = list(big.mark = ","),
      caption = "Monthly trade data")
# Find the SARB Quarterly Bulletin series whose description mentions GDP at
# market prices, restricted to rows with Frequency "K1"
SARB_Quarterly_Bulletin_info %>%
  filter(
    grepl("Gross domestic product at market prices", Description),
    Frequency == "K1"
  ) %>%
  select(-c(Description, Frequency)) %>%
  kable()

# Show the last five observations for series KBP6006K
SARB_Quarterly_Bulletin %>%
  filter(Code == "KBP6006K") %>%
  select(-Month) %>%
  tail(5) %>%
  kable()
# Headline CPI from STATSSA with a percentage change against the value 12 rows
# earlier (an annual rate, assuming one row per month in date order - confirm),
# rounded to three decimals
STATSSA_P0141_CPI_COICOP %>%
  filter(H04 == "CPI Headline") %>%
  select(H01, H04, Date_original, Value) %>%
  mutate(
    Annual_inflation = round(
      (as.numeric(Value) / as.numeric(lag(Value, 12)) - 1) * 100,
      3
    )
  ) %>%
  tail(5) %>%
  kable()
Note that the `Price` column reflects retail prices for unleaded petrol, wholesale list prices for diesel and illuminating paraffin, and the maximum retail price for liquefied petroleum gas.
# Look at the DMRE fuel data: last ten rows, keeping the columns from
# Fuel_type through General_fuel_levy
DMRE_fuel %>%
  select(Fuel_type:General_fuel_levy) %>%
  tail(10) %>%
  kable()
The three functions are `tax_calculation`, `pit` and `pit_manual`. The first is a generic function to apply a tax table to a value, while the latter two specifically calculate the personal income tax liability in South Africa. `pit_manual` allows for a custom tax table to be applied to cater for modelling the impacts of changes in the personal income tax tables. The package includes a list of historical tax tables to be used in the calculations.
# Apply a stored tax table (the 2024 PIT brackets) to a single value
tax_calculation(100000, Tax_tables[["PIT_brackets_2024"]])

# Personal income tax for one individual
pit(income = 1000000, age = 53, mtc = 2550, tax_year = 2024)

# The same calculation over a relatively large data frame of simulated
# individuals, each with a random income, age, MTC and tax year
individuals <- 1e6
df <- data.frame(
  Taxable_income = round(runif(individuals, 0, 3000000), 0),
  Age = round(runif(individuals, 18, 80), 0),
  MTC = round(runif(individuals, 0, 6000), 0),
  Tax_year = round(runif(individuals, 2014, 2020), 0)
)

# Time the vectorised calculation over all rows
system.time({
  df <- mutate(df, Simulated_tax = pit(Taxable_income, Age, MTC, Tax_year))
})
library(tax4sa)
library(dplyr)
library(ggplot2)
library(scales)

# Step 1: total revenue per fiscal year
Total_revenue <- NT_Budget_revenue %>%
  group_by(Fiscal_year) %>%
  summarise(Revenue = sum(Revenue))

# Step 2: nominal GDP per fiscal year, summed over the quarterly values,
# keeping fiscal years 1994 to 2024
GDP_fiscal <- STATSSA_P0441_GDP %>%
  filter(H03 == "QNU1000") %>%
  group_by(Fiscal_year) %>%
  summarise(GDP = sum(Value)) %>%
  filter(Fiscal_year > 1993, Fiscal_year < 2025)

# Step 3: join the two series and compute the ratio. Revenue is divided by
# 1,000 first (presumably to match the unit of GDP - confirm).
Tax_to_GDP <- inner_join(GDP_fiscal, Total_revenue, by = "Fiscal_year") %>%
  mutate(
    Revenue = Revenue / 1000,
    Tax_to_GDP = Revenue / GDP
  )

# Step 4: line chart with a text label on the 2024 point only
Tax_to_GDP %>%
  ggplot(aes(x = Fiscal_year, y = Tax_to_GDP)) +
  geom_line() +
  geom_point() +
  geom_text(
    aes(label = ifelse(Fiscal_year == 2024,
                       paste0(round(Tax_to_GDP, 4) * 100, "%"),
                       "")),
    hjust = 0.5,
    vjust = 1.5,
    show.legend = FALSE
  ) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1L)) +
  theme_classic() +
  labs(
    title = "Gross tax revenue to GDP",
    x = "Fiscal year",
    y = "Tax to GDP"
  )
library(lubridate)

# Both charts use the same subset: 95 unleaded petrol in Gauteng after
# 1 January 2016
petrol_gauteng <- DMRE_fuel %>%
  filter(Fuel_type == "95_ULP",
         Region == "Gauteng",
         Date > dmy("01012016"))

# Chart 1: price over time (Price / 100), labelling the 2022-07-06 observation
ggplot(petrol_gauteng, aes(x = Date, y = Price / 100)) +
  geom_line() +
  geom_point() +
  theme_classic() +
  labs(
    title = "Retail price of 95 unleaded petrol in Gauteng",
    y = "Retail price (R/litre)"
  ) +
  geom_text(
    aes(label = ifelse(Date == "2022-07-06", paste0("R", Price / 100), "")),
    hjust = 1.2,
    vjust = 0.5,
    show.legend = FALSE
  )

# Chart 2: the four levies combined, as a share of the price
petrol_gauteng %>%
  mutate(
    Tax = General_fuel_levy + Road_accident_fund_levy +
      Customs_and_excise_levy + Demand_side_management_levy,
    Tax_percentage = Tax / Price
  ) %>%
  ggplot(aes(x = Date, y = Tax_percentage)) +
  geom_line() +
  geom_point() +
  theme_classic() +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1L)) +
  labs(
    title = "Levies as a percentage of retail price",
    subtitle = "Includes the general fuel levy, the Road Accident Fund levy, customs levy and DSML",
    y = "Percentage of retail price"
  )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.