#requireNamespace("roxygen2")
requireNamespace("Rcpp")
requireNamespace("dplyr")
requireNamespace("tidyr")
requireNamespace("readxl")
requireNamespace("economiccomplexity")
requireNamespace("usethis")
requireNamespace("Matrix")
requireNamespace("tibble")
library("Rcpp")
library("tidyr")
library("dplyr")
library("readxl")
library("economiccomplexity")
library("usethis")
library("roxygen2")
#' IOPS
#'
#' @description Takes user inputted trade data, any acceptable ISO country code and industrial value chain mapping to calculate various metrics (Economic- and Product complexity metrics, distance metrics, opportunity gain, and inequality metrics) of a given country in order to facilitate better decision making regarding industrial policymaking.
#'
#' @import dplyr
#' @import Rcpp
#' @import tidyr
#' @import readxl
#' @import usethis
#' @import economiccomplexity
#' @import roxygen2
#' @importFrom stats aggregate
#' @importFrom utils write.csv
#' @importFrom utils read.csv
#'
#' @param CountryCode (Type: character/integer) Any accepted ISO country code could be used, e.g. \code{"United Kingdom"}, \code{"GBR"}, \code{"GB"}, \code{828} would all be accepted if the United Kingdom is the desired country. This version of the package uses country_codes_V202201 from th BACI Hs17 2020 dataset, but can be changed in \code{inst/extdata}.
#' @param tradeData (Type: csv) Accepts any CEPII BACI trade data. NOTE: tradeData and GVCMapping must be from the same "H" Family, e.g. both are from H3, etc., in order for the program to work correctly.
#' @param ComplexMethod (Type: character) Methods used to calculate complexity measures. Can be any one of these methods: \code{"fitness"}, \code{"reflections"} or \code{"eigenvalues"}. Defaults to "eigenvalues".
#' @param iterCompl (Type: integer) The number of iterations that the chosen complexity measure must use. Defaults to \code{iterCompl = 20}.
#' @param GVCMapping (Type: csv) The desired value chain to be analysed. With Columns "Tiers", "Activity", and "HSCode". NOTE: tradeData and GVCMapping must be from the same "H" Family, e.g. both are from H3, etc., in order for the program to work correctly.
#'
#' @examples
#' IOPS(CountryCode = 710,
#' tradeData = ExampleTradeData,
#' ComplexMethod = "eigenvalues",
#' iterCompl = 10,
#' GVCMapping = ExampleValueChain)
#'
#' @return A list that constains ECI, PCI, Opportunity_Gain, distance, density, M_absolute, M_binary, Tier_Results, Product_Category_Results, Product_Results, respectively.
#' @export
IOPS <- function(CountryCode ,tradeData , ComplexMethod = "eigenvalues", iterCompl = 20, GVCMapping){
#Assign empty variables
country_code <- NA
country_name_abbreviation <- NA
country_name_full <- NA
iso_2digit_alpha <- NA
iso_3digit_alpha <- NA
hs_product_code <- NA
export_value <-NA
GVCactivityNumber <- NA
tierNumber <- NA
#tradeData2018 <- read.csv("H5/BACI_HS17_Y2018_V202001.csv", header = TRUE)
CountryDataPath <- system.file("extdata", "country_codes_V202201.csv", package = "iopspackage")
AllCountryCodes <- read.csv(CountryDataPath)
country_code <- AllCountryCodes$CountryCode
countryChosen <- AllCountryCodes %>%
filter(country_code == CountryCode | country_name_abbreviation == CountryCode | country_name_full == CountryCode | iso_2digit_alpha == CountryCode| iso_3digit_alpha == CountryCode)
#Determine country's 3-digit country code
chosenCountry <- as.character(countryChosen["iso_3digit_alpha"])
#Error conditions
if( dim(countryChosen)[1] == 0) stop('Invalid country code')
if( ComplexMethod == "reflections"&& ComplexMethod == "eigenvalues" && ComplexMethod == "fitness") stop('Invalid complexity method')
#----------------------------- Import Trade Data -----------------------------
#Determine country's 3-digit country code
message(paste0("Selected country: ", chosenCountry))
#+++++++++++++++++++++++++++
# read in trade data
tradeData2018 <- tradeData
#colnames(tradeData2018) <- c("year", "location_code", "hs_product_code", "export_value", "import_value", "export_RCA", "import_RCA")
#tradeData_Export <- as.data.frame(matrix(rep(0, times = nrow(tradeData2018)*4), nrow = nrow(tradeData), ncol = 4))
tradeData2018 <- tradeData2018[c("t","v","i","k")]
colnames(tradeData2018) <- c("year","export_value","country_code","hs_product_code")
#+++++++++++ Merge data sets (for compatibility) ++++++++++++++
ISO_Country <-AllCountryCodes[,c(1, 5)]
colnames(ISO_Country)[2] <- "location_code"
common_col_names <- intersect(names(tradeData2018), names(ISO_Country))
MergedTradeData <- merge(tradeData2018, ISO_Country, by = common_col_names)
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
tradeData2018 <- MergedTradeData[,c("year","export_value","location_code","hs_product_code")]
message("Trade data succesfully imported")
#======read in industry value chain mapping======
#GVCMapping <- read.csv("H5/Auto Value Chain - updated for H5 - v2.csv", header=FALSE)
colnames(GVCMapping) <- c("tierNumber", "GVCactivityNumber", "HScode")
#GVCMapping: || tierNumber | GVCactivityNumber | HScode ||
message("Value chain succesfully imported")
#_______________________________________________________________________________________________
expdata <- tradeData2018[ ,c(2, 3, 4)]
#expdata <- tradeData2018[ ,c(4, 2, 3)]
wide_expdata <- expdata %>% pivot_wider(names_from = hs_product_code, values_from = export_value, values_fn = sum, names_sort = TRUE, values_fill = 0) # transform the data so that all data for one country is contained in a single row
#----Get Mbin, Mabs, complexities, CSRCA-----
# create matrix of RCA values (Mabs) and binary RCA matrix (Mbin)
Mabs <- balassa_index(expdata, discrete = FALSE, country = "location_code", product = "hs_product_code", value = "export_value")
Mbin <- balassa_index(expdata, country = "location_code", product = "hs_product_code", value = "export_value")
M_binary <- as.matrix(Mbin)
M_absolute <- as.matrix(Mabs)
message("M-matrices calculated")
Products <- unique(expdata$hs_product_code) # 1219 product codes, already sorted
Countries <- rownames(Mabs) # 235 countries, alphabetically ordered
# create a matrix containing the RCA data for the country being analysed
# chosen country subset: wide_expdata[which(wide_expdata$location_code == chosenCountry), -1]
CS_RCAmat <- cbind.data.frame(colnames(wide_expdata[which(wide_expdata$location_code == chosenCountry), -1]), as.numeric(wide_expdata[which(wide_expdata$location_code == chosenCountry), -1]), as.numeric(colSums(wide_expdata[ ,-1])), as.numeric(Mabs[chosenCountry, ]))
colnames(CS_RCAmat) <- c("hs_product_code", paste(chosenCountry, "export_of_product", sep = "_"), "World_export_of_product", paste(chosenCountry, "RCA", sep = "_"))
# || hs_product_code | ZAF_export_of_product | World_export_of_product | ZAF_RCA ||
message("RCA calculated")
#+++++++++++++++++++++++++++COMPLEXITY MEASURES++++++++++++++++++++++++++++++#
#User selects one of three methods to calculate complexities,
if (ComplexMethod == "eigenvalues"){
message(paste0("Starting calculation of complexity measures using the '", ComplexMethod, "' method" ))
# use binary M matrix to calculate complexity using EIGENVALUES, number of iterations must be provided
#NOTE: This specific method takes a significantly long time to compute
Compl_Meth <- complexity_measures(Mbin, method = "eigenvalues")
Method <- "eigv" #method descriptor for Excel/CSV file
} else if(ComplexMethod == "reflections") {
message(paste0("Starting calculation of complexity measures using the '", ComplexMethod, "' method" ))
# use binary M matrix to calculate complexity using METHOD OF REFLECTIONS, number of iterations must be provided
Compl_Meth <- complexity_measures(Mbin, method = "reflections", iterations = iterCompl)
Method <- "refl"
} else {
message(paste0("Starting calculation of complexity measures using the '", ComplexMethod, "' method" ))
# use binary M matrix to calculate complexity using FITNESS COMPLEXITY, number of iterations must be provided
Compl_Meth <- complexity_measures(Mbin, method = "fitness", iterations = iterCompl)
Method <- "fitn"
}
#determine complexities with input method
ECI <- Compl_Meth$complexity_index_country
PCI <- Compl_Meth$complexity_index_product
#-----Get proximity, opp gain, opp val, distance, density-----
# calculate the proximity between products
proxes <- proximity(Mbin)
Proximities <- proxes$proximity_product
Similarities <- proxes$proximity_country
# compute the opportunity value
opporVal <- complexity_outlook(Mbin, Proximities, PCI)
cntryOpporVal <- opporVal$complexity_outlook_index
opporGain <- opporVal$complexity_outlook_gain
Opportunity_Gain <- as.matrix(opporGain)
# compute the densities and distances
# ______ (1 - Mbin) creates matrix of ones where Mbin has zeroes and vice versa
distance <- tcrossprod(1 - as.matrix(Mbin), as.matrix(Proximities)/rowSums(as.matrix(Proximities)))
density <- tcrossprod(as.matrix(Mbin), as.matrix(Proximities)/rowSums(as.matrix(Proximities)))
message("Complexity measures calculated")
#-----create GVCfull, GVCacts, GVCtiers-----
message("Starting IO-PS calculations")
GVCMapping <- GVCMapping[order(GVCMapping$tierNumber, GVCMapping$GVCactivityNumber, GVCMapping$HScode), ] # reorder GVCMapping according to tier, then activity, then HScode
numTiers <- length(unique(GVCMapping$tierNumber)) # = 5
numActs <- length(unique(GVCMapping$GVCactivityNumber)) # = 49
numProds <- length(unique(GVCMapping$HScode)) # = 272
numProdsInAct <- as.matrix(GVCMapping[ ,c(2, 3)] %>% group_by(GVCactivityNumber) %>% tally())[ ,2]
numActsInTier <- as.matrix(unique(GVCMapping[ ,c(1, 2)]) %>% group_by(tierNumber) %>% tally())[ ,2]
numProdsInTier <- as.matrix(GVCMapping[ ,c(1, 3)] %>% group_by(tierNumber) %>% tally())[ ,2]
#-----GVCfull-----
GVCfull <- as.data.frame(matrix(rep(0, times = numProds*15), nrow = numProds, ncol = 15)) # initialise GVCfull
colnames(GVCfull) <- c("tierNumber", "GVCactivityNumber", "HScode", "Complexity", "Density", "RCA", "Complexity_ifOpp", "Distance_ifOpp", "OpporGain_ifOpp", "RCA_ifOpp", paste(chosenCountry, "export_of_product", sep = "_"), "World_export_of_product", paste(chosenCountry, "export_of_product_ifOpp", sep = "_"), "World_export_of_product_ifOpp", "productPosition")
GVCfull[ , c(1, 2, 3)] <- GVCMapping
GVCfull <- GVCfull[order(GVCfull$HScode), ] # order according to HS code
GVCfull[ , 4] <- as.numeric(PCI[which(Products %in% GVCMapping$HScode)])
GVCfull[ , 5] <- as.numeric(density[chosenCountry, which(Products %in% GVCMapping$HScode)]) # for distances and densities, subset the one row that is specific to the country being analysed
GVCfull[ , 6] <- CS_RCAmat[which(Products %in% GVCMapping$HScode), 4]
GVCfull[ , c(11, 12)] <- CS_RCAmat[which(Products %in% GVCMapping$HScode), c(2, 3)]
GVCfull[ , 15] <- which(Products %in% GVCMapping$HScode)
GVCfull[which(GVCfull$RCA < 1), c(7, 10, 13, 14)] <- GVCfull[which(GVCfull$RCA < 1), c(4, 6, 11, 12)]
GVCfull[ , 8] <- as.numeric(distance[chosenCountry, which(Products %in% GVCMapping$HScode)])
GVCfull[-which(GVCfull$RCA < 1), 8] <- 0
GVCfull[ , 9] <- as.numeric(opporGain[chosenCountry, which(Products %in% GVCMapping$HScode)])
GVCfull[-which(GVCfull$RCA < 1), 9] <- 0
GVCfull <- GVCfull[order(GVCfull$tierNumber, GVCfull$GVCactivityNumber, GVCfull$HScode), ]
#-----GVCacts-----
numOppProdsInAct <- rep(0, times = length(numProdsInAct))
numOppProdsInAct[as.matrix(GVCMapping[which(GVCMapping$HScode %in% GVCfull[which(GVCfull$RCA < 1), 3]), ] %>% group_by(GVCactivityNumber) %>% tally())[ ,1]] <- as.matrix(GVCMapping[which(GVCMapping$HScode %in% GVCfull[which(GVCfull$RCA < 1), 3]), ] %>% group_by(GVCactivityNumber) %>% tally())[ ,2]
#in case of mismatch between data lengths
if (length(numProdsInAct)<length(numOppProdsInAct)) {
stop("Error: data selected for 'tradeData' and 'GVCmapping' isn't compatible with each other. Select data within the same 'H' class, i.e., 'H0', 'H3', or 'H5'")
}
GVCacts <- as.data.frame(matrix(rep(0, times = numActs*15), nrow = numActs, ncol = 15)) # initialise GVCacts
colnames(GVCacts) <- c("tierNumber", "GVCactivityNumber", "AvgComplexity", "AvgDensity", "AvgRCA", "AvgComplexity_ifOpp", "AvgDistance_ifOpp", "AvgOpporGain_ifOpp", "AvgRCA_ifOpp", paste(chosenCountry, "export_of_activity", sep = "_"), "World_export_of_activity", paste(chosenCountry, "export_of_activity_ifOpp", sep = "_"), "World_export_of_activity_ifOpp", "NumberOfProductsInActivity", "NumberOfOppProductsInActivity")
GVCacts[ , c(1, 2)] <- unique(GVCMapping[ ,c(1, 2)])
GVCacts[ , c(3:5)] <- aggregate(GVCfull[ , c(4:6)], by = list(activity = GVCfull$GVCactivityNumber), FUN = sum)[, -1]/numProdsInAct # also works: #aggregate(GVCfull[ , c(4:6)], by = list(activity = GVCfull$GVCactivityNumber), FUN = mean)[ , -1]
GVCacts[-which(numOppProdsInAct == 0) , c(6:9)] <- aggregate(GVCfull[-which(GVCfull$GVCactivityNumber %in% which(numOppProdsInAct == 0)) , c(7:10)], by = list(activity = GVCfull$GVCactivityNumber[-which(GVCfull$GVCactivityNumber %in% which(numOppProdsInAct == 0))]), FUN = sum)[, -1]/(numOppProdsInAct[-which(numOppProdsInAct == 0)])
GVCacts[ , c(10:13)] <- aggregate(GVCfull[ , c(11:14)], by = list(activity = GVCfull$GVCactivityNumber), FUN = sum)[ , -1]
GVCacts[ , 14] <- numProdsInAct
GVCacts[ , 15] <- numOppProdsInAct
#-----GVCtiers-----
numOppProdsInTier <- rep(0, times = length(numActsInTier))
numOppActsInTier <- rep(0, times = length(numActsInTier))
numOppProdsInTier[as.matrix(GVCMapping[which(GVCMapping$HScode %in% GVCfull[which(GVCfull$RCA < 1), 3]), ] %>% group_by(tierNumber) %>% tally())[ ,1]] <- as.matrix(GVCMapping[which(GVCMapping$HScode %in% GVCfull[which(GVCfull$RCA < 1), 3]), ] %>% group_by(tierNumber) %>% tally())[ ,2]
numOppActsInTier[as.matrix(GVCacts[which(GVCacts$GVCactivityNumber %in% GVCacts[which(GVCacts$AvgRCA < 1), 2]), ] %>% group_by(tierNumber) %>% tally())[ ,1]] <- as.matrix(GVCacts[which(GVCacts$GVCactivityNumber %in% GVCacts[which(GVCacts$AvgRCA < 1), 2]), ] %>% group_by(tierNumber) %>% tally())[ ,2]
GVCtiers <- as.data.frame(matrix(rep(0, times = numTiers*14), nrow = numTiers, ncol = 14)) # initialise GVCtiers
colnames(GVCtiers) <- c("tierNumber", "AvgComplexity", "AvgDensity", "AvgRCA", "AvgComplexity_ifOpp", "AvgDistance_ifOpp", "AvgOpporGain_ifOpp", "AvgRCA_ifOpp", paste(chosenCountry, "export", sep = "_"), "World_export", paste(chosenCountry, "export_ifOpp", sep = "_"), "World_export_ifOpp", "NumberOfActivitiesInTier", "NumberOfOppActivitiesInTier")
#in case of no values numOppProdsInTier == 0, the first line then breaks, hence the else statement
if(sum(GVCfull$tierNumber %in% which(numOppProdsInTier == 0) == TRUE) > 0){
GVCtiers[-which(numOppProdsInTier == 0) , c(5:8)] <- aggregate(GVCfull[-which(GVCfull$tierNumber %in% which(numOppProdsInTier == 0)) , c(7:10)], by = list(tier = GVCfull$tierNumber[-which(GVCfull$tierNumber %in% which(numOppProdsInTier == 0))]), FUN = sum)[, -1]/(numOppProdsInTier[-which(numOppProdsInTier == 0)])
} else {
GVCtiers[, c(5:8)] <- aggregate(GVCfull[ ,c(7:10)], by = list(tier = GVCfull$tierNumber), FUN = mean)[, -1]
J <-"Else"
}
GVCtiers[ , 1] <- unique(GVCMapping$tierNumber)
GVCtiers[ , c(2:4)] <- aggregate(GVCfull[ , c(4:6)], by = list(tier = GVCfull$tierNumber), FUN = sum)[, -1]/numProdsInTier
#GVCtiers[-which(numOppProdsInTier == 0) , c(5:8)] <- aggregate(GVCfull[-which(GVCfull$tierNumber %in% which(numOppProdsInTier == 0)) , c(7:10)], by = list(tier = GVCfull$tierNumber[-which(GVCfull$tierNumber %in% which(numOppProdsInTier == 0))]), FUN = sum)[, -1]/(numOppProdsInTier[-which(numOppProdsInTier == 0)])
GVCtiers[ , c(9:12)] <- aggregate(GVCfull[ , c(11:14)], by = list(tier = GVCfull$tierNumber), FUN = sum)[ , -1]
GVCtiers[ , 13] <- numActsInTier
GVCtiers[ , 14] <- numOppActsInTier
#///////////////////////////////////////////////////////////////////////////////
Product_Results <- GVCfull
Product_Category_Results <- GVCacts
Tier_Results <- GVCtiers
#------------------ Converting back to 'country_codes' ------------------
#ECI
ECI_df <- as.data.frame(ECI, check.names=FALSE)
ECI_df <- cbind(rownames(ECI_df), data.frame(ECI_df, row.names=NULL))
colnames(ECI_df)[1] <- "location_code"
ECI_df <- merge.data.frame(ISO_Country, ECI_df, by = intersect(names(ISO_Country), names(ECI_df)))[, c(2,3)]
ECI_df <- data.frame(ECI_df[order(ECI_df$country_code),], row.names=1)
#PCI
PCI_df <- as.data.frame(PCI)
#Opportunity Gain
OG_df <- as.data.frame(Opportunity_Gain)
OG_df <- cbind(rownames(OG_df), data.frame(OG_df, row.names=NULL))
colnames(OG_df)[1] <- "location_code"
OG_df <- merge.data.frame(ISO_Country, OG_df, by = intersect(names(ISO_Country), names(OG_df)))[,c(-1)]
OG_df <- data.frame(OG_df[order(OG_df$country_code),], row.names=1)
names(OG_df) <- sub("^X", "", names(OG_df))
#distance
dist_df <- as.data.frame(distance)
dist_df <- cbind(rownames(dist_df), data.frame(dist_df, row.names=NULL))
colnames(dist_df)[1] <- "location_code"
dist_df <- merge.data.frame(ISO_Country, dist_df, by = intersect(names(ISO_Country), names(dist_df)))[,c(-1)]
dist_df <- data.frame(dist_df[order(dist_df$country_code),], row.names=1)
names(dist_df) <- sub("^X", "", names(dist_df))
dist_df <- as.matrix(dist_df)
#density
dens_df <- as.data.frame(density)
dens_df <- cbind(rownames(dens_df), data.frame(dens_df, row.names=NULL))
colnames(dens_df)[1] <- "location_code"
dens_df <- merge.data.frame(ISO_Country, dens_df, by = intersect(names(ISO_Country), names(dens_df)))[,c(-1)]
dens_df <- data.frame(dens_df[order(dens_df$country_code),], row.names=1)
names(dens_df) <- sub("^X", "", names(dens_df))
dens_df <- as.matrix(dens_df)
#M-absolute
Mabs_df <- as.data.frame(M_absolute)
Mabs_df <- cbind(rownames(Mabs_df), data.frame(Mabs_df, row.names=NULL))
colnames(Mabs_df)[1] <- "location_code"
Mabs_df <- merge.data.frame(ISO_Country, Mabs_df, by = intersect(names(ISO_Country), names(Mabs_df)))[,c(-1)]
Mabs_df <- data.frame(Mabs_df[order(Mabs_df$country_code),], row.names=1)
names(Mabs_df) <- sub("^X", "", names(Mabs_df))
Mabs_df <- as.matrix(Mabs_df)
#M-binary
Mbin_df <- as.data.frame(M_binary)
Mbin_df <- cbind(rownames(Mbin_df), data.frame(Mbin_df, row.names=NULL))
colnames(Mbin_df)[1] <- "location_code"
Mbin_df <- merge.data.frame(ISO_Country, Mbin_df, by = intersect(names(ISO_Country), names(Mbin_df)))[,c(-1)]
Mbin_df <- data.frame(Mbin_df[order(Mbin_df$country_code),], row.names=1)
names(Mbin_df) <- sub("^X", "", names(Mbin_df))
Mbin_df <- as.matrix(Mabs_df)
#-------------------------------------------------------
ReturnIOPS <- list(ECI_df, PCI_df, OG_df, dist_df, dens_df, Mabs_df, Mbin_df, Tier_Results, Product_Category_Results, Product_Results)
names(ReturnIOPS) <- c("ECI", "PCI", "Opportunity_Gain", "distance", "density", "M_absolute", "M_binary", "Tier_Results", "Product_Category_Results", "Product_Results")
message("--------DONE!--------")
return(ReturnIOPS)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.