#' Looking Up Product Differentiation
#'
#' Returns Rauch's classification of product differentiation. Rauch classifies 4-digit SITC2 codes according to three possible types: differentiated products ("n"), reference priced ("r"), and homogeneous goods traded on an organized exchange ("w").
#'
#' @param sourcevar An input character vector of industry/product codes.
#' @param origin A string setting the input coding scheme. Supports the following classifications: "HS0" (1988/92), "HS1" (1996), "HS2" (2002), "HS3" (2007), "HS4" (2012), "HS5" (2017), "HS6" (2022), "HS" (combined), "SITC1" (1950), "SITC2" (1974), "SITC3" (1985), "SITC4" (2006), "NAICS2002", "NAICS2007", "NAICS2012", "NAICS2017", "ISIC2" (1968), "ISIC3" (1989), "ISIC4" (2008), "BEC4" (2016).
#' @param setting Choose "CON" (conservative, default) or "LIB" (liberal) classification.
#' @param prop Can be set to "n", "r", or "w", in which case the function will return, respectively, the proportion of type "n", "r", or "w" in the resulting vector of Rauch indices. If prop is not set to any of these, then the function returns, for each input code, a dataframe that summarizes all the frequencies and proportions of type "w", "r", and "n".
#' @return Concords each element of the input vector to 4-digit SITC2 codes, then uses the corresponding codes as input to extract Rauch product differentiation indices.
#' @import tibble tidyr purrr dplyr stringr
#' @importFrom rlang := !! .data
#' @export
#' @source Data from Jon Haveman's International Trade Data page <http://www.macalester.edu/research/economics/PAGE/HAVEMAN/Trade.Resources/TradeData.html#Rauch>.
#' @references Rauch, James E. 1999. "Networks Versus Markets in International Trade," Journal of International Economics 48(1): 7--35.
#' @note Please include leading zeros in codes (e.g., use HS code 010110 instead of 10110). For BEC4 only, use original codes or add trailing zeroes if necessary (e.g., 7 or 700 instead of 007). Results may be buggy otherwise.
#' @examples
#' # SITC2 input
#' get_proddiff(sourcevar = c("22240", "04110"), origin = "SITC2", setting = "CON", prop = "")
#'
#' get_proddiff(sourcevar = c("22240", "04110"), origin = "SITC2", setting = "CON", prop = "r")
#'
#' get_proddiff(sourcevar = c("22240", "04110"), origin = "SITC2", setting = "CON", prop = "w")
#'
#' get_proddiff(sourcevar = c("22240", "04110"), origin = "SITC2", setting = "CON", prop = "n")
#'
#' get_proddiff(sourcevar = c("22240", "04110"), origin = "SITC2", setting = "LIB", prop = "")
#'
#' # SITC3 input
#' get_proddiff(sourcevar = c("22240", "04110"), origin = "SITC3", setting = "CON", prop = "")
#'
#' # SITC4 input
#' get_proddiff(sourcevar = c("22240", "04110"), origin = "SITC4", setting = "CON", prop = "")
#'
#' # HS input
#' get_proddiff(sourcevar = c("1206", "1001", "8546"), origin = "HS", setting = "CON", prop = "")
#'
#' # NAICS input
#' get_proddiff(sourcevar = c("111120", "326199"), origin = "NAICS", setting = "CON", prop = "")
#'
#' # BEC4 input
#' get_proddiff(sourcevar = c("11", "21"), origin = "BEC4", setting = "CON", prop = "")
get_proddiff <- function (sourcevar,
origin,
setting = "CON",
prop = "") {
# sanity checks
setting <- toupper(setting)
if (!setting %in% names(concordance::sitc2_rauch)[2:3]) {
stop("Setting not supported.")
}
# set rauch types
rauch.types <- c("w", "r", "n")
if(origin == "SITC2") {
# get the number of unique digits, excluding NAs
digits <- unique(nchar(sourcevar))
digits <- digits[!is.na(digits)]
if (digits > 4) {
sourcevar.4d <- str_sub(sourcevar, start = 1, end = 4)
} else if (digits < 4) {
sourcevar.4d <- str_pad(sourcevar, width = 4, side = "right", pad = "0")
} else {
sourcevar.4d <- sourcevar
}
# extract rauch for the matches of each input
rauch.list <- map(sourcevar.4d, function(x){
rauch.sub <- concordance::sitc2_rauch[match(x, concordance::sitc2_rauch[,"SITC2"]$SITC2), setting]
rauch.sub
})
} else{
# concord to SITC2
via <- concord(sourcevar, origin, "SITC2", dest.digit = 4, all = TRUE)
# extract rauch for the matches of each input
rauch.list <- map(1:length(sourcevar), function(x){
via.match.sub <- pluck(via, x, "match")
rauch.sub <- concordance::sitc2_rauch[match(via.match.sub, concordance::sitc2_rauch[,"SITC2"]$SITC2), setting]
rauch.sub
})
}
# set list names to input vector
names(rauch.list) <- sourcevar
# give warning when no corresponding Rauch classification exists
if (any(is.na(rauch.list))) {
no.rauch <- names(is.na(rauch.list)[is.na(rauch.list) == TRUE])
warning(paste("No Rauch classification exists for ", origin, " code(s): ", no.rauch, ". Returned NA.\n", sep = ""))
}
# extract frequency and calculate proportions
rauch.freq <- map(sourcevar, function (x) {
freq.r <- map_df(rauch.types, function(y) {
freq.sub <- tibble(rauch = y,
freq = sum(grepl(y, rauch.list[[x]][[1]])))
return(freq.sub)
})
freq.c <- freq.r %>%
mutate(tot = sum(.data$freq),
proportion = .data$freq/.data$tot,
proportion = if_else(is.nan(.data$proportion), NA_real_, .data$proportion)) %>%
select(.data$rauch, .data$freq, .data$proportion)
freq.c <- as.data.frame(freq.c)
return(freq.c)
})
# set list names to input vector
names(rauch.freq) <- sourcevar
# force NA to string so that map_df can run without error
names(rauch.freq) <- if_else(is.na(names(rauch.freq)), "NA", names(rauch.freq))
# if prop is specified, calculate relevant proportions, otherwise return full list
if (tolower(prop) %in% rauch.types) {
out <- map_df(rauch.freq, function(x){
out.sub <- x %>%
filter(.data$rauch == prop) %>%
pull(.data$proportion)
})
out <- unlist(out)
# convert NaNs to NAs
out[is.nan(out)] <- NA_real_
} else {
out <- rauch.freq
}
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.