#' Get data from a bottom node in SCB API
#'
#' This function fetches actual data (i.e. values) from the SCB web API.
#'
#' @param url URL to get data from (it is usually sufficient to submit the base URL, supplied via the baseURL() function, and the name of the variable).
#' @param dims A list of dimensional parameters to filter data by. Note that values \emph{must} be submitted for all dimensions of the data. If you don't want to filter data, submit an asterisk in quotation marks ("*") instead of values for that dimension.
#' @param clean Clean and melt the data to R format.
#'
#' @details
#' There are five documented filter types in the SCB API documentation; "Item", "All", "Top", "Agg" and "Vs". This function currently only supports the "Item" and "All" modes.
#' To use "Item" selection, simply submit a value or vector of values with each dimensional parameter. To use "All" selection, submit a wildcard asterisk ("*") instead of a value.
#' For detailed examples see the installed example files in the \code{examples} folder of \code{path.package("sweSCB")} (these are also viewable on the project's GitHub page).
#'
#' @seealso
#' \code{\link{scbGetMetadata}}, \code{\link{scbGetDims}}, \code{\link{scbGetLevels}}
#'
#' @export
#' @examples
#' ## CONTINUED FROM EXAMPLES IN scbGetMetadata()
#' # Get metadata for a variable
#' url <- paste(c(baseURL(),"AM","AM0102","AM0102A","KLStabell14LpMan"), collapse="/")
#' metadata <- scbGetMetadata(url)
#'
#' # Get dimensions (names of dimensions are printed in the terminal)
#' dims <- scbGetDims(metadata)
#'
#' # Get data
#' test <- scbGetData(metadata$URL, dims=list(
#' Myndighet = "C02",
#' Kon = "*",
#' Heltiddeltid = "*",
#' ContentsCode = "*",
#' Tid = "*"
#' ))
#'
#' # Examine data
#' View(test)
#'
scbGetData <- function(url, dims, clean = FALSE) {
dimNames <- names(dims)
queryBody <- list()
# Define the query list
for (i in 1:length(dims)) {
if (length(dims[[dimNames[i]]]) == 1) {
filter = ifelse(dims[[dimNames[i]]] == "*", "all", "item")
} else {
filter = "item"
}
queryBody[[i]] <- list(
code = dimNames[i],
selection = list(filter = filter,
values = as.list(dims[[dimNames[i]]])
))
}
# Get data
response <- try(POST(
url = url,
body = toJSON(list(
query = queryBody,
response = list(format = "csv")
))
), silent=TRUE)
# Print error message
if (class(response)=="try-error"){
stop(str_join("No internet connection to ",url),
call.=FALSE)
}
if(response$headers$statusmessage != "OK") {
stop(str_join(response$headers$status, response$headers$statusmessage, content(response,as='text'), sep=" "),
call.=FALSE)
}
# Parse data into human-readable form
# (Due to a weird encoding issue on Windows this generates a warning
# about faulty encoding. Hence the suppressMessages() encapsulation...)
suppressMessages(a <- content(response, as="text"))
if(str_sub(a,1,1)==",") a <- str_sub(a,2,nchar(a)) # Correcting when the first element is , (few variables)
b <- read.table(textConnection(a), sep=',', header=TRUE, stringsAsFactors=FALSE)
head <- str_split(string=str_sub(a, start=1, end=str_locate(a,"\n")[[1]]),"\",\"")[[1]]
head <- str_replace_all(string=head,pattern="\r|\n|\"","")
rm(a)
# Clean and melt data
if (clean) {
b <- .scbClean(data2clean=b, head=head, url=url)
}
return(b)
}
#' Clean raw data from SCB
#'
#' This function clean the raw data from SCB to R tall R format.
#'
#' @param data2clean Data to clean.
#' @param head Full variable names as character vector
#' @param url url to the bottom nod (to get meta data)
#'
#' @return data frame melted and in R (numeric) format
#'
.scbClean <- function(data2clean, head, url) {
# Assertions
stopifnot(ncol(data2clean) == length(head))
stopifnot(class(data2clean) == "data.frame")
stopifnot(class(head) == "character")
stopifnot(class(url) == "character")
# Convert to data table
data2clean <- as.data.table(data2clean)
# Get metadata to use in creating factors of Tid and contentCode
contentNode <- scbGetMetadata(url)
# Collect factor labels for tid and contentCode and convert
# other variables to factor variables
idvars <- character(0)
for (content in contentNode$variables$variables) {
if (content$code %in% c("Tid", "ContentsCode")) {
if (content$code == "Tid") { valTextTid <- content$values }
if (content$code == "ContentsCode") { valTextContentsCode <- content$values }
next()
}
varName <- content$text
Encoding(varName) <- "UTF-8"
idvars <- c(idvars, varName)
}
notIdIndex <- !head%in%idvars
# Remove , in variable titles (not compatible with data.table melt)
idvars <- str_replace_all(idvars, pattern=",", ":")
# Rename columns
newhead <- head
newhead[notIdIndex] <-
unlist(
lapply(str_split(head[notIdIndex],pattern=" "),
FUN=function(x) {
if(length(x)>1) {
paste(paste(x[1:(length(x)-1)],collapse=" "),"$",x[length(x)],sep="")
} else {x}})
)
newhead[!notIdIndex] <- idvars
setnames(data2clean,old=names(data2clean), new=newhead)
# Melt the data to long format
meltData <- data2clean[, list(variable = names(.SD), value = unlist(.SD, use.names = F)), by = eval(idvars)]
meltData <- as.data.frame(meltData)
# Convert to factors
for (idvar in idvars){
meltData[,idvar] <- as.factor(meltData[,idvar])
}
# Add variables tid, tabellinneh\u00e5ll and v\u00e4rde
epicSplit <- str_locate(meltData$variable,pattern="\\$")[,1]
meltData[, "tid"] <- factor(str_sub(meltData$variable,start=epicSplit+1))
meltData[, "tabellinneh\u00e5ll"] <- factor(str_sub(meltData$variable,end=epicSplit-1))
meltData[, "v\u00e4rde"] <- suppressWarnings(as.numeric(str_replace_all(meltData$value,"\\s","")))
# Remove variables wiyhout any use
meltData$value <- NULL
meltData$variable <- NULL
return(meltData)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.