Nothing
#' Global Identifier Macros
#'
#' @description
#' The macro \code{.AMID} contains the string \code{c("ISO3", "Series")} denoting
#' the names of ID variables that identify the cross-sectional dimension in the database.
#'
#' The macro \code{.AMT} contains the string
#' \code{c("Date", "Year", "Quarter", "FY", "QFY", "Month", "Day")} denoting temporal identifiers generated by \code{\link{am_expand_date}}.
#' The "Date" variable is sufficient to uniquely identify a point in time in the database.
#'
#' Each value in the database is uniquely identified by ISO3, Series and Date.
#'
#' @usage
#' .AMID
#' .AMT
#'
#' @examples
#' .AMID
#' .AMT
#'
#'
#' @seealso \code{\link{africamonitor}}
#'
#' @format NULL
#' @aliases .AMID .AMT
#' @export .AMID
#' @export .AMT
.AMID <- c("ISO3", "Series")
.AMT <- c("Date", "Year", "Quarter", "FY", "QFY", "Month", "Day")
#' Dataset of Countries in the Database
#'
#' \code{am_countries} is a data.frame containing standardized codes of 55 African countries
#' (including Western Sahara) according to various classifications
#' and regional aggregations. \code{am_countries_wld} provides the same information for 195
#' countries, which includes the 193 UN Member States, Western Sahara and Taiwan. The API
#' generally provides data for all 195 countries, but by default only requests data for Africa.
#' Note that the API (\code{\link{am_data}}) only supports "ISO3" character codes.
#'
#' @format A data frame with 55 (wld = 195) rows and 9 variables (sorted by Country):
#' \describe{
#' \item{Country}{Short Country Name}
#' \item{Country_ISO}{ISO Standardized Country Name}
#' \item{ISO2}{ISO 2-Character Country Code}
#' \item{ISO3}{ISO 3-Character Country Code}
#' \item{ISO3N}{ISO Numeric Country Code}
#' \item{IMF}{IMF Numeric Country Code}
#' \item{Region}{2-Region Classification (UN except for Sudan)}
#' \item{Region_Detailed}{5-Region Classification (Former World Bank)}
#' \item{Currency}{Main Official Currency}
#' }
#' @source \href{https://CRAN.R-project.org/package=countrycode}{countrycode} R package (with some modification of regional aggregates).
#' @seealso \code{\link{am_entities}}, \code{\link{am_sources}}, \code{\link{am_series}}, \code{\link{africamonitor}}
#' @examples
#' head(am_countries)
"am_countries"
#' @rdname am_countries
"am_countries_wld"
#' Dataset of African Economic and Regional Entities
#'
#' A dataset mapping African countries to various economic and regional entities.
#'
#' @format A data frame with 54 rows (one for each country, excluding Western Sahara) and 27 variables, of which 6 are country identifiers and the remaining 21
#' are logical variables indicating country membership to various economic and regional entities.
#'
#' @source Own compilation.
#' @seealso \code{\link{am_countries}}, \code{\link{am_sources}}, \code{\link{am_series}}, \code{\link{africamonitor}}
#' @examples
#' head(am_entities)
"am_entities"
# #' Reconnect to Database
# #'
# #' This function terminates an existing connection to the database server and attempts to reconnect to it.
# #' It is now somewhat redundant by the safe query mechanism introduced in v0.2.1 of the package, where each query is evaluated inside
# #' \code{\link{tryCatch}} and the database connection is renewed if the query fails. This function can still be used to manually
# #' renew the database connection.
# #' @examples \donttest{
# #' am_reconnect()
# #' }
# #' @seealso \code{\link{africamonitor}}
# #'
# #' @export am_reconnect
am_reconnect <- function() {
if(length(.am_con)) tryCatch(dbDisconnect(.am_con), error = function(e) cat(""))
assignInMyNamespace(".am_con", .connect())
}
.am_con <- NULL # Needed for initialization
safe_query <- function(query) {
if(is.null(.am_con)) am_reconnect() # Initial connection
tryCatch(dbGetQuery(.am_con, query), error = function(e) {
am_reconnect()
tryCatch(dbGetQuery(.am_con, query), error = function(e) NULL)
})
}
#' Retrieve Data Sources Table
#'
#' This function retrieves a table with information about the sources of data in the database, and when data from different sources was updated.
#'
#' The data source table gives information about the various sources / providers of data in this database, including the source website, frequency and time coverage of data, a description of the source, when data from the source was updated and the way data is accessed from the source.
#' @param ordered logical. \code{TRUE} orders the result in the order data was entered into the database, while \code{FALSE} returns the result in a random order, to the benefit of faster query execution.
#' @return A \code{\link[data.table]{data.table}} with information about the sources of data in the database.
#' @examples \donttest{
#' am_sources()
#' }
#' @seealso \code{\link{am_countries}}, \code{\link{am_series}}, \code{\link{africamonitor}}
#'
#' @export am_sources
am_sources <- function(ordered = TRUE) {
query <- "SELECT DSID, Source, Url, NSeries, Frequency, Data_From, Data_To, Description, Updated, Access FROM DATASOURCE"
if(ordered) query <- paste(query, "ORDER BY DS_Order")
res <- safe_query(query)
if(is.null(res) || !fnrow(res)) {
message("Query resulted in empty dataset. This means something is wrong with your internet connection, the connection to the database or with the database itself.")
return(res)
}
res$Data_From <- as.Date(res$Data_From)
res$Data_To <- as.Date(res$Data_To)
res$Updated <- as.Date(res$Updated)
setDT(res)
return(res)
}
#' Retrieve Series Table
#'
#' This function pulls information about the data series available in the database.
#'
#' @param dsid character. (Optional) id's of datasources matching the 'DSID' column of the data sources table (retrieved using \code{\link[=am_sources]{am_sources()}}) for which series information is to be returned.
#' @param source.info logical. \code{TRUE} returns additional information from the data sources table (the source, the frequency of the data and when it was last updated).
#' @param ordered logical. \code{TRUE} returns the series in a fixed order, while \code{FALSE} returns the result in a random order, to the benefit of faster query execution.
#' @param return.query logical. \code{TRUE} will not query the database but instead return the constructed SQL query as a character string (for debugging purposes).
#'
#' @details The series table gives information about all of the time series in the database. Each series is given a unique code, and
#' has a label describing the series. Further information recorded are the minimum and maximum time coverage, and (optionally) a separate series source and url.
#' The default \code{source.info = TRUE} adds the source, the frequency of the data (homogeneous within source), and the date when the source was last updated.
#'
#'
#' @return A \code{\link[data.table]{data.table}} with information about the available series in the database.
#' @examples \donttest{
#' # By default returns all series with additional information
#' am_series()
#'
#' # Raw series table
#' am_series(source.info = FALSE)
#'
#' # Only series in the WEO
#' am_series("IMF_WEO")
#' }
#' @seealso \code{\link{am_countries}}, \code{\link{am_sources}}, \code{\link{am_data}}, \code{\link{africamonitor}}
#'
#' @export am_series
am_series <- function(dsid = NULL, source.info = TRUE, ordered = TRUE, return.query = FALSE) {
if(source.info) {
query <- "SELECT DSID, Series, Label, Topic, S_Frequency, S_From, S_To, Nctry, Avg_Obs, Updated, Source, S_Description, S_Source, S_Url FROM SERIES NATURAL JOIN DATASOURCE" # , DS_Url
} else {
query <- "SELECT Series, Label, Topic, S_Frequency, S_From, S_To, Nctry, Avg_Obs, S_Description, S_Source, S_Url FROM SERIES"
}
if(length(dsid)) {
query <- if(length(dsid) == 1L) paste0(query, " WHERE DSID = '", dsid, if(ordered) "' ORDER BY S_Order" else "'") else
paste0(query, " WHERE DSID IN ('", paste(dsid, collapse = "', '"), if(ordered) "') ORDER BY S_Order" else "')")
} else if(ordered) query <- paste(query, "ORDER BY S_Order")
if(return.query) return(query)
res <- safe_query(query)
if(is.null(res) || !fnrow(res)) {
message("Query resulted in empty dataset. Please make sure the DSID exists by checking against the am_sources() table. Alternatively check your connection to the database.")
return(res)
}
res$S_From <- as.Date(res$S_From)
res$S_To <- as.Date(res$S_To)
if(source.info) res$Updated <- as.Date(res$Updated)
setDT(res)
return(res)
}
#' Generate Temporal Identifiers from a Date Column
#'
#' This function expands a date column and generates additional temporal identifiers from it (such as the year, month, quarter, fiscal year etc.).
#'
#' @param x either a vector of class 'Date', or coercible to date using \code{\link[base]{as.Date}}, or a data frame / list containing with a date-column called \code{name}.
#' @param gen character. A vector of identifiers to generate from \code{x}. The possible identifiers are found in \code{\link{.AMT}}.
#' @param origin character / Date. Passed to \code{\link[base]{as.Date}}: for converting numeric \code{x} to date.
#' @param keep.date logical. \code{TRUE} will keep the date variable in the resulting dataset, \code{FALSE} will remove the date variable in favor of the generated identifiers.
#' @param remove.missing.date logical. \code{TRUE} will remove missing values in \code{x}. If \code{x} is a dataset, rows missing the date variable will be removed.
#' @param sort logical. \code{TRUE} will sort the data by the date column.
#' @param as.factor \code{TRUE} will generate quarters, fiscal years and months ('Quarter', 'FY', 'QFY', 'Month') as factor variables. It is also possible to use \code{as.factor = "ordered"} to generate ordered factors.
#' \code{FALSE} will generate fiscal years as character and quarters and months as integer variables.
#' @param name character. The name of the date variable to expand.
#' @param \dots not used.
#'
#' @return A \code{\link[data.table]{data.table}} containing the computed identifiers as columns. See Examples.
#'
#' @examples
#' # First a basic example
#' x <- seq.Date(as.Date("1999-01-01"), as.Date("2000-01-01"), by = "month")
#' am_expand_date(x)
#' am_expand_date(x, gen = .AMT[-1L], keep.date = FALSE)
#' \donttest{
#' # Now using the API
#' am_expand_date(am_data("KEN"))
#'
#' # Same thing
#' am_data("KEN", expand.date = TRUE)
#' }
#'
#' @seealso \code{\link{am_as_date}}, \code{\link{africamonitor}}
#' @export am_expand_date
am_expand_date <- function(x, gen = c("Year", "Quarter", "Month"), origin = "1899-12-30",
keep.date = TRUE, remove.missing.date = TRUE, sort = TRUE,
as.factor = TRUE, name = "Date", ...) {
lxl <- FALSE
genopts <- c('Year', 'Quarter', 'FY', 'QFY', 'Month', 'Day')
genlab <- c('Year', 'Quarter', 'Fiscal Year (July - June)', 'Quarter of Fiscal Year', 'Month', 'Day')
ordered <- is.character(as.factor) && as.factor == "ordered"
as.factor <- ordered || isTRUE(as.factor)
if(is.numeric(gen)) {
lab <- genlab[gen]
gen <- genopts[gen]
} else if(is.character(gen)) {
lab <- genlab[ckmatch(gen, genopts, "Unknown gen option:")]
} else stop("gen must be integer or character")
if(is.list(x)) {
lxl <- TRUE
ind <- ckmatch(name, names(x), "No column named:")
l <- .subset(x, -ind)
x <- .subset2(x, ind)
}
getFY <- function(y, m) {
fy <- fifelse(as.integer(m) >= 7L, y, y - 1L)
fy <- paste(fy, substr(as.character(fy + 1L), 3L, 4L), sep = "/")
if(as.factor) qF(fy, ordered = ordered) else fy # , na.exclude = FALSE
}
getQ <- function(m) {
Q <- ceiling(m / 3L)
if(as.factor) structure(Q, levels = paste0("Q", 1:4), class = c(if(ordered) "ordered", "factor")) else Q # , "na.included"
}
getFQ <- function(m) {
mod <- (m + 6L) %% 12L
mod[mod == 0L] <- 12L
getQ(mod)
}
if(!inherits(x, "Date"))
x <- if(is.numeric(x)) as.Date(x, origin) else as.Date(x)
if(remove.missing.date && anyNA(x)) {
nna <- whichNA(x, invert = TRUE)
x <- x[nna]
if(lxl) l <- ss(l, nna)
}
if(!lxl && sort && is.unsorted(x)) x <- sort(x)
xp <- as.POSIXlt(x)
y <- xp$year + 1900L
m <- xp$mon + 1L
res <- lapply(setNames(as.vector(gen, "list"), gen), switch,
Year = y,
Quarter = getQ(m),
FY = getFY(y, m),
QFY = getFQ(m),
Month = if(as.factor) structure(m, levels = month.name, class = c(if(ordered) "ordered", "factor")) else m, # , "na.included"
Day = xp$mday,
stop("Unknown gen option"))
vlabels(res) <- lab
if(keep.date) res <- c(list(Date = `attr<-`(x, "label", "Date")), res)
if(lxl) {
res <- c(res, l)
if(any(wc <- names(res) == "ISO3")) res <- c(res["ISO3"], res[!wc])
if(sort && is.unsorted(x))
res <- ss(res, order(x, method = "radix"))
}
return(qDT(res)) # need qDT here, res is a plain list...
}
#' Coerce Vectors to Dates
#'
#' This function coerces date strings i.e. \code{"YYYY-MM-DD"} or \code{"YYYY-MM"}, years e.g. \code{2015} (numeric or character),
#' year-quarters e.g. \code{"2015Q1"} or \code{"2015-Q1"}, year-months e.g. \code{"2015M01"} or \code{"2015-M01"}, fiscal years e.g. \code{"1997/98"} or numeric values representing dates (e.g. previously imported Excel date) to a regular R date.
#'
#' @param x a character date string \code{"YYYY-MM-DD"} or \code{"YYYY-MM"}, year-quarter \code{"YYYYQN"} or \code{"YYYY-QN"}, year-month \code{"YYYYMNN"} or \code{"YYYY-MNN"}, fiscal year \code{"YYYY/YY"} or calendar year \code{YYYY} (numeric or character), or a numeric value corresponding to a date that can be passed to \code{\link[base]{as.Date.numeric}}.
#' @param end logical. \code{TRUE} replaces missing time information with a period end-date which is the last day of the period. \code{FALSE} replaces missing month and day information with \code{"-01"},
#' so the year date is the 1st of January, the fiscal year date the 1st of July, and for months / quarters the 1st day of the month / quarter.
#' @param origin a date or date-string that can be used as reference for converting numeric values to dates. The default corresponds to dates generated in Excel for Windows. See \code{\link[base]{as.Date.numeric}}.
#'
#' @return A \code{\link[base]{Date}} vector.
#' @examples
#' am_as_date("2011-05")
#' am_as_date(2011)
#' am_as_date("2011/12")
#' am_as_date("2011/12", end = TRUE)
#' am_as_date("2011Q1")
#' am_as_date("2011Q1", end = TRUE)
#'
#' @seealso \code{\link{am_expand_date}}, \code{\link{africamonitor}} %, \code{\link{times}}
#' @export am_as_date
# Note: Before did all days ending on 28th
am_as_date <- function(x, end = FALSE, origin = "1899-12-30") {
x1 <- x[1L]
ncx <- nchar(x1)
if(ncx == 4L) return(as.Date(paste0(x, if(end) "-12-31" else "-01-01")))
if(is.numeric(x)) return(as.Date.numeric(x, origin))
if(ncx >= 6L && ncx <= 8L) { # could be "1999/1"
s5 <- substr(x1, 5L, 5L)
if(s5 == "/") {
x <- if(end) paste0(as.integer(substr(x, 1L, 4L)) + 1L, "-06-30") else
paste0(substr(x, 1L, 4L), "-07-01")
} else if(s5 == "Q" || substr(x1, 6L, 6L) == "Q") {
Q <- as.integer(substr(x, ncx, ncx))
x <- if(end) paste0(substr(x, 1L, 4L), "-", Q * 3L, fifelse(as.logical(Q %% 2L), "-31", "-30")) else # fifelse requires logical argument
paste0(substr(x, 1L, 4L), "-", Q * 3L - 2L, "-01")
} else {
if(s5 == "M" || substr(x1, 6L, 6L) == "M") {
st <- if(s5 == "M") 6L else 7L
M <- substr(x, st, st + 1L)
x <- paste0(substr(x, 1L, 4L), "-", M, "-01")
} else { # Assuming now Year-Month type string
if(ncx != 8L) x <- paste0(x, "-01")
}
if(end) return(as.Date(format(as.Date(x) + 31L, "%Y-%m-01")) - 1L)
}
}
return(as.Date(x))
}
#' Retrieve Data from the Database
#'
#' This is the main function of the package to retrieve data from the database. % It constructs an SQL query which is sent to the database and returns the data as a \code{\link[data.table]{data.table}} in R.
#'
#' @param ctry character. (Optional) the ISO3 code of countries (see \code{\link{am_countries}}). Default is to load data for all African countries. Putting \code{NULL} gets data for all countries (codes available in \code{\link{am_countries_wld}}).
#' @param series character. (Optional) codes of series matching the 'Series' column of the series table (retrieved using \code{\link[=am_series]{am_series()}}).
#' @param from set the start time of the data retrieved by either supplying a start date, a date-string of the form \code{"YYYY-MM-DD"} or \code{"YYYY-MM"},
#' year-quarters of the form \code{"YYYYQN"} or \code{"YYYY-QN"}, a numeric year \code{YYYY} (numeric or character), or a fiscal year of the form \code{"YYYY/YY"}. These expressions are converted to a regular date by \code{\link{am_as_date}}.
#' @param to same as \code{from}: to set the time period until which data is retrieved. For expressions that are not full "YYYY-MM-DD" dates, the last day of the period is chosen.
#' @param labels logical. \code{TRUE} will also return labels (series descriptions) along with the series codes.
#' @param wide logical. \code{TRUE} calls \code{\link{am_pivot_wider}} on the result. \code{FALSE} returns the data in a long format without missing values (suitable for \code{ggplot2}).
#' @param expand.date logical. \code{TRUE} will call \code{\link{am_expand_date}} on the result.
#' @param drop.1iso3c logical. If only one country is selected through \code{ctry}, \code{TRUE} will drop the 'ISO3' column in the output.
#' @param ordered logical. \code{TRUE} orders the result by 'Date' and, if \code{labels = TRUE}, by series, maintaining a fixed column-order of series.
#' \code{FALSE} returns the result in a random order, to the benefit of faster query execution.
#' @param return.query logical. \code{TRUE} will not query the database but instead return the constructed SQL query as a character string (for debugging purposes).
#' @param \dots further arguments passed to \code{\link{am_pivot_wider}} (if \code{wide = TRUE}) or \code{\link{am_expand_date}} (if \code{expand.date = TRUE}).
#'
#' @details If \code{labels = FALSE}, the series table is not joined to the data table, and \code{ordered = TRUE} will order series retrieved in alphabetic order.
#' If \code{labels = TRUE} data is ordered by series and date, preserving the order of columns in the dataset. If multiple countries are received they are ordered alphabetically according to the 'ISO3' column.
#'
#' Series at different frequencies can be queried, but, if \code{wide = TRUE}, this will result in missing values for all but the first observations per period in the lower frequency series.
#'
#' @return A \code{\link[data.table]{data.table}} with the result of the query.
#'
#' @examples \donttest{
#' # Return all indicators for Kenya from 2000
#' am_data("KEN", from = 2000)
#'
#' # Return all indicators for Kenya from 2000 in long format
#' am_data("KEN", from = 2000, wide = FALSE)
#'
#' # Return with date expanded
#' am_data("KEN", from = 2000, expand.date = TRUE)
#'
#' # Same thing in multiple steps (with additional customization options):
#' am_data("KEN", from = 2000, wide = FALSE) |> am_pivot_wider() |> am_expand_date()
#'
#' # Getting only GDP growth
#' am_data("KEN", "NGDP_RPCH", from = 2000)
#'
#' # Getting GDP growth for all countries
#' am_data(series = "NGDP_RPCH", from = 2000)
#'
#' # Reshaping to wider format
#' am_data(series = "NGDP_RPCH", from = 2000) |>
#' am_pivot_wider(id_cols = "Date",
#' names_from = "ISO3",
#' values_from = "NGDP_RPCH")
#'
#' # Getting growth and inflation for the EAC countries (all available years)
#' am_data(ctry = c("UGA", "KEN", "TZA", "RWA", "BDI", "SSD"),
#' series = c("NGDP_RPCH", "PCPIPCH"))
#'
#' }
#' @seealso \code{\link{am_pivot_wider}}, \code{\link{am_expand_date}}, \code{\link{africamonitor}}
#' @export am_data
#'
am_data <- function(ctry = africamonitor::am_countries$ISO3,
series = NULL, from = NULL, to = NULL, # , dsid = NULL
labels = TRUE, wide = TRUE, expand.date = FALSE, drop.1iso3c = TRUE,
ordered = TRUE, return.query = FALSE, ...) {
c0 <- is.null(ctry)
s0 <- is.null(series)
if(!c0 && anyDuplicated(ctry)) stop("duplicated country code: ", paste(ctry[duplicated(ctry)], collapse = ", "))
if(!s0 && anyDuplicated(series)) stop("duplicated series code: ", paste(series[duplicated(series)], collapse = ", "))
if(labels) {
data <- "DATA NATURAL JOIN SERIES"
lab <- ", Label"
} else {
data <- "DATA"
lab <- ""
}
if(!c0 && drop.1iso3c && length(ctry) == 1L) { # This is good -> more intelligent
cond <- paste0("ISO3 = '", ctry, "'")
vars <- paste0("Date, Series", lab, ", Value")
if(ordered) ord <- if(labels) "S_Order, Date" else "Series, Date"
} else {
cond <- if(c0) "" else paste0("ISO3 IN ('", paste(ctry, collapse = "', '"), "')")
vars <- paste0("ISO3, Date, Series", lab,", Value")
if(ordered) ord <- if(labels) "ISO3, S_Order, Date" else "ISO3, Series, Date"
}
if(!s0) {
add <- if(c0) "" else " AND "
if(length(series) == 1L) {
# vars <- sub(", Series", "", vars) # This is inconvenient -> cannot use am_pivot_wider anymore
cond <- paste0(cond, add, "Series = '", series, "'")
} else {
cond <- paste0(cond, add, "Series IN ('", paste(series, collapse = "', '"), "')")
}
}
if(length(from)) cond <- paste0(cond, " AND Date >= '", am_as_date(from) , "'")
if(length(to)) cond <- paste0(cond, " AND Date <= '", am_as_date(to, end = TRUE) , "'")
where <- if(c0 && s0) "" else "WHERE"
query <- if(ordered) paste("SELECT", vars, "FROM", data, where, cond, "ORDER BY", ord) else
paste("SELECT", vars, "FROM", data, where, cond)
if(return.query) return(query)
res <- safe_query(query)
if(is.null(res) || !fnrow(res)) {
message("Query resulted in empty dataset. Please make sure that the ISO3, series-codes or the date-range supplied in your query are consistent with the available data. See am_sources() and am_series(). Alternatively check your connection to the database.")
return(res)
}
res$Date <- as.Date(res$Date)
setDT(res)
if(wide) {
res <- am_pivot_wider(res, ...) # Could optimize to omit ORDER BY clause if wide and length(series) > 1L
if(length(series) > 1L) setcolorder(res, c(if(c0 || length(ctry) > 1L || !drop.1iso3c) "ISO3", "Date", series))
}
if(expand.date) return(am_expand_date(res, ...)) else return(res)
}
#' Reshape Long API Data to Column-Based Format
#'
#' This function automatically reshapes long (stacked) raw data from the API (\code{\link[=am_data]{am_data(..., wide = FALSE)}}) to a wide format where each variable has its own column.
#' It can also be used as a general purpose reshaping command - with an additional capability to handle variable labels.
#'
#' @param data raw data from the API: A long format data frame where all values are stacked in a value column.
#' @param id_cols character. Temporal identifiers of the data. By default all variables in \code{\link{.AMT}} and "ISO3" are selected.
#' @param names_from character. The column containing the series codes. These will become the names of the new columns in the wider data format.
#' @param values_from character. The column containing the data values.
#' @param labels_from character. The column containing the labels describing the series.
#' @param expand.date logical. \code{TRUE} will call \code{\link{am_expand_date}} on the data after reshaping.
#' @param \dots further arguments passed to \code{\link[collapse]{pivot}} or \code{\link{am_expand_date}}.
#'
#' @return A \code{\link[data.table]{data.table}} with the reshaped data.
#'
#' @examples \donttest{
#' # Return all indicators for Kenya and Nigeria from the year 2000 onwards
#' am_pivot_wider(am_data(c("KEN", "NGA"), from = 2000, wide = FALSE))
#' }
#' @seealso \code{\link{am_pivot_longer}}, \code{\link{africamonitor}}
#'
#' @export am_pivot_wider
#'
am_pivot_wider <- function(data, id_cols = intersect(c("ISO3", .AMT), names(data)), # setdiff(names(data), c("Series", "Label", "Value")),
names_from = "Series", values_from = "Value",
labels_from = if(any(names(data) == "Label")) "Label" else NULL,
expand.date = FALSE, ...) {
res <- pivot_ellipsis(data, id_cols, values_from, names_from, labels_from, how = "wider", sort = "ids", ...) # collapse::pivot
if(expand.date) return(am_expand_date(res, ...)) else return(res)
}
# This is to allow ellipsis (...) arguments to be passed to both pivot and am_expand_data
pivot_ellipsis <- function(data, ids = NULL, values = NULL, names = NULL, labels = NULL,
how = "longer", na.rm = FALSE, factor = c("names", "labels"),
check.dups = FALSE, nthreads = 1L, fill = NULL,
drop = TRUE, sort = FALSE, transpose = FALSE, ...) {
pivot(data, ids = ids, values = values, names = names,
labels = labels, how = how, na.rm = na.rm, factor = factor,
check.dups = check.dups, nthreads = nthreads, fill = fill,
drop = drop, sort = sort, transpose = transpose)
}
#' Reshape Column-Based Data to Long Format
#'
#' This function automatically reshapes wide (column-based) data into a long format akin to the format of the raw data coming from the database (\code{\link[=am_data]{am_data(..., wide = FALSE)}}).
#' It can also be used as a general purpose reshaping command - with an additional capability to handle variable labels.
#'
#' @param data a wide format data frame where all series have their own column.
#' @param id_cols character. Temporal identifiers of the data. By default all variables in \code{\link{.AMT}} and "ISO3" are selected.
#' @param to_value character. The names of all series to be stacked into the long format data frame.
#' @param variable_name character. The name of the variable to store the names of the series.
#' @param value_name character. The name of the variable to store the data values.
#' @param label_name character. The name of the variable to store the series labels.
#' @param na.rm logical. \code{TRUE} will remove all missing values from the long data frame.
#' @param variable.factor,label.factor logical. \code{TRUE} will code the "Series" and "Label" columns as factors, which is more memory efficient.
#' @param \dots further arguments passed to \code{\link[data.table]{melt}}.
#'
#' @return A \code{\link[data.table]{data.table}} with the reshaped data.
#'
#' @examples \donttest{
#' # Return all indicators for Kenya and Nigeria from the year 2000 onwards
#' data <- am_data(c("KEN", "NGA"), from = 2000)
#' am_pivot_longer(data)
#' }
#' @seealso \code{\link{am_pivot_wider}}, \code{\link{africamonitor}}
#'
#' @export am_pivot_longer
#'
am_pivot_longer <- function(data, id_cols = intersect(c("ISO3", .AMT), names(data)),
to_value = setdiff(names(data), id_cols),
variable_name = "Series", value_name = "Value",
label_name = "Label", na.rm = TRUE, variable.factor = TRUE,
label.factor = TRUE, ...) {
if(length(label_name)) labs <- vlabels(.subset(data, to_value), use.names = FALSE)
res <- melt(qDT(data), id_cols, to_value, variable_name, value_name,
na.rm = na.rm, variable.factor = variable.factor, ...)
vlabels(res) <- NULL # needed
if(length(label_name) && !allNA(labs)) {
if(label.factor) {
al <- list(levels = labs, class = "factor")
labs <- if(na.rm) rep(seq_along(labs), fnobs(.subset(data, to_value))) else
rep(seq_along(labs), each = fnrow(data))
attributes(labs) <- al
} else {
# melt always preserves column order (otherwise could rep with names and match series column)
labs <- if(na.rm) rep(labs, fnobs(.subset(data, to_value))) else
rep(labs, each = fnrow(data))
}
add_vars(res, fncol(res)) <- setNames(list(labs), label_name)
}
return(res)
}
# all.equal(roworderv(am_data("bou_mmi", from = 2000, to = 2005)),
# roworderv(am_pivot_longer(am_pivot_wider(am_data("bou_mmi", from = 2000, to = 2005)))))
#'
#' #' Transpose a Wide Dataset to a Row-Based Format
#' #'
#' #' This function is called by \code{\link{am_write_excel}} with option \code{transpose = TRUE} to generate a row-based tabular data format from a wide data frame in R that is suitable for exporting to Excel.
#' #'
#' #' @param data a wide format data frame where each column is a variable and the first variable uniquely identifies the data.
#' #' @param date.format a format for date columns which is passed to \code{\link[base]{format.Date}}. When transposing wide, dates are converted to character. The default R YYYY-MM-DD format for dates is often not recognized by Excel. By default dates are transformed to DD/MM/YYYY format which Excel (UK English) recognizes. Putting \code{FALSE} here does not transform dates into another format.
#' #'
#' #' @return A transposed data frame or \code{\link[data.table]{data.table}} (the class of the input is preserved).
#' #'
#' #' @examples \donttest{
#' #' am_transpose(am_data("BOU_CPI"))
#' #' }
#' #' @seealso \code{\link[data.table]{transpose}}, \code{\link{am_pivot_wider}}, \code{\link{am_write_excel}}, \code{\link{africamonitor}}
#' #' @export am_transpose
#'
#' am_transpose <- function(data, date.format = "%d/%m/%Y") {
#' nnum <- cat_vars(data, "logical")[-1L]
#' lab <- vlabels(data)[-1L]
#'
#' if(is.character(date.format) && length(dv <- date_vars(data, "indices"))) {
#' get_vars(data, dv) <- if(length(dv) == 1L) format(.subset2(data, dv), date.format) else
#' lapply(.subset(data, dv), format, date.format)
#' }
#'
#' if(any(nnum) && !all(nnum)) {
#' res <- list(`numeric data` = add_vars(transpose(get_vars(data, c(TRUE, !nnum)),
#' make.names = 1L, keep.names = "Variable"),
#' list(Label = lab[!nnum]), pos = 2L),
#' `non-numeric data` = add_vars(transpose(get_vars(data, c(TRUE, nnum)),
#' make.names = 1L, keep.names = "Variable"),
#' list(Label = lab[nnum]), pos = 2L))
#' } else {
#' res <- transpose(data, make.names = 1L, keep.names = "Variable")
#' add_vars(res, 2L) <- list(Label = lab)
#' }
#'
#' attr(res, "transposed") <- TRUE
#' if(inherits(data, "data.table")) {
#' if(inherits(res, "data.table")) return(qDT(res, keep.attr = TRUE))
#' for(i in 1:2) setDT(res[[i]])
#' }
#' return(res)
#' }
#' #' Export Wide Data to Excel
#' #'
#' #' This function exports a wide format dataset to a column- (default) or row-oriented Excel format.
#' #'
#' #' @param data a wide dataset from \code{\link{am_data}} or reshaped to a wide format with \code{\link{am_pivot_wider}}.
#' #' @param \dots further arguments to \code{\link[writexl]{write_xlsx}}. As a minimum a path needs to be supplied that ends with the name of the Excel file. See Examples.
#' #' @param transpose logical. If \code{TRUE}, the result is returned in a row-oriented Excel format. The default is column oriented (same as the dataset in R).
#' #' @param transpose.date.format argument passed to \code{\link{am_transpose}}, setting the format of date columns when data is transposed.
#' #'
#' #' @examples
#' #' \dontrun{
#' #' # Getting macroeconomic indicators from Bank of Uganda in fiscal years
#' #' data <- am_data("BOU_MMI_FY", from = "2000/01")
#' #'
#' #' # Saving to different Excel formats
#' #' am_write_excel(data, "BOU_MMI_FY.xlsx")
#' #' am_write_excel(data, "BOU_MMI_FY.xlsx", transpose = TRUE)
#' #'
#' #' # Saving to alternative path
#' #' am_write_excel(data, "C:/Users/.../BOU_MMI_FY.xlsx")
#' #' }
#' #'
#' #' @seealso \code{\link{am_transpose}}, \code{\link[writexl]{write_xlsx}}, \code{\link{africamonitor}}
#' #'
#' #' @export am_write_excel
#' #'
#'
#' am_write_excel <- function(data, ..., transpose = FALSE, transpose.date.format = "%d/%m/%Y") {
#' ntpl <- !isTRUE(attr(data, "transposed"))
#' if(transpose && ntpl) {
#' res <- am_transpose(data, transpose.date.format)
#' } else if(ntpl) {
#' res <- list(data = data, labels = namlab(data))
#' } else {
#' res <- data
#' }
#' if(missing(...)) stop("Need to at least provide path to file. See help(write_xlsx).")
#' return(write_xlsx(res, ...))
#' }
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.