R/accessdb.R

Defines functions am_pivot_longer pivot_ellipsis am_pivot_wider am_data am_as_date am_expand_date am_series am_sources safe_query am_reconnect

Documented in am_as_date am_data am_expand_date am_pivot_longer am_pivot_wider am_series am_sources

#' Global Identifier Macros
#'
#' @description
#' The macro \code{.AMID} contains the string \code{c("ISO3", "Series")} denoting
#' the names of ID variables that identify the cross-sectional dimension in the database.
#'
#' The macro \code{.AMT} contains the string
#' \code{c("Date", "Year", "Quarter", "FY", "QFY", "Month", "Day")} denoting temporal identifiers generated by \code{\link{am_expand_date}}.
#' The "Date" variable is sufficient to uniquely identify a point in time in the database.
#'
#' Each value in the database is uniquely identified by ISO3, Series and Date.
#'
#' @usage
#' .AMID
#' .AMT
#'
#' @examples
#' .AMID
#' .AMT
#'
#'
#' @seealso \code{\link{africamonitor}}
#'
#' @format NULL
#' @aliases .AMID .AMT
#' @export .AMID
#' @export .AMT

.AMID <- c("ISO3", "Series")
.AMT <- c("Date", "Year", "Quarter", "FY", "QFY", "Month", "Day")


#' Dataset of Countries in the Database
#'
#' \code{am_countries} is a data.frame containing standardized codes of 55 African countries
#' (including Western Sahara) according to various classifications
#' and regional aggregations. \code{am_countries_wld} provides the same information for 195
#' countries, which includes the 193 UN Member States, Western Sahara and Taiwan. The API
#' generally provides data for all 195 countries, but by default only requests data for Africa.
#' Note that the API (\code{\link{am_data}}) only supports "ISO3" character codes.
#'
#' @format A data frame with 55 (wld = 195) rows and 9 variables (sorted by Country):
#' \describe{
#'   \item{Country}{Short Country Name}
#'   \item{Country_ISO}{ISO Standardized Country Name}
#'   \item{ISO2}{ISO 2-Character Country Code}
#'   \item{ISO3}{ISO 3-Character Country Code}
#'   \item{ISO3N}{ISO Numeric Country Code}
#'   \item{IMF}{IMF Numeric Country Code}
#'   \item{Region}{2-Region Classification (UN except for Sudan)}
#'   \item{Region_Detailed}{5-Region Classification (Former World Bank)}
#'   \item{Currency}{Main Official Currency}
#' }
#' @source \href{https://CRAN.R-project.org/package=countrycode}{countrycode} R package (with some modification of regional aggregates).
#' @seealso \code{\link{am_entities}}, \code{\link{am_sources}}, \code{\link{am_series}}, \code{\link{africamonitor}}
#' @examples
#' head(am_countries)
"am_countries"

#' @rdname am_countries
"am_countries_wld"

#' Dataset of African Economic and Regional Entities
#'
#' A dataset mapping African countries to various economic and regional entities.
#'
#' @format A data frame with 54 rows (one for each country, excluding Western Sahara) and 27 variables, of which 6 are country identifiers and the remaining 21
#' are logical variables indicating country membership to various economic and regional entities.
#'
#' @source Own compilation.
#' @seealso \code{\link{am_countries}}, \code{\link{am_sources}}, \code{\link{am_series}}, \code{\link{africamonitor}}
#' @examples
#' head(am_entities)
"am_entities"


# #' Reconnect to Database
# #'
# #' This function terminates an existing connection to the database server and attempts to reconnect to it.
# #' It is now somewhat redundant by the safe query mechanism introduced in v0.2.1 of the package, where each query is evaluated inside
# #' \code{\link{tryCatch}} and the database connection is renewed if the query fails. This function can still be used to manually
# #' renew the database connection.
# #' @examples \donttest{
# #' am_reconnect()
# #' }
# #' @seealso \code{\link{africamonitor}}
# #'
# #' @export am_reconnect

am_reconnect <- function() {
  if(length(.am_con)) tryCatch(dbDisconnect(.am_con), error = function(e) cat(""))
  assignInMyNamespace(".am_con", .connect())
}

.am_con <- NULL # Needed for initialization

safe_query <- function(query) {
  if(is.null(.am_con)) am_reconnect() # Initial connection
  tryCatch(dbGetQuery(.am_con, query), error = function(e) {
    am_reconnect()
    tryCatch(dbGetQuery(.am_con, query), error = function(e) NULL)
  })
}

#' Retrieve Data Sources Table
#'
#' This function retrieves a table with information about the sources of data in the database, and when data from different sources was updated.
#'
#' The data source table gives information about the various sources / providers of data in this database, including the source website, frequency and time coverage of data, a description of the source, when data from the source was updated and the way data is accessed from the source.
#' @param ordered logical. \code{TRUE} orders the result in the order data was entered into the database, while \code{FALSE} returns the result in a random order, to the benefit of faster query execution.
#' @return A \code{\link[data.table]{data.table}} with information about the sources of data in the database.
#' @examples \donttest{
#' am_sources()
#' }
#' @seealso \code{\link{am_countries}}, \code{\link{am_series}}, \code{\link{africamonitor}}
#'
#' @export am_sources

am_sources <- function(ordered = TRUE) {
  query <- "SELECT DSID, Source, Url, NSeries, Frequency, Data_From, Data_To, Description, Updated, Access FROM DATASOURCE"
  if(ordered) query <- paste(query, "ORDER BY DS_Order")
  res <- safe_query(query)
  if(is.null(res) || !fnrow(res)) {
    message("Query resulted in empty dataset. This means something is wrong with your internet connection, the connection to the database or with the database itself.")
    return(res)
  }
  res$Data_From <- as.Date(res$Data_From)
  res$Data_To <- as.Date(res$Data_To)
  res$Updated <- as.Date(res$Updated)
  setDT(res)
  return(res)
}

#' Retrieve Series Table
#'
#' This function pulls information about the data series available in the database.
#'
#' @param dsid character. (Optional) id's of datasources matching the 'DSID' column of the data sources table (retrieved using \code{\link[=am_sources]{am_sources()}}) for which series information is to be returned.
#' @param source.info logical. \code{TRUE} returns additional information from the data sources table (the source, the frequency of the data and when it was last updated).
#' @param ordered logical. \code{TRUE} returns the series in a fixed order, while \code{FALSE} returns the result in a random order, to the benefit of faster query execution.
#' @param return.query logical. \code{TRUE} will not query the database but instead return the constructed SQL query as a character string (for debugging purposes).
#'
#' @details The series table gives information about all of the time series in the database. Each series is given a unique code, and
#' has a label describing the series. Further information recorded are the minimum and maximum time coverage, and (optionally) a separate series source and url.
#' The default \code{source.info = TRUE} adds the source, the frequency of the data (homogeneous within source), and the date when the source was last updated.
#'
#'
#' @return A \code{\link[data.table]{data.table}} with information about the available series in the database.
#' @examples \donttest{
#' # By default returns all series with additional information
#' am_series()
#'
#' # Raw series table
#' am_series(source.info = FALSE)
#'
#' # Only series in the WEO
#' am_series("IMF_WEO")
#' }
#' @seealso \code{\link{am_countries}}, \code{\link{am_sources}}, \code{\link{am_data}}, \code{\link{africamonitor}}
#'
#' @export am_series

am_series <- function(dsid = NULL, source.info = TRUE, ordered = TRUE, return.query = FALSE) {
  if(source.info) {
    query <- "SELECT DSID, Series, Label, Topic, S_Frequency, S_From, S_To, Nctry, Avg_Obs, Updated, Source, S_Description, S_Source, S_Url FROM SERIES NATURAL JOIN DATASOURCE" # , DS_Url
  } else {
    query <- "SELECT Series, Label, Topic, S_Frequency, S_From, S_To, Nctry, Avg_Obs, S_Description, S_Source, S_Url FROM SERIES"
  }
  if(length(dsid)) {
    query <- if(length(dsid) == 1L) paste0(query, " WHERE DSID = '", dsid, if(ordered) "' ORDER BY S_Order" else "'") else
                                    paste0(query, " WHERE DSID IN ('", paste(dsid, collapse = "', '"), if(ordered) "') ORDER BY S_Order" else "')")
  } else if(ordered) query <- paste(query, "ORDER BY S_Order")

  if(return.query) return(query)
  res <- safe_query(query)
  if(is.null(res) || !fnrow(res)) {
    message("Query resulted in empty dataset. Please make sure the DSID exists by checking against the am_sources() table. Alternatively check your connection to the database.")
    return(res)
  }

  res$S_From <- as.Date(res$S_From)
  res$S_To <- as.Date(res$S_To)
  if(source.info) res$Updated <- as.Date(res$Updated)
  setDT(res)
  return(res)
}


#' Generate Temporal Identifiers from a Date Column
#'
#' This function expands a date column and generates additional temporal identifiers from it (such as the year, month, quarter, fiscal year etc.).
#'
#' @param x either a vector of class 'Date', or coercible to date using \code{\link[base]{as.Date}}, or a data frame / list containing with a date-column called \code{name}.
#' @param gen character. A vector of identifiers to generate from \code{x}. The possible identifiers are found in \code{\link{.AMT}}.
#' @param origin character / Date. Passed to \code{\link[base]{as.Date}}: for converting numeric \code{x} to date.
#' @param keep.date logical. \code{TRUE} will keep the date variable in the resulting dataset, \code{FALSE} will remove the date variable in favor of the generated identifiers.
#' @param remove.missing.date logical. \code{TRUE} will remove missing values in \code{x}. If \code{x} is a dataset, rows missing the date variable will be removed.
#' @param sort logical. \code{TRUE} will sort the data by the date column.
#' @param as.factor \code{TRUE} will generate quarters, fiscal years and months ('Quarter', 'FY', 'QFY', 'Month') as factor variables. It is also possible to use \code{as.factor = "ordered"} to generate ordered factors.
#' \code{FALSE} will generate fiscal years as character and quarters and months as integer variables.
#' @param name character. The name of the date variable to expand.
#' @param \dots not used.
#'
#' @return A \code{\link[data.table]{data.table}} containing the computed identifiers as columns. See Examples.
#'
#' @examples
#' # First a basic example
#' x <- seq.Date(as.Date("1999-01-01"), as.Date("2000-01-01"), by = "month")
#' am_expand_date(x)
#' am_expand_date(x, gen = .AMT[-1L], keep.date = FALSE)
#' \donttest{
#' # Now using the API
#' am_expand_date(am_data("KEN"))
#'
#' # Same thing
#' am_data("KEN", expand.date = TRUE)
#' }
#'
#' @seealso \code{\link{am_as_date}}, \code{\link{africamonitor}}
#' @export am_expand_date

am_expand_date <- function(x, gen = c("Year", "Quarter", "Month"), origin = "1899-12-30",
                        keep.date = TRUE, remove.missing.date = TRUE, sort = TRUE,
                        as.factor = TRUE, name = "Date", ...) {
  lxl <- FALSE
  genopts <- c('Year', 'Quarter', 'FY', 'QFY', 'Month', 'Day')
  genlab <- c('Year', 'Quarter', 'Fiscal Year (July - June)', 'Quarter of Fiscal Year', 'Month', 'Day')
  ordered <- is.character(as.factor) && as.factor == "ordered"
  as.factor <- ordered || isTRUE(as.factor)
  if(is.numeric(gen)) {
    lab <- genlab[gen]
    gen <- genopts[gen]
  } else if(is.character(gen)) {
    lab <- genlab[ckmatch(gen, genopts, "Unknown gen option:")]
  } else stop("gen must be integer or character")

  if(is.list(x)) {
    lxl <- TRUE
    ind <- ckmatch(name, names(x), "No column named:")
    l <- .subset(x, -ind)
    x <- .subset2(x, ind)
  }
  getFY <- function(y, m) {
    fy <- fifelse(as.integer(m) >= 7L, y, y - 1L)
    fy <- paste(fy, substr(as.character(fy + 1L), 3L, 4L), sep = "/")
    if(as.factor) qF(fy, ordered = ordered) else fy # , na.exclude = FALSE
  }
  getQ <- function(m) {
    Q <- ceiling(m / 3L)
    if(as.factor) structure(Q, levels = paste0("Q", 1:4), class = c(if(ordered) "ordered", "factor")) else Q # , "na.included"
  }
  getFQ <- function(m) {
    mod <- (m + 6L) %% 12L
    mod[mod == 0L] <- 12L
    getQ(mod)
  }
  if(!inherits(x, "Date"))
    x <- if(is.numeric(x)) as.Date(x, origin) else as.Date(x)
  if(remove.missing.date && anyNA(x)) {
    nna <- whichNA(x, invert = TRUE)
    x <- x[nna]
    if(lxl) l <- ss(l, nna)
  }
  if(!lxl && sort && is.unsorted(x)) x <- sort(x)
  xp <- as.POSIXlt(x)
  y <- xp$year + 1900L
  m <- xp$mon + 1L
  res <- lapply(setNames(as.vector(gen, "list"), gen), switch,
                Year = y,
                Quarter = getQ(m),
                FY = getFY(y, m),
                QFY = getFQ(m),
                Month = if(as.factor) structure(m, levels = month.name, class = c(if(ordered) "ordered", "factor")) else m, # , "na.included"
                Day = xp$mday,
                stop("Unknown gen option"))
  vlabels(res) <- lab
  if(keep.date) res <- c(list(Date = `attr<-`(x, "label", "Date")), res)
  if(lxl) {
    res <- c(res, l)
    if(any(wc <- names(res) == "ISO3")) res <- c(res["ISO3"], res[!wc])
    if(sort && is.unsorted(x))
      res <- ss(res, order(x, method = "radix"))
  }
  return(qDT(res)) # need qDT here, res is a plain list...
}


#' Coerce Vectors to Dates
#'
#' This function coerces date strings i.e. \code{"YYYY-MM-DD"} or \code{"YYYY-MM"}, years e.g. \code{2015} (numeric or character),
#' year-quarters e.g. \code{"2015Q1"} or \code{"2015-Q1"}, year-months e.g. \code{"2015M01"} or \code{"2015-M01"}, fiscal years e.g. \code{"1997/98"} or numeric values representing dates (e.g. previously imported Excel date) to a regular R date.
#'
#' @param x a character date string \code{"YYYY-MM-DD"} or \code{"YYYY-MM"}, year-quarter \code{"YYYYQN"} or \code{"YYYY-QN"}, year-month \code{"YYYYMNN"} or \code{"YYYY-MNN"}, fiscal year \code{"YYYY/YY"} or calendar year \code{YYYY} (numeric or character), or a numeric value corresponding to a date that can be passed to \code{\link[base]{as.Date.numeric}}.
#' @param end logical. \code{TRUE} replaces missing time information with a period end-date which is the last day of the period. \code{FALSE} replaces missing month and day information with \code{"-01"},
#' so the year date is the 1st of January, the fiscal year date the 1st of July, and for months / quarters the 1st day of the month / quarter.
#' @param origin a date or date-string that can be used as reference for converting numeric values to dates. The default corresponds to dates generated in Excel for Windows. See \code{\link[base]{as.Date.numeric}}.
#'
#' @return A \code{\link[base]{Date}} vector.
#' @examples
#' am_as_date("2011-05")
#' am_as_date(2011)
#' am_as_date("2011/12")
#' am_as_date("2011/12", end = TRUE)
#' am_as_date("2011Q1")
#' am_as_date("2011Q1", end = TRUE)
#'
#' @seealso \code{\link{am_expand_date}}, \code{\link{africamonitor}} %, \code{\link{times}}
#' @export am_as_date
# Note: Before did all days ending on 28th
am_as_date <- function(x, end = FALSE, origin = "1899-12-30") {
  x1 <- x[1L]
  ncx <- nchar(x1)
  if(ncx == 4L) return(as.Date(paste0(x, if(end) "-12-31" else "-01-01")))
  if(is.numeric(x)) return(as.Date.numeric(x, origin))
  if(ncx >= 6L && ncx <= 8L) { # could be "1999/1"
    s5 <- substr(x1, 5L, 5L)
    if(s5 == "/") {
      x <- if(end) paste0(as.integer(substr(x, 1L, 4L)) + 1L, "-06-30") else
                   paste0(substr(x, 1L, 4L), "-07-01")
    } else if(s5 == "Q" || substr(x1, 6L, 6L) == "Q") {
      Q <- as.integer(substr(x, ncx, ncx))
      x <- if(end) paste0(substr(x, 1L, 4L), "-", Q * 3L, fifelse(as.logical(Q %% 2L), "-31", "-30")) else # fifelse requires logical argument
                   paste0(substr(x, 1L, 4L), "-", Q * 3L - 2L, "-01")
    } else {
      if(s5 == "M" || substr(x1, 6L, 6L) == "M") {
        st <- if(s5 == "M") 6L else 7L
        M <- substr(x, st, st + 1L)
        x <- paste0(substr(x, 1L, 4L), "-", M, "-01")
      } else { # Assuming now Year-Month type string
        if(ncx != 8L) x <- paste0(x, "-01")
      }
      if(end) return(as.Date(format(as.Date(x) + 31L, "%Y-%m-01")) - 1L)
    }
  }
  return(as.Date(x))
}


#' Retrieve Data from the Database
#'
#' This is the main function of the package to retrieve data from the database. % It constructs an SQL query which is sent to the database and returns the data as a \code{\link[data.table]{data.table}} in R.
#'
#' @param ctry character. (Optional) the ISO3 code of countries (see \code{\link{am_countries}}). Default is to load data for all African countries. Putting \code{NULL} gets data for all countries (codes available in \code{\link{am_countries_wld}}).
#' @param series character. (Optional) codes of series matching the 'Series' column of the series table (retrieved using \code{\link[=am_series]{am_series()}}).
#' @param from set the start time of the data retrieved by either supplying a start date, a date-string of the form \code{"YYYY-MM-DD"} or \code{"YYYY-MM"},
#' year-quarters of the form \code{"YYYYQN"} or \code{"YYYY-QN"}, a numeric year \code{YYYY} (numeric or character), or a fiscal year of the form \code{"YYYY/YY"}. These expressions are converted to a regular date by \code{\link{am_as_date}}.
#' @param to same as \code{from}: to set the time period until which data is retrieved. For expressions that are not full "YYYY-MM-DD" dates, the last day of the period is chosen.
#' @param labels logical. \code{TRUE} will also return labels (series descriptions) along with the series codes.
#' @param wide logical. \code{TRUE} calls \code{\link{am_pivot_wider}} on the result. \code{FALSE} returns the data in a long format without missing values (suitable for \code{ggplot2}).
#' @param expand.date logical. \code{TRUE} will call \code{\link{am_expand_date}} on the result.
#' @param drop.1iso3c logical. If only one country is selected through \code{ctry}, \code{TRUE} will drop the 'ISO3' column in the output.
#' @param ordered logical. \code{TRUE} orders the result by 'Date' and, if \code{labels = TRUE}, by series, maintaining a fixed column-order of series.
#' \code{FALSE} returns the result in a random order, to the benefit of faster query execution.
#' @param return.query logical. \code{TRUE} will not query the database but instead return the constructed SQL query as a character string (for debugging purposes).
#' @param \dots further arguments passed to \code{\link{am_pivot_wider}} (if \code{wide = TRUE}) or \code{\link{am_expand_date}} (if \code{expand.date = TRUE}).
#'
#' @details If \code{labels = FALSE}, the series table is not joined to the data table, and \code{ordered = TRUE} will order series retrieved in alphabetic order.
#' If \code{labels = TRUE} data is ordered by series and date, preserving the order of columns in the dataset. If multiple countries are received they are ordered alphabetically according to the 'ISO3' column.
#'
#' Series at different frequencies can be queried, but, if \code{wide = TRUE}, this will result in missing values for all but the first observations per period in the lower frequency series.
#'
#' @return A \code{\link[data.table]{data.table}} with the result of the query.
#'
#' @examples \donttest{
#' # Return all indicators for Kenya from 2000
#' am_data("KEN", from = 2000)
#'
#' # Return all indicators for Kenya from 2000 in long format
#' am_data("KEN", from = 2000, wide = FALSE)
#'
#' # Return with date expanded
#' am_data("KEN", from = 2000, expand.date = TRUE)
#'
#' # Same thing in multiple steps (with additional customization options):
#' am_data("KEN", from = 2000, wide = FALSE) |> am_pivot_wider() |> am_expand_date()
#'
#' # Getting only GDP growth
#' am_data("KEN", "NGDP_RPCH", from = 2000)
#'
#' # Getting GDP growth for all countries
#' am_data(series = "NGDP_RPCH", from = 2000)
#'
#' # Reshaping to wider format
#' am_data(series = "NGDP_RPCH", from = 2000) |>
#'   am_pivot_wider(id_cols = "Date",
#'                  names_from = "ISO3",
#'                  values_from = "NGDP_RPCH")
#'
#' # Getting growth and inflation for the EAC countries (all available years)
#' am_data(ctry = c("UGA", "KEN", "TZA", "RWA", "BDI", "SSD"),
#'         series = c("NGDP_RPCH", "PCPIPCH"))
#'
#' }
#' @seealso \code{\link{am_pivot_wider}}, \code{\link{am_expand_date}}, \code{\link{africamonitor}}
#' @export am_data
#'

am_data <- function(ctry = africamonitor::am_countries$ISO3,
                    series = NULL, from = NULL, to = NULL, # , dsid = NULL
                    labels = TRUE, wide = TRUE, expand.date = FALSE, drop.1iso3c = TRUE,
                    ordered = TRUE, return.query = FALSE, ...) {

  c0 <- is.null(ctry)
  s0 <- is.null(series)
  if(!c0 && anyDuplicated(ctry)) stop("duplicated country code: ", paste(ctry[duplicated(ctry)], collapse = ", "))
  if(!s0 && anyDuplicated(series)) stop("duplicated series code: ", paste(series[duplicated(series)], collapse = ", "))
  if(labels) {
     data <- "DATA NATURAL JOIN SERIES"
     lab <- ", Label"
  } else {
    data <- "DATA"
    lab <- ""
  }
  if(!c0 && drop.1iso3c && length(ctry) == 1L) { # This is good -> more intelligent
     cond <- paste0("ISO3 = '", ctry, "'")
     vars <- paste0("Date, Series", lab, ", Value")
     if(ordered) ord <- if(labels) "S_Order, Date" else "Series, Date"
  } else {
     cond <- if(c0) "" else paste0("ISO3 IN ('", paste(ctry, collapse = "', '"), "')")
     vars <- paste0("ISO3, Date, Series", lab,", Value")
     if(ordered) ord <- if(labels) "ISO3, S_Order, Date" else "ISO3, Series, Date"
  }
  if(!s0) {
    add <- if(c0) "" else " AND "
    if(length(series) == 1L) {
      # vars <- sub(", Series", "", vars) # This is inconvenient -> cannot use am_pivot_wider anymore
      cond <- paste0(cond, add, "Series = '", series, "'")
    } else {
      cond <- paste0(cond, add, "Series IN ('", paste(series, collapse = "', '"), "')")
    }
  }
  if(length(from)) cond <- paste0(cond, " AND Date >= '", am_as_date(from) , "'")
  if(length(to))   cond <- paste0(cond, " AND Date <= '", am_as_date(to, end = TRUE) , "'")
  where <- if(c0 && s0) "" else "WHERE"

  query <- if(ordered) paste("SELECT", vars, "FROM", data, where, cond, "ORDER BY", ord) else
                       paste("SELECT", vars, "FROM", data, where, cond)
  if(return.query) return(query)
  res <- safe_query(query)
  if(is.null(res) || !fnrow(res)) {
    message("Query resulted in empty dataset. Please make sure that the ISO3, series-codes or the date-range supplied in your query are consistent with the available data. See am_sources() and am_series(). Alternatively check your connection to the database.")
    return(res)
  }
  res$Date <- as.Date(res$Date)
  setDT(res)
  if(wide) {
    res <- am_pivot_wider(res, ...) # Could optimize to omit ORDER BY clause if wide and length(series) > 1L
    if(length(series) > 1L) setcolorder(res, c(if(c0 || length(ctry) > 1L || !drop.1iso3c) "ISO3", "Date", series))
  }
  if(expand.date) return(am_expand_date(res, ...)) else return(res)
}


#' Reshape Long API Data to Column-Based Format
#'
#' This function automatically reshapes long (stacked) raw data from the API (\code{\link[=am_data]{am_data(..., wide = FALSE)}}) to a wide format where each variable has its own column.
#' It can also be used as a general purpose reshaping command - with an additional capability to handle variable labels.
#'
#' @param data raw data from the API: A long format data frame where all values are stacked in a value column.
#' @param id_cols character. Temporal identifiers of the data. By default all variables in \code{\link{.AMT}} and "ISO3" are selected.
#' @param names_from character. The column containing the series codes. These will become the names of the new columns in the wider data format.
#' @param values_from character. The column containing the data values.
#' @param labels_from character. The column containing the labels describing the series.
#' @param expand.date logical. \code{TRUE} will call \code{\link{am_expand_date}} on the data after reshaping.
#' @param \dots further arguments passed to \code{\link[collapse]{pivot}} or \code{\link{am_expand_date}}.
#'
#' @return A \code{\link[data.table]{data.table}} with the reshaped data.
#'
#' @examples \donttest{
#' # Return all indicators for Kenya and Nigeria from the year 2000 onwards
#' am_pivot_wider(am_data(c("KEN", "NGA"), from = 2000, wide = FALSE))
#' }
#' @seealso \code{\link{am_pivot_longer}}, \code{\link{africamonitor}}
#'
#' @export am_pivot_wider
#'

am_pivot_wider <- function(data, id_cols = intersect(c("ISO3", .AMT), names(data)), # setdiff(names(data), c("Series", "Label", "Value")),
                      names_from = "Series", values_from = "Value",
                      labels_from = if(any(names(data) == "Label")) "Label" else NULL,
                      expand.date = FALSE, ...) {
  res <- pivot_ellipsis(data, id_cols, values_from, names_from, labels_from, how = "wider", sort = "ids", ...) # collapse::pivot
  if(expand.date) return(am_expand_date(res, ...)) else return(res)
}

# This is to allow ellipsis (...) arguments to be passed to both pivot and am_expand_data
pivot_ellipsis <- function(data, ids = NULL, values = NULL, names = NULL, labels = NULL,
                           how = "longer", na.rm = FALSE, factor = c("names", "labels"),
                           check.dups = FALSE, nthreads = 1L, fill = NULL,
                           drop = TRUE, sort = FALSE, transpose = FALSE, ...) {
  pivot(data, ids = ids, values = values, names = names,
        labels = labels, how = how, na.rm = na.rm, factor = factor,
        check.dups = check.dups, nthreads = nthreads, fill = fill,
        drop = drop, sort = sort, transpose = transpose)
}


#' Reshape Column-Based Data to Long Format
#'
#' This function automatically reshapes wide (column-based) data into a long format akin to the format of the raw data coming from the database (\code{\link[=am_data]{am_data(..., wide = FALSE)}}).
#' It can also be used as a general purpose reshaping command - with an additional capability to handle variable labels.
#'
#' @param data a wide format data frame where all series have their own column.
#' @param id_cols character. Temporal identifiers of the data. By default all variables in \code{\link{.AMT}} and "ISO3" are selected.
#' @param to_value character. The names of all series to be stacked into the long format data frame.
#' @param variable_name character. The name of the variable to store the names of the series.
#' @param value_name character. The name of the variable to store the data values.
#' @param label_name character. The name of the variable to store the series labels.
#' @param na.rm logical. \code{TRUE} will remove all missing values from the long data frame.
#' @param variable.factor,label.factor logical. \code{TRUE} will code the "Series" and "Label" columns as factors, which is more memory efficient.
#' @param \dots further arguments passed to \code{\link[data.table]{melt}}.
#'
#' @return A \code{\link[data.table]{data.table}} with the reshaped data.
#'
#' @examples \donttest{
#' # Return all indicators for Kenya and Nigeria from the year 2000 onwards
#' data <- am_data(c("KEN", "NGA"), from = 2000)
#' am_pivot_longer(data)
#' }
#' @seealso \code{\link{am_pivot_wider}}, \code{\link{africamonitor}}
#'
#' @export am_pivot_longer
#'

am_pivot_longer <- function(data, id_cols = intersect(c("ISO3", .AMT), names(data)),
                      to_value = setdiff(names(data), id_cols),
                      variable_name = "Series", value_name = "Value",
                      label_name = "Label", na.rm = TRUE, variable.factor = TRUE,
                      label.factor = TRUE, ...) {
  if(length(label_name)) labs <- vlabels(.subset(data, to_value), use.names = FALSE)
  res <- melt(qDT(data), id_cols, to_value, variable_name, value_name,
              na.rm = na.rm, variable.factor = variable.factor, ...)
  vlabels(res) <- NULL # needed
  if(length(label_name) && !allNA(labs)) {
    if(label.factor) {
      al <- list(levels = labs, class = "factor")
      labs <- if(na.rm) rep(seq_along(labs), fnobs(.subset(data, to_value))) else
                        rep(seq_along(labs), each = fnrow(data))
      attributes(labs) <- al
    } else {
      # melt always preserves column order (otherwise could rep with names and match series column)
      labs <- if(na.rm) rep(labs, fnobs(.subset(data, to_value))) else
                        rep(labs, each = fnrow(data))
    }
    add_vars(res, fncol(res)) <- setNames(list(labs), label_name)
  }
  return(res)
}

# all.equal(roworderv(am_data("bou_mmi", from = 2000, to = 2005)),
#           roworderv(am_pivot_longer(am_pivot_wider(am_data("bou_mmi", from = 2000, to = 2005)))))

#'
#' #' Transpose a Wide Dataset to a Row-Based Format
#' #'
#' #' This function is called by \code{\link{am_write_excel}} with option \code{transpose = TRUE} to generate a row-based tabular data format from a wide data frame in R that is suitable for exporting to Excel.
#' #'
#' #' @param data a wide format data frame where each column is a variable and the first variable uniquely identifies the data.
#' #' @param date.format a format for date columns which is passed to \code{\link[base]{format.Date}}. When transposing wide, dates are converted to character. The default R YYYY-MM-DD format for dates is often not recognized by Excel. By default dates are transformed to DD/MM/YYYY format which Excel (UK English) recognizes. Putting \code{FALSE} here does not transform dates into another format.
#' #'
#' #' @return A transposed data frame or \code{\link[data.table]{data.table}} (the class of the input is preserved).
#' #'
#' #' @examples \donttest{
#' #' am_transpose(am_data("BOU_CPI"))
#' #' }
#' #' @seealso \code{\link[data.table]{transpose}}, \code{\link{am_pivot_wider}}, \code{\link{am_write_excel}}, \code{\link{africamonitor}}
#' #' @export am_transpose
#'
#' am_transpose <- function(data, date.format = "%d/%m/%Y") {
#'   nnum <- cat_vars(data, "logical")[-1L]
#'   lab <- vlabels(data)[-1L]
#'
#'   if(is.character(date.format) && length(dv <- date_vars(data, "indices"))) {
#'     get_vars(data, dv) <-  if(length(dv) == 1L) format(.subset2(data, dv), date.format) else
#'                            lapply(.subset(data, dv), format, date.format)
#'   }
#'
#'   if(any(nnum) && !all(nnum)) {
#'     res <- list(`numeric data` = add_vars(transpose(get_vars(data, c(TRUE, !nnum)),
#'                                                     make.names = 1L, keep.names = "Variable"),
#'                                           list(Label = lab[!nnum]), pos = 2L),
#'                 `non-numeric data` =  add_vars(transpose(get_vars(data, c(TRUE, nnum)),
#'                                                          make.names = 1L, keep.names = "Variable"),
#'                                                list(Label = lab[nnum]), pos = 2L))
#'   } else {
#'     res <- transpose(data, make.names = 1L, keep.names = "Variable")
#'     add_vars(res, 2L) <- list(Label = lab)
#'   }
#'
#'   attr(res, "transposed") <- TRUE
#'   if(inherits(data, "data.table")) {
#'     if(inherits(res, "data.table")) return(qDT(res, keep.attr = TRUE))
#'     for(i in 1:2) setDT(res[[i]])
#'   }
#'   return(res)
#' }

#' #' Export Wide Data to Excel
#' #'
#' #' This function exports a wide format dataset to a column- (default) or row-oriented Excel format.
#' #'
#' #' @param data a wide dataset from \code{\link{am_data}} or reshaped to a wide format with \code{\link{am_pivot_wider}}.
#' #' @param \dots further arguments to \code{\link[writexl]{write_xlsx}}. As a minimum a path needs to be supplied that ends with the name of the Excel file. See Examples.
#' #' @param transpose logical. If \code{TRUE}, the result is returned in a row-oriented Excel format. The default is column oriented (same as the dataset in R).
#' #' @param transpose.date.format argument passed to \code{\link{am_transpose}}, setting the format of date columns when data is transposed.
#' #'
#' #' @examples
#' #' \dontrun{
#' #' # Getting macroeconomic indicators from Bank of Uganda in fiscal years
#' #' data <- am_data("BOU_MMI_FY", from = "2000/01")
#' #'
#' #' # Saving to different Excel formats
#' #' am_write_excel(data, "BOU_MMI_FY.xlsx")
#' #' am_write_excel(data, "BOU_MMI_FY.xlsx", transpose = TRUE)
#' #'
#' #' # Saving to alternative path
#' #' am_write_excel(data, "C:/Users/.../BOU_MMI_FY.xlsx")
#' #' }
#' #'
#' #' @seealso \code{\link{am_transpose}}, \code{\link[writexl]{write_xlsx}}, \code{\link{africamonitor}}
#' #'
#' #' @export am_write_excel
#' #'
#'
#' am_write_excel <- function(data, ..., transpose = FALSE, transpose.date.format = "%d/%m/%Y") {
#'   ntpl <- !isTRUE(attr(data, "transposed"))
#'   if(transpose && ntpl) {
#'     res <- am_transpose(data, transpose.date.format)
#'   } else if(ntpl) {
#'     res <- list(data = data, labels = namlab(data))
#'   } else {
#'     res <- data
#'   }
#'   if(missing(...)) stop("Need to at least provide path to file. See help(write_xlsx).")
#'   return(write_xlsx(res, ...))
#' }

Try the africamonitor package in your browser

Any scripts or data that you put into this service are public.

africamonitor documentation built on May 29, 2024, 7:55 a.m.