R/utils.R

Defines functions checkTimeProdUnique windowMatch fillMissing kennedyBeta daysInMonth isContinuous checkTypes checkNames geomean

# IndexNumR: a package for index number computation
# Copyright (C) 2018 Graham J. White (g.white@unswalumni.com)
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.



#' geomean
#'
#' function for the geometric mean of a vector
#' @param x a numeric vector
#' @keywords internal
#' @noRd
geomean <- function(x, na.rm = TRUE){
  if(na.rm == TRUE){
    x <- x[!is.na(x)]
  }
  return(exp(mean(log(x))))
}

#' checkNames
#'
#' checks if elements of namesVector exist in column names of x
#' @param x a dataframe
#' @param namesVector a vector of strings
#' @return a list where the first element is either TRUE if all
#' strings in namesVector exist in the column names of x,
#' otherwise FALSE. If FALSE, then checkNames returns a second
#' element with an error message that contains a list of the
#' names not contained in the column of x.
#' @keywords internal
#' @noRd
checkNames <- function(x, namesVector){

  goodNames <- colnames(x)

  # remove columns specified as empty strings to allow qvar = "" for
  # elementary indexes
  namesVector <- namesVector[!namesVector == ""]

  badNames <- namesVector[!(namesVector %in% goodNames)]

  if(length(badNames >= 1)){
    err <- paste("The specified name(s) -", paste(badNames, collapse = ", "),
                  "- are not column names of the input data frame. Check the
                 names given to the pvar, qvar, pervar and prodID arguments.")
    return(list(result=FALSE,message=err))
  }
  else {
    return(list(result=TRUE))
  }
}

#' checkTypes
#'
#' checks that the columns of the input matrix are right
#' @param x the dataframe to check
#' @param pvar name of price variable
#' @param qvar name of quantity variable
#' @param pervar name of time period variable
#' @return if all checks pass, the original dataframe. If some
#' columns are the wrong type then either an error, or the
#' input dataframe coerced to the correct types.
#' @keywords internal
#' @noRd
checkTypes <- function(x, pvar, qvar, pervar){

  check <- TRUE

  if(!inherits(x[[pervar]], "numeric")){
    coerced <- try(as.numeric(x[[pervar]]), silent = TRUE)
    if(inherits(coerced, "try-error") | any(is.na(coerced))){
      check = FALSE
      m <- "Time period variable is not numeric and cannot be coerced to numeric"
    }
    else {
      x[[pervar]] <- coerced
    }
  }

  if(!inherits(x[[pvar]], "numeric")){
    coerced <- try(as.numeric(x[[pvar]]), silent = TRUE)
    if(inherits(coerced, "try-error") | any(is.na(coerced))){
      check = FALSE
      m <- "Price variable is not numeric and cannot be coerced to numeric"
    }
    else {
      x[[pvar]] <- coerced
    }
  }

  # don't check qvar for elementary indexes
  if(!qvar == ""){
    if(!inherits(x[[qvar]], "numeric")){
      coerced <- try(as.numeric(x[[qvar]]), silent = TRUE)
      if(inherits(coerced, "try-error") | any(is.na(coerced))){
        check = FALSE
        m <- "Quantity variable is not numeric and cannot be coerced to numeric"
      }
      else {
        x[[qvar]] <- coerced
      }
    }
  }

  if(check){
    return(x)
  }
  else {
    stop(paste("Please correct input data types.", m), call. = FALSE)
  }

}

#' isContinuous
#'
#' checks if a numeric vector has gaps.
#' @param x Vector to check
#' @return a list where the first element contains the
#' result of the check and the second element contains
#' the list of missing elements.
#' @keywords internal
#' @noRd
isContinuous <- function(x){

  check <- all(min(x):max(x) %in% unique(x))

  if(!check){
    missing <- setdiff(min(x):max(x), unique(x))
    err <- paste("The following elements are missing: ",
                 missing)
    return(list(result = FALSE, missing = missing))
  }
  else {
    return(list(result = TRUE))
  }

}

#' daysInMonth
#'
#' calculate the number of days in the month that the given date falls
#' @param x A date
#' @return The number of days in the month in which x falls
#' @keywords internal
#' @noRd
daysInMonth <- function(x){
  month <- format(x,"%m")
  switch (month,
    "01" = return(31),
    "02" = {
      year <- format(x,"%Y")
      return(as.numeric(as.Date(paste0("01-03-",year),format="%d-%m-%Y") -
                     as.Date(paste0("01-02-",year),format="%d-%m-%Y")))
    },
    "03" = return(31),
    "04" = return(30),
    "05" = return(31),
    "06" = return(30),
    "07" = return(31),
    "08" = return(31),
    "09" = return(30),
    "10" = return(31),
    "11" = return(30),
    "12" = return(31)
  )

}


#' kennedyBeta
#'
#' calculate the adjusted regression coefficients for a semi-log model
#' @param x a regression object (e.g. the output from \code{lm})
#' @return a vector of adjusted coefficients
#' @keywords internal
#' @noRd
kennedyBeta <- function(x){

  coeffs <- stats::coef(x)
  vars <- diag(stats::vcov(x))

  return(coeffs - 0.5*vars)

}


#' fillMissing
#'
#' fill in missing observations
#' @param x the dataset
#' @param pvar the price variable
#' @param qvar the quantity variable
#' @param pervar the time period variable
#' @param prodID the product identifier
#' @param priceReplace what to replace missing prices with
#' @param quantityReplace what to replace missing quantities with
#' @keywords internal
#' @noRd
fillMissing <- function(x, pvar, qvar, pervar, prodID, priceReplace, quantityReplace){

  # list of time periods
  pers <- sort(unique(x[[pervar]]))
  # list of products
  prods <- sort(unique(x[[prodID]]))

  # fill out the gaps from missing/new products with replacement values.
  available <- table(x[,c(prodID, pervar)])
  if(sum(!(available == 0)) > 0){

    # which products are not available
    toAdd <- as.data.frame(which(available == 0, arr.ind = TRUE))

    # generate the new observation row for price, quantity, time and product id
    newObs <- data.frame(rep(priceReplace, nrow(toAdd)),
                         rep(quantityReplace,nrow(toAdd)),
                         prods[toAdd[[prodID]]],
                         pers[toAdd[[pervar]]],
                         stringsAsFactors = FALSE)

    # set column names to the ones used in the input dataset
    colnames(newObs) <- c(pvar, qvar, prodID, pervar)

    # add the new observations onto the dataset
    x <- merge(x, newObs, all.x = TRUE, all.y = TRUE)

    # ensure dataset still sorted by time period and product ID
    x <- x[order(x[[pervar]], x[[prodID]]),]
  }

  return(x)

}

#' windowMatch
#'
#' match products over a window of periods
#'
#' @param x data frame of product data
#' @param pervar string for the name of the time period variable
#' @param prodID string for the name of the product identifier variable
#' @return a dataframe of matched product data
#' @keywords internal
#' @noRd
windowMatch <- function(x, pervar, prodID){

  obs <- max(x[[pervar]]) - min(x[[pervar]]) + 1

  tab <- table(x[[pervar]], x[[prodID]])
  keep <- colnames(tab)[colSums(tab) == obs]

  return(x[x[[prodID]] %in% keep,])

}


#' Check for unique time/product combinations
#'
#' This function checks for uniqueness of time/product combinations
#' so that the priceIndex function works as expected.
#'
#' @param x a dataframe
#' @param pervar the time period variable
#' @param prodID the product identifier
#' @keywords internal
#' @noRd
#'
checkTimeProdUnique <- function(x, pervar, prodID){

  combos <- table(x[[pervar]], x[[prodID]])

  check <- !any(combos > 1)

  return(list(result = check))

}
grahamjwhite/IndexNumR documentation built on Nov. 12, 2023, 6:44 p.m.