R/beta_conv.R

Defines functions beta_conv

Documented in beta_conv

#' Beta-convergence statistic
#'
#' Given a dataframe of quantitative indicators along time,
#' the unconditional beta convergence is a statistic
#' capturing some important features.
#' A time variable must be present and sorted.
#' Missing values are not allowed. All other columns are indicator values
#' in each considered country.
#'
#'
#'
#' @param  tavDes the sorted dataframe time by countries on the original scale.
#'                No other variable besides time and countries' indicator
#'                must be present.
#' @param  time_0 reference time.
#' @param  time_t target time strictly larger than time_0.
#' @param  all_within  is FALSE if just two different years are considered (default);
#'         if  more than two years  are desired within the
#'         specified interval then it must be  TRUE ; the reference time remains
#'         time_0.
#' @param  timeName the name of the variable that contains time information.
#' @param  useTau  if TRUE the log ratio of indicator values is divided for
#'         the elapsed time (years).
#' @param useCon if TRUE replaces 0 with a minimum constant value
#' @return a list with the value of beta-conv, by OLS (least-squares), the
#'         transformed data and standard statistical tests.
#'
#' @references{ \url{https://www.eurofound.europa.eu/system/files/2022-04/introduction-to-the-convergeu-package-0.6.4-tutorial-v2-apr2022.pdf}}
#'
#' @importFrom  rlang  .data
#' @examples
#'
#' # Example 1:
#' # Dataframe in the format years by countries:
#' require(tibble)
#' myTB1  <- tibble::tribble(
#'   ~years, ~UK, ~DE, ~IT,
#'   1990,   998,  1250, 332,
#'   1988,   1201, 868, 578,
#'   1989,   1150, 978, 682,
#'   1991,  1600,  1350, 802
#' )
#'
#' # Sort the time variable:
#' newdata <- myTB1[order(myTB1$years),]
#'
#' # Beta convergence statistic by considering just two times, e.g. 1989 and 1991:
#' myBC1 <- beta_conv(newdata,1989,1991,timeName="years")
#'
#' # Visualize the summary of the results (estimated coefficients, standard errors, p-values):
#' myBC1$res$summary
#'
#' # Visualize the adjusted R-squared:
#' myBC1$res$adj.r.squared
#'
#' # Beta convergence statistic by considering more than two times:
#' myBC2 <- beta_conv(newdata,1988,1991,all_within=TRUE,timeName="years")
#'
#' # Example 2:
#' # Dataframe in the format years by countries, time variable already sorted:
#' testTB <- tribble(
#'     ~time, ~countryA ,  ~countryB,  ~countryC,
#'     2000,     0.8,   2.7,    3.9,
#'     2001,     1.2,   3.2,    4.2,
#'     2002,     0.9,   2.9,    4.1,
#'     2003,     1.3,   2.9,    4.0,
#'     2004,     1.2,   3.1,    4.1,
#'     2005,     1.2,   3.0,    4.0
#'     )
#' myBC3 <- beta_conv(testTB, time_0 = 2000, time_t = 2005, timeName = "time")
#' myBC4 <- beta_conv(testTB, time_0 = 2000, time_t = 2005, all_within = TRUE, timeName = "time")
#'
#' # Example 3
#' # Beta convergence for the emp_20_64_MS Eurofound dataset:
#' data(emp_20_64_MS)
#' empBC <- beta_conv(emp_20_64_MS, time_0 = 2002, time_t = 2006, timeName = "time")
#'
#' # Summary of the model results:
#' empBC$res$summary
#'
#' # Adjusted R-squared:
#' empBC$res$adj.r.squared
#'
#'
#' @export
#'
beta_conv <- function(tavDes,time_0, time_t, all_within=FALSE,
                      timeName = "time",  useTau = TRUE, useCon = FALSE){


  obj_out <- convergEU_glb()$tmpl_out
  # check if timeName is present
  if(not_in(timeName,names(tavDes))) {
    obj_out$err <- "Error: declared time variable absent."
    return(obj_out)
  }else{};
  # check time
  if(time_0 >= time_t){
    obj_out$err <- "Error: wrong selected time window."
    return(obj_out);
  }
  if(not_in(time_0 ,unlist(tavDes[,timeName]))) {
    obj_out$err <- "Error: wrong selected time window."
    return(obj_out);
  }
  if(not_in(time_t, unlist(tavDes[,timeName]))) {
    obj_out$err <- "Error: wrong selected time window."
    return(obj_out);
  }
  # implementing constant where min is 0
  ecode <- names(tavDes)[-c(1)]
  tempo_val1 <- unlist(tavDes[,ecode])
  tempo_val <- tempo_val1[tempo_val1 > 0]
  if(useCon){
    minim_not_null <- min(tempo_val)/100
    if(all(tavDes[,ecode] >= 0)){
      for(auxvv in ecode){
        estrattore_nulli <- tavDes[[auxvv]] == 0
        tavDes[[auxvv]][estrattore_nulli] <- minim_not_null
      }
    }
  }else{
    if(length(tempo_val1) > length(tempo_val)){
      obj_out$err <- "Error: One or more values zero or negative"
    }
  }


  # Make standard checks on the dataframe
  tt_out <- check_data(tavDes, timeName = timeName)
  if(!is.null(tt_out$err)){
    return(tt_out);
  }
  # At least three columns and 2 rows needed due to differencing
  if(nrow(tavDes) < 2 && ncol(tavDes) < 4){
    obj_out$err <- "Error: at least 2 rows and 4 columns needed in the dataframe."
    return(obj_out)
  }
  # extract and sort
  tavDes <- dplyr::arrange_at(tavDes,timeName)
  #timeRaw <- unlist(tavDes[,timeName])
  timeRaw <- tavDes[[timeName]]
  pos_t0 <- which(timeRaw == time_0)
  pos_tt <- which(timeRaw == time_t)
  pos_all <- which( (timeRaw >= time_0) & (timeRaw <= time_t) )
  delta_time <- time_t - time_0
  if(delta_time <= 0){
    obj_out$err  <- "Error: null or negative elapsed years."
    return(obj_out)
  }
  #
  if(all_within) {# all times within the interval
    resTB <- tavDes[pos_all,]
    # sorted by time
    resTB <- dplyr::arrange_at(resTB,timeName)
  }else{
    #just the two considered rows
    resTB <- tavDes[c(pos_t0,pos_tt),]
  };
  # now it it sure the reference is in the first row
  # Non negative values? Needed because log(indicator) taken
  if( sum(resTB[,-which(names(resTB) == timeName)]  <= 0) > 0){    ###########dplyr::select(resTB, -timeName)  <= 0) > 0){
    obj_out$err  <- "Error: negative values in the indicator."
    return(obj_out)
  }
  # log-transform, already checked positivity
  reslogTB <- log(resTB[,-which(names(resTB) == timeName)])    ##########dplyr::select(resTB, -timeName))
  ## wTB <- cbind(dplyr::select(resTB, timeName),reslogTB)
  ## Should we divide the ordinates by number of elapsed years? YES, possibly
  workTB <- dplyr::tibble(
    deltaIndic = as.numeric(reslogTB[2,] - reslogTB[1,]),
    indic = as.numeric(reslogTB[1,]),
    countries = names(reslogTB))
  if(useTau){
    # divide by elapsed time
    workTB <- dplyr::mutate(workTB, deltaIndic = .data$deltaIndic / delta_time);
  }
  #
  if(nrow(reslogTB) > 2){# several years besides time_t and time_0
    for(auxR in 3:nrow(reslogTB)){
      tmpTB <- dplyr::tibble(
        deltaIndic = as.numeric(reslogTB[auxR,] - reslogTB[1,]),
        indic = as.numeric(reslogTB[1,]),# maybe reslogTB[auxR-1, ]
        countries = names(reslogTB))
      # tau?
      if(useTau){
        # divide by elapsed time
        tmpTB <- dplyr::mutate(tmpTB, deltaIndic = .data$deltaIndic / delta_time);
      }
      workTB <- rbind(workTB,tmpTB)# end add_row
    };
  }# end several years
  #
  # make calculations
  workMOD <- stats::lm(deltaIndic  ~ indic, data=workTB)
  obj_out$res <- list(
    workTB = workTB,
    model = workMOD,
    summary = broom::tidy(workMOD),
    beta1 = NA,
    adj.r.squared = summary(workMOD)$adj.r.squared
  )
  obj_out$res$beta1 <- as.numeric(obj_out$res$summary[2,2])
  return(obj_out)
}

Try the convergEU package in your browser

Any scripts or data that you put into this service are public.

convergEU documentation built on May 29, 2024, 11:15 a.m.