interpolate.data <- function(df) {
# linear interpolation
# need at least 2 non-na values to interpolate
if (nrow(df[!is.na(df$value),]) > 0) {
xout <- 1:nrow(df)
y <- df$value
interpolation <- approx(x = xout[!is.na(y)], y = y[!is.na(y)], xout = xout,
rule = 2)
df$yhat <- interpolation$y
#fix for interploation for only 1 dfset of consecutive years
df$yhat = ifelse(is.na(df$yhat), df$value, df$yhat)
}
return(df)
}
#' hinterpolate
#'
#' This is a wrapper function takes a data frame and fills in
#' interpolated and extrapolated data for the whole time series
#'
#' @param df dataframe in iep format
#'
#' @return Returns list with filled in time series, column yhat is the interpolated value.
#' Please check original value with yhat column to make sure you are happy with the results
#'
#' @examples
#' hinterpolate(hcountryexampledata)
#'
#' @keywords imputation
#' @author Dave
#' @export
hinterpolate = function(df){
require(tidyverse)
require(padr)
require(lubridate)
require(testthat)
#need to add another variable for entries with only one country-year-car value,
#interpolate breaks otherwise
df = df %>% select(geocode, variablename, year, value)
if(diff(range(df$year))>0){
num.years.test = df %>% group_by(geocode, variablename) %>%
summarise(only.one.yr = min(year) == max(year), year = min(year), value = max(value)) %>%
filter(only.one.yr)
num.years.test$year = ifelse(num.years.test$year == min(df$year), max(df$year), min(df$year))
df = bind_rows(df, num.years.test[, names(df)])
#extend data frame
df$date = as.Date(paste0(df$year, "-01-01"))
df = pad(df, start_val = min(df$date),
end_val = max(df$date), group = c("geocode", "variablename"), interval = "year")
df$year = lubridate::year(df$date)
df = df %>% select(-date)
#test
num.years.test = df %>% group_by(geocode, variablename) %>%
summarise(only.one.yr = min(year) == max(year))
testthat::expect_that(sum(num.years.test$only.one.yr), equals(0),
info = "You Have only One Entry for Interploation")
#interpolate
df <- df %>% group_by(geocode, variablename) %>% do(interpolate.data(.))
df = as.data.frame(df)
}else{
message("***You only have one year of data across all indicators")
message("***No interpolation or extrapolation possible or needed")
message("***Original data frame returned untouched")
}
return(df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.