R/date_gens.R

Defines functions bulk_fpds_csv_years .fpds_csv_data .fpds_bulk_csv_date .tbl_split_amounts .fpds_bulk_dates .year_month_normal .end_of_month .date_gen

Documented in bulk_fpds_csv_years

.date_gen <- function(year = 1988,
                      month = 1,
                      day = 1) {
  month_slug <-
    case_when(nchar(month) == 1 ~ glue("0{month}") %>% as.character(),
              TRUE ~ as.character(month))
  day_slug <-
    case_when(nchar(day) == 1 ~ glue("0{day}") %>% as.character(),
              TRUE ~ as.character(day))
  glue("{year}-{month_slug}-{day_slug}") %>% as.character() %>% ymd()
}


.end_of_month <-
  function(year = 1988, month = 1) {
    month_start <- .date_gen(year = year,
                             month = month,
                             day = 1)
    end_of_month <-
      month_start %m+% months(1) - 1
    end_of_month
  }

.year_month_normal <-
  function(year = 1988) {
    1:12 %>%
      map_dfr(function(month) {
        month_start <-
          .date_gen(year = year,
                    month = month,
                    day = 1)
        fourteenth <-
          .date_gen(year = year,
                    month = month,
                    day = 14)
        fifteenth <- .date_gen(year = year,
                               month = month,
                               day = 15)
        sixteenth <- .date_gen(year = year,
                               month = month,
                               day = 16)
        month_end <-
          .end_of_month(year = year, month = month)
        dateStart <-
          c(month_start,  fifteenth, sixteenth)
        dateEnd <- c(fourteenth, fifteenth, month_end)
        tibble(dateStart, dateEnd) %>%
          mutate(yearData = year(dateStart),
                 monthData = month(dateStart)) %>%
          dplyr::select(yearData, monthData, everything())
      })
  }

.fpds_bulk_dates <-
  function(years = 1977:2000) {
    years %>%
      map_dfr(function(y) {
        if (y == 1977) {
          data <-
            tibble(
              yearData = 1977,
              dateStart = c("1900-01-01") %>% ymd(),
              dateEnd = c("1977-12-31") %>% ymd()
            )
          return(data)
        }
        .year_month_normal(year = y)
      })
  }


.tbl_split_amounts <-
  function() {
    tibble(
      amountMin = c(
        -1000000000,
        0,
        1,
        50,
        100,
        200,
        250,
        350,
        500,
        1000,
        2000,
        3000,
        5000,
        15000,
        25000,
        50000,
        100000,
        150000,
        250000.01,
        500000.01,
        1000000.01,
        175000,
        2500000,
        5000000.01,
        10000000.01,
        25000000.01,
        100000000.01
      ),
      amountMax = c(
        -1,
        0.99,
        49.99,
        99.99,
        199.99,
        249.99,
        349.99,
        499.99,
        999.99,
        1999.99,
        2999.99,
        4999.99,
        14999.99,
        49999.99,
        174999.99,
        24999.99,
        99999.99,
        149999.99,
        250000,
        500000,
        1000000,
        2499999.99,
        5000000,
        10000000,
        25000000,
        100000000,
        10000000000
      )
    )
  }

.fpds_bulk_csv_date <-
  function(start_date = "1987-10-16",
           end_date = "1987-10-31",
           split_amounts = F,
           return_message = F
           ) {
    signed_period <- c(start_date, end_date)
    fpds_csv_safe <- possibly(fpds_csv, tibble())
    if (!split_amounts) {
      data <-
        fpds_csv_safe(signed_date = c(signed_period),
                 unformat = T) %>%
        dplyr::select(-c(urlCSV, typeProcurement))
      return(data)
    }
    df_amounts <- .tbl_split_amounts()
    data <-
      1:nrow(df_amounts) %>%
      map_dfr(function(x) {
        df_amount <- df_amounts %>% dplyr::slice(x)
        amount <- c(df_amount$amountMin, df_amount$amountMax)
        data <- fpds_csv_safe(
          signed_date = signed_period,
          obligated_amount = amount,
          unformat = T,
          return_message = return_message
        )

        if (length(data) == 0) {
          return(tibble())
        }

        data %>%
          dplyr::select(-c(urlCSV, typeProcurement))
      })

    data
  }

.fpds_csv_data <-
  function(data, split_amounts = T, return_message = F) {
    1:nrow(data) %>%
      map_dfr(function(x) {
        x %>% message()
        df_row <- data %>% dplyr::slice(x)

        .fpds_bulk_csv_date(
          start_date = df_row$dateStart,
          end_date = df_row$dateEnd,
          split_amounts = split_amounts,
          return_message = return_message
        )
      })
  }

#' Bulk FPDS download by year
#'
#' Downloads all FPDS data by year
#'
#' @param years vector of years starting in 1977 to current
#' @param split_amounts if \code{TRUE} splits amounts to try to deal with 30,000 result limit
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
bulk_fpds_csv_years <-
  function(years = 2018, split_amounts =  T, return_message = F) {
    options(future.globals.maxSize = 999 * 1024 ^ 10)
    data <- .fpds_bulk_dates(years = years)

    all_data <-
      years %>%
      map_dfr(function(year) {
        data %>%
          filter(yearData == year) %>%
          .fpds_csv_data(split_amounts = split_amounts,
                         return_message = return_message)
      })


    all_data
  }
abresler/govtrackR documentation built on July 11, 2020, 12:30 a.m.