R/sec_functions.R

Defines functions sec_ciks

Documented in sec_ciks

# CIKs --------------------------------------------------------------------

#' SEC CIK registered entities
#'
#' This function returns information on every \href{https://en.wikipedia.org/wiki/Central_Index_Key}{Central Index Key}
#' registered entity.  Data about CIK filing entity can be discovered
#' using the \code{\link{sec_filer}} function inputing the CIK.
#'
#' @param return_message \code{TRUE} return a message after data import
#'
#' @return \code{tibble}
#' @references \href{http://sec.gov}{The Securities and Exchange Commission}
#' @export
#' @import dplyr readr tidyr stringr
#' @family SEC
#' @family entity search
#' @family fund data
#' @examples
#' sec_ciks(return_message = TRUE)
sec_ciks <-
  function(return_message = TRUE) {

    cookies = c(
      'ak_bmsc' = 'BA09B9B7E282820D33A847AB6A4F577C~000000000000000000000000000000~YAAQEIXYF9KjfYuAAQAAtA9NkA/Asr69IqJoGtc80iFGaJxeyP/fnqUjhwOkojONIdc3b8zucr8+he1KdbmavGbRp6Bg53ClwHiw0u8kKwNbaufzhSpP74TwvSUfZhrK4Xk1CBHIR1BGudopKt6ds20EpWSCAos3IxzOKIGoGqBOHSIatGivOOcRK/l79CSBT8c7hvz/dF/jvduX42v4cOr5ff1p973FSHh6Pag8I+do5iiJJ9gch1a241qP1XOoz68SyvRYr77owlIoLuSvHiVhJzwVAnu0xZQxGWTWxAjvRPO4oY+p9asvti0DnuqL4VBnBqZFFTuq1Kdj3alR/dtzSU0EhLP7ij8Q6yU/P1jL4hwxnvUWKxSXkNho9DYUrVmOMvk+tYIMvpdj2+yH2fvz6n1JMXt1ovIq27f0SA==',
      'bm_sv' = 'F8C2F590FCEE07B36615FEA14209764D~YAAQEIXYFw2mfYuAAQAAsmpNkA/mkbJEFnhI9pVm9obV/qxGm3cqtAprkBZr42oLuRJF1PuYuEl9UZP/1QFAo0Uzy+Y8qQri6/BCoEczGMA36G2JWqz2WnV44Jux4vUP8sCpnBCeTYRFkAJWS8tpuawox2wws0IanSoClmvxL9eeP0R88baDAhujCikpQhG1/4WH6zUQpVsmStrp3IMBtoOjU5BVjB2W/ql7/Elb9D2dwEqQheWrqQYUorOY~1'
    )

    headers = c(
      `authority` = 'www.sec.gov',
      `accept` = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
      `accept-language` = 'en-US,en;q=0.9',
      `cache-control` = 'no-cache',
      `pragma` = 'no-cache',
      `sec-fetch-dest` = 'document',
      `sec-fetch-mode` = 'navigate',
      `sec-fetch-site` = 'none',
      `sec-fetch-user` = '?1',
      `sec-gpc` = '1',
      `upgrade-insecure-requests` = '1',
      `user-agent` = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36'
    )

    res <- httr::GET(url = 'https://www.sec.gov/Archives/edgar/cik-lookup-data.txt', httr::add_headers(.headers=headers), httr::set_cookies(.cookies = cookies))

    page <- res %>% rvest::read_html()
    entities <- page %>%
      html_nodes("p") %>%
      html_text()

    entities <- entities %>% str_split("\n") %>% flatten_chr()
    data <-
      tibble(X1 = entities) %>%
      tidyr::separate(X1,
                      into = c('nameIssuer', 'codeCIK'),
                      sep = '\\:[0][0]') %>%
      mutate(
        codeCIK = codeCIK %>% stringr::str_replace('\\:', ''),
        codeCIK = "00" %>% paste0(codeCIK),
        idCIK = codeCIK %>% as.numeric
      ) %>%
      mutate(
        nameIssuer = nameIssuer %>% str_to_upper(),
        urlRankAndFiled = paste0('http://rankandfiled.com/#/', idCIK, '/table'),
        urlEDGAR = list(
          'https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=',
          codeCIK,
          '&type=&dateb=&owner=exclude&start=0&count=100&output=xml'
        ) %>% purrr::invoke(paste0, .),
        datetimeData = Sys.time()
      ) %>%
      dplyr::select(idCIK, nameIssuer, everything()) %>%
      suppressMessages() %>%
      suppressWarnings() %>%
      arrange(desc(idCIK))

    if (return_message) {
      "You returned " %>%
        paste0(
          data %>% nrow %>% formattable::comma(digits = 0),
          ' entities with registered CIK codes'
        ) %>%
        cat(fill = T)
    }
    data
  }


# FOIA --------------------------------------------------------------------


.get_foia_url_df <-
  function() {
    page <-
      'https://www.sec.gov/foia/docs/foia-logs.htm' %>%
      read_html()

    slugs <-
      page %>%
      html_nodes('td:nth-child(1) a ')
      html_nodes('.views-field-extension a') %>%
      html_attr('href') %>%
      str_replace_all('http://www.sec.gov', '')

    urls <-
      slugs %>%
      paste0('https://www.sec.gov', .)

    url_df <-
      slugs %>%
      future_map_dfr(function(x) {
        items <-
          x %>%
          str_replace_all('/foia/logs/foia-log-fy|.csv', '') %>%
          str_split('\\-') %>%
          flatten_chr()

        if (items %>% length() == 2) {
          df <-
            tibble(yearData = items[[1]] %>% as.numeric,
                       quarterData = items[[2]])
        } else {
          df <-
            tibble(yearData = items %>% as.numeric())
        }
        return(df)
      }) %>%
      mutate(urlFOIA = urls)

    return(url_df)
  }

.parse_foia_url_df <-
  function(url = 'http://www.sec.gov/foia/logs/foia-log-fy2013.csv',
           return_message = TRUE) {
    data <-
      url %>%
      read_csv() %>%
      slice(-1) %>%
      suppressWarnings() %>%
      suppressMessages()

    if ("X10" %in% names(data)) {
      data <-
        data %>%
        select(-10) %>%
        purrr::set_names(
          c(
            'idSECRequest',
            'nameRequester',
            'nameOrganization',
            'descriptionRequest',
            'dateRequest',
            'dateReceived',
            'statusRequest',
            'dateClosed',
            'typeOutcome'
          )
        ) %>%
        filter(!nameRequester %>% is.na())
    } else {
      data <-
        data %>%
        select(-dplyr::matches("^X")) %>%
        purrr::set_names(
          c(
            'idSECRequest',
            'nameRequester',
            'nameOrganization',
            'categoryOrganization',
            'descriptionRequest',
            'dateRequest',
            'dateReceived',
            'statusRequest',
            'dateClosed',
            'typeOutcome'
          )
        ) %>%
        filter(!nameRequester %>% is.na())
    }

    data <-
      data %>%
      mutate_at(.vars = data %>% select(dplyr::matches("date")) %>% names,
                funs(. %>% lubridate::mdy())) %>%
      mutate_at(
        .vars = data %>% select(dplyr::matches("^name|^description|^category")) %>% names,
        funs(. %>% str_replace('\\-', '') %>% stringr::str_to_upper())
      ) %>%
      mutate(
        nameOrganization = ifelse(nameOrganization == '', NA, nameOrganization),
        typeOutcome = ifelse(typeOutcome == '-', NA, typeOutcome)
      ) %>%
      tidyr::separate(
        nameRequester,
        into = c('nameLast', 'nameFirst', 'titlePerson'),
        sep = '\\, '
      ) %>%
      mutate(
        nameRequester = ifelse(!nameFirst %>% is.na(),
                               paste(nameFirst, nameLast),
                               nameLast),
        nameRequester = ifelse(
          !titlePerson %>% is.na(),
          paste(nameRequester, titlePerson, sep = ', '),
          nameRequester
        ),
        isClosed = ifelse(statusRequest %>% str_detect('Closed'), TRUE, FALSE),
        isGranted = ifelse(typeOutcome %>% str_detect('Granted '), TRUE, FALSE),
        isGrantedPartial = ifelse(typeOutcome %>% str_detect('Granted/Denied'), TRUE, FALSE),
        urlFOIA = url
      ) %>%
      select(-c(nameLast, nameFirst, titlePerson)) %>%
      select(idSECRequest, nameRequester, everything()) %>%
      suppressWarnings() %>%
      suppressMessages()

    if (return_message) {
      list("Parsed: ", url) %>%
        purrr::invoke(paste0, .) %>%
        cat(fill = T)
    }

    return(data)
  }



#' SEC FOIA requests
#'
#' This function returns returns all Freedom of Information Act requests
#' submitted to the SEC begining in 2006.
#'
#' @param search_years vector of years to search, starting in 2006
#' @param return_message \code{TRUE} return a message after data import
#' @param nest_data \code{TRUE} return nested data frame
#' @return nested \code{tibble} or \code{tibble} if \code{nest_data = FALSE}
#' @references \href{http://sec.gov}{The Securities and Exchange Commission}
#' @export
#' @import purrr stringr dplyr rvest tidyr curl
#' @importFrom lubridate mdy
#' @importFrom readr read_csv
#' @family SEC
#' @examples
#' \dontrun{
#' sec_foia_requests(search_years = 2006:2017, nest_data = TRUE,
#' return_message = TRUE)
#' }
sec_foia_requests <-
  function(search_years = 2006:2016,
           nest_data = TRUE,
           return_message = TRUE) {
    url_df <-
      .get_foia_url_df()

    if (!'search_years' %>% exists()) {
      urls <-
        url_df$urlFOIA
    } else {
      urls <-
        url_df %>%
        filter(yearData %in% search_years) %>%
        .$urlFOIA
    }

    parse_foia_url_df_safe <-
      purrr::possibly(.parse_foia_url_df, NULL)

    all_data <-
      urls %>%
      future_map_dfr(function(x) {
        parse_foia_url_df_safe(url = x, return_message = TRUE)
      }) %>%
      arrange(desc(dateRequest))

    all_data <-
      all_data %>%
      mutate(urlFOIA = urlFOIA %>% str_replace_all('\\http:', '\\https:')) %>%
      left_join(url_df) %>%
      select(yearData, everything()) %>%
      suppressMessages()

    if (return_message) {
      list(
        "You parsed ",
        all_data %>% nrow() %>% formattable::comma(digits = 0),
        " SEC FOIA Requests from ",
        url_df$yearData %>% unique() %>% min(),
        ' to ',
        url_df$yearData %>% unique() %>% max()
      ) %>%
        purrr::invoke(paste0, .) %>%
        cat(fill = T)
    }

    if (nest_data) {
      all_data <-
        all_data %>%
        nest(-yearData, .key = dataFOIA)
    }

    return(all_data)
  }


# cusip -------------------------------------------------------------------

.get_cusip_url_df <-
  function() {
    page <-
      "https://www.sec.gov/divisions/investment/13flists.htm" %>%
      read_html()


    periods <-
      page %>%
      html_nodes("#block-secgov-content a") %>%
      html_text() %>%
      str_replace_all("Current List ", '') %>%
      str_replace_all('\\(|\\)', '') %>%
      str_replace_all('Third|third', '3rd') %>%
      str_replace_all('First|first', '1st') %>%
      str_replace_all('Second|second', '2nd') %>%
      str_replace_all('Fourth|fourth', '4th')

    urls <-
      page %>%
      html_nodes("#block-secgov-content a") %>%
      html_attr('href') %>%
      paste0('https://www.sec.gov', .)

    data <-
      tibble(namePeriod = periods, urlSEC = urls) %>%
      mutate_all(str_squish) %>%
      filter(namePeriod != "")

    url_df <-
      data %>%
      separate(namePeriod,
               sep = '\\ ',
               into = c('idPeriod', 'quarter', 'yearData')) %>%
      mutate(
        idPeriod = idPeriod %>% readr::parse_number(),
        yearData = yearData %>% as.numeric,
        quarter = 'q'
      ) %>%
      unite(periodData,
            yearData,
            quarter,
            idPeriod,
            sep = '',
            remove = F) %>%
      select(-quarter)

    current_year <-
      Sys.Date() %>% lubridate::year()

    year_df <-
      tibble(yearData = 2004:(current_year),
                 pageStart = 3)

    url_df <-
      url_df %>%
      left_join(
        url_df %>% filter(yearData >= 2004) %>%
          select(periodData) %>%
          mutate(pageStart = 3) %>%
          bind_rows(tibble(
            periodData = c(
              "2003q4",
              "2003q3",
              "2003q2",
              "2003q1",
              "2002q4",
              "2002q3",
              "2002q2",
              "2002q1",
              "2001q4",
              "2001q3",
              "2001q2",
              "2001q1",
              "2000q4",
              "2000q3",
              "2000q2",
              "2000q1",
              "1999q4",
              "1999q3",
              "1999q2",
              "1999q1",
              "1998q4",
              "1998q3",
              "1998q2",
              "1998q1",
              "1997q3",
              "1997q2",
              "1997q1",
              "1996q4",
              "1996q3",
              "1996q2",
              "1996q1"
            ),
            pageStart = c(
              2,
              3,
              2,
              2,
              2,
              2,
              2,
              2,
              2,
              2,
              2,
              2,
              1,
              1,
              1,
              1,
              1,
              1,
              2,
              2,
              2,
              2,
              2,
              3,
              2,
              2,
              2,
              2,
              2,
              2,
              3
            )
          ))
      ) %>%
      left_join(tibble(
        periodData = "2016q3",
        areaTable = c("113.14286-64.42471-727.78378-520.05405")
      ))

    url_df
  }

.parse_sec_cusip_url <-
  function(url = "https://www.sec.gov/divisions/investment/13f/13flist2016q3.pdf",
           start_page = 3,
           table_area = "113.14286-64.42471-727.78378-520.05405",
           return_message) {
    list("Be patient ocr'ing ", url, ' may take a while') %>%
      purrr::invoke(paste0, .) %>%
      cat(fill = T)

    if (!table_area %>% is.na()) {
      table_area <-
        table_area %>%
        str_split('\\-') %>%
        flatten_chr %>%
        as.numeric()
    }

    df_metadata <-
      url %>%
      tabulizer::extract_metadata() %>%
      flatten_df()

    if ('modified' %in% names(df_metadata)) {
      date_parts <-
        df_metadata$modified %>% str_replace_all(" EDT | EST ", ' ') %>%
        str_split('\\ ') %>%
        flatten_chr()

      date_year <-
        date_parts[date_parts %>% length()]

      other_parts <-
        date_parts[1:(date_parts %>% length() - 1)] %>%
        .[2:length(.)]

      datetime_file <-
        c(date_year, other_parts) %>%
        paste0(collapse = ' ') %>%
        lubridate::ymd_hms()
    } else {
      datetime_file <- NA
    }

    if (table_area %>% is.na()) {
      tables <-
        url %>%
        tabulizer::extract_tables(start_page:df_metadata$pages)
    } else {
      tables <-
        url %>%
        tabulizer::extract_tables(start_page:df_metadata$pages,
                       area = list(table_area))
    }

    all_data <-
      seq_along(tables) %>%
      future_map_dfr(function(x) {
        tables[[x]] %>%
          as_tibble() %>%
          mutate_all(str_trim)
      }) %>%
      select(-6) %>%
      tidyr::separate(V1,
                      sep = ' ',
                      into = c('idCUSIPBase', 'codeCUSIP1', 'codeCUSIP2')) %>%
      mutate_all(str_trim) %>%
      unite(idCUSIP,
            idCUSIPBase,
            codeCUSIP1,
            codeCUSIP2,
            sep = '',
            remove = FALSE) %>%
      select(idCUSIP, everything()) %>%
      dplyr::rename(
        isEquitySecurity = V2,
        nameIssuer = V3,
        descriptionIssuer = V4,
        statusIssuer = V5
      ) %>%
      filter(!idCUSIPBase %>% str_detect("CUSIP")) %>%
      suppressWarnings()

    all_data <-
      all_data %>%
      mutate(
        codeCUSIP2 = codeCUSIP2 %>% as.numeric(),
        isEquitySecurity = ifelse(isEquitySecurity == "*", TRUE, FALSE),
        statusIssuer = ifelse(statusIssuer == '', NA, statusIssuer),
        descriptionIssuer = ifelse(descriptionIssuer == '', statusIssuer, descriptionIssuer),
        statusIssuer = ifelse(statusIssuer == descriptionIssuer, NA, statusIssuer),
        isSecurityNew = ifelse(statusIssuer == "ADDED", TRUE, FALSE),
        isSecurityDeleted = ifelse(statusIssuer %>% str_detect("DELETED"), TRUE, FALSE)
      ) %>%
      unite(nameSecurity,
            nameIssuer,
            descriptionIssuer,
            sep = ' ',
            remove = FALSE)

    all_data <-
      all_data %>%
      left_join(
        all_data %>% mutate(idRow = 1:n()) %>% group_by(idCUSIPBase) %>% filter(idRow == min(idRow)) %>%
          ungroup() %>% select(idCUSIPBase, nameEntity = nameIssuer)
      ) %>%
      select(idCUSIP:codeCUSIP2,
             nameEntity,
             nameIssuer,
             nameSecurity,
             everything()) %>%
      suppressMessages() %>%
      mutate(urlSEC = url) %>%
      distinct()

    all_data <-
      all_data %>%
      mutate(
        isDebtSecurity = descriptionIssuer %>% str_detect("%"),
        pctRateNote = ifelse(isDebtSecurity == T, descriptionIssuer, NA)
      ) %>%
      tidyr::separate(pctRateNote,
                      sep = '\\%',
                      into = c('pctRateNote', 'ignore')) %>%
      mutate(
        pctRateNote = pctRateNote %>% readr::parse_number() / 100,
        isADR = descriptionIssuer %>% str_detect("\\ ADR")
      ) %>%
      select(-dplyr::matches("ignore")) %>%
      select(idCUSIP:nameSecurity,
             dplyr::matches("is"),
             dplyr::matches("pct"),
             everything())

    if (return_message) {
      list(
        "Retrieved ",
        all_data %>% nrow() %>% formattable::comma(digits = 0),
        ' CUSIPs as of ',
        datetime_file
      ) %>%
        purrr::invoke(paste0, .) %>%
        cat(fill = T)
    }

    return(all_data)
  }

#' SEC CUSIPs
#'
#' This function returns information on all Committee on Uniform Securities Identification Procedures [CUSIP]
#' registered entities for a specified period.  The SEC exposes this data in PDF
#' form so this function can take a long time to run.
#'
#' @descripton returns a data frame with all CUSIPs for specified years and quarters
#'
#' @param years vector years to search, starting in 1996
#' @param quarters quarters to search \itemize{
#' \item\code{NULL}(default): all quarters,
#' \item \code{1}: Q1
#' \item \code{2}: Q2
#' \item \code{3}: Q3
#' \item \code{4}: Q4
#' }
#' @param only_most_recent \code{TRUE} search only the most recent period
#' @param return_message \code{TRUE} return a message after data import
#' @param nest_data \code{TRUE} return nested data frame
#' @return nested \code{tibble} or \code{tibble} if \code{nest_data = FALSE}
#' @references \href{http://sec.gov}{The Securities and Exchange Commission}
#' @import purrr stringr dplyr rvest formattable tidyr xml2 tabulizer
#' @importFrom lubridate mdy
#' @importFrom readr read_csv
#' @return
#' @export
#' @family SEC
#' @family entity search
#' @family securities search
#'
#' @examples
#' \dontrun{
#' sec_cusips(only_most_recent = FALSE, years = c(2016, 2010),
#' quarters = c(4),
#' return_message = TRUE)
#' }
sec_cusips <-
  function(only_most_recent = FALSE,
           years = 1996:2016,
           quarters = NULL,
           nest_data = FALSE,
           return_message = TRUE) {
    is_blank <-
      (only_most_recent == F &
         length(years) == 0  & length(quarters) == 0)

    if (is_blank) {
      stop("Please enter years and/or quarters or select only most recent")
    }

    message(
      "Please be patient this function can take a while\nCUSIP dumps are generally over 500 pages\neach one of which must be OCR'd"
    )

    url_df <-
      .get_cusip_url_df()

    is_current <-
      only_most_recent == TRUE

    if (is_current) {
      urls_df <-
        url_df %>%
        slice(1)
    } else {
      urls_df <-
        url_df %>%
        filter(yearData %in% years)
      if (length(quarters) > 0) {
        if (quarters %in% 1:4 %>% sum() == 0) {
          stop("Quarters can only be 1, 2, 3, or 4")
        }
        urls_df <-
          urls_df %>%
          filter(idPeriod %in% quarters)
      }
    }

    parse_sec_cusip_url_safe <-
      purrr::possibly(.parse_sec_cusip_url, tibble())

    all_data <-
      1:nrow(urls_df) %>%
      future_map_dfr(function(x) {
        parse_sec_cusip_url_safe(
          url = urls_df$urlSEC[[x]],
          return_message = return_message,
          table_area = urls_df$areaTable[[x]],
          start_page = urls_df$pageStart[[x]]
        )
      }) %>%
      suppressWarnings() %>%
      suppressMessages()

    all_data <-
      all_data %>%
      left_join(url_df %>% select(periodData, yearData, urlSEC)) %>%
      suppressMessages()

    if (return_message) {
      "You returned " %>%
        paste0(
          all_data %>% dplyr::select(idCUSIP) %>% distinct() %>% nrow() %>% formattable::comma(digits = 0),
          ' securities with cusips codes'
        ) %>%
        cat(fill = T)

    }

    if (nest_data) {
      all_data <-
        all_data %>%
        nest(-periodData, .key = dataCUSIPs)

    }

    return(all_data)
  }

# closed_end_funds --------------------------------------------------------

.get_closed_end_fund_url_df <-
  function() {
    page <-
      "https://www.sec.gov/opa/data/opendatasetsshtmlclosed-end-investment_company.html" %>%
      read_html()


    urls <-
      page %>% html_nodes('.views-field-field-display-title a') %>%
      html_attr('href') %>%
      paste0('https://www.sec.gov', .)
    urls <- urls[urls %>% str_detect("csv")]
    years <- page %>% html_nodes('.views-field-field-display-title a') %>% html_text() %>% unique() %>% as.numeric()


    data <-
      tibble(yearData = years,
               urlData = urls)

    data

  }

.parse_closed_end_fund_url <-
  function(url = "https://www.sec.gov/open/datasets/closed-end_investment_company.csv",
           return_message = TRUE) {
    df <-
      url %>%
      read_csv() %>%
      slice(-1) %>%
      mutate_all(str_to_upper) %>%
      select(1:10) %>%
      purrr::set_names(
        c(
          'idSEC',
          'idCIK',
          'nameEntity',
          'addressStreet1Entity',
          'addressStreet2Entity',
          'cityEntity',
          'stateEntity',
          'zipcodeEntity',
          'dateLastFiling',
          'typeLastFiling'
        )
      )
    df <-
      df %>%
      mutate(
        idCIK = idCIK %>% as.numeric(),
        dateLastFiling = dateLastFiling %>% lubridate::mdy(),
        hasCO = addressStreet1Entity %>% str_detect("C/O"),
        hasCO1 = addressStreet2Entity %>% str_detect("C/O"),
        addressStreetEntity = ifelse(
          addressStreet2Entity %>% is.na(),
          addressStreet1Entity,
          paste(addressStreet1Entity, addressStreet2Entity)
        ),
        addressEntity = list(
          addressStreetEntity,
          " ",
          cityEntity,
          ", ",
          stateEntity,
          " ",
          zipcodeEntity
        ) %>% purrr::invoke(paste0, .),
        nameManager = ifelse(hasCO == TRUE, addressStreet1Entity, NA) %>% str_replace_all("C/O", ""),
        nameManager1 = ifelse(hasCO1 == TRUE, addressStreet2Entity, NA) %>% str_replace_all("C/O", ""),
        nameManager = ifelse(!nameManager1 %>% is.na(), nameManager1, nameManager),
        urlSECData = url
      ) %>%
      select(-dplyr::matches("nameManager1|hasCO")) %>%
      select(
        idSEC,
        idCIK,
        dateLastFiling,
        nameEntity,
        nameManager,
        typeLastFiling,
        everything()
      )

    if (return_message) {
      list("Parsed: ", url) %>%
        purrr::invoke(paste0, .) %>%
        cat(fill = T)
    }

    return(df)

  }

#' SEC registered closed-end funds
#'
#' This function returns information on SEC registered
#' closed-end funds for a specified period.
#'
#' @param years vector of years to search starting in 2012
#' @param only_most_recent \code{TRUE} return only the most recent year
#' @param return_message \code{TRUE} return a message after data import
#' @param nest_data \code{TRUE} return nested data frame
#' @return nested \code{tibble} or \code{tibble} if \code{nest_data = FALSE}
#' @references \href{http://sec.gov}{The Securities and Exchange Commission}
#' @export
#' @import purrr stringr dplyr rvest readr lubridate formattable tidyr
#' @importFrom readr read_csv
#' @family SEC
#' @family entity search
#' @family fund data
#' @examples
#' \dontrun{
#' sec_closed_end_funds(years = 2015:2017)
#' }
sec_closed_end_funds <-
  function(only_most_recent = T,
           years =NULL,
           nest_data = F,
           return_message = TRUE) {
    url_df <-
      .get_closed_end_fund_url_df()

    if (length(years) > 0) {
      urls  <-
        url_df %>%
        filter(yearData %in% years) %>%
        .$urlData
    }

    if (years %>% length() > 1) {
      only_most_recent <-
        FALSE
    }

    if (length(years) >0 & (!only_most_recent)) {
      urls <-
        url_df$urlData
    }

    if (only_most_recent) {
      urls <-
        url_df %>%
        slice(1) %>%
        .$urlData
    }

    parse_closed_end_fund_url_safe <-
      purrr::possibly(.parse_closed_end_fund_url, tibble())

    all_data <-
      urls %>%
      future_map_dfr(function(x) {
        parse_closed_end_fund_url_safe(url = x, return_message = TRUE)
      }) %>%
      mutate_if(is.character,
                list(function(x){
                  case_when(
                    x == "[NULL]" ~ NA_character_,
                    TRUE ~ x
                  )
                }))

    all_data <-
      all_data %>%
      left_join(url_df %>% dplyr::rename(urlSECData = urlData)) %>%
      suppressMessages() %>%
      select(yearData, everything()) %>%
      suppressMessages()

    if (return_message) {
      list("Acquired ",
           all_data %>% nrow() %>% formattable::comma(digits = 0),
           ' Closed End Funds') %>%
        purrr::invoke(paste0, .) %>%
        cat(fill = T)
    }

    if (nest_data) {
      all_data <-
        all_data %>%
        nest(-yearData, .key = dataClosedEndFunds)
    }
    all_data
  }


# investment_companies ----------------------------------------------------

#' SEC registered investment companies
#'
#' This function returns information about SEC
#' registered investment companies including:\itemize{
#' \item Insurance Companies
#' \item Insurance Accounts
#' \item Mutual Funds
#' \item Closed-end Funds
#' }
#'
#' @param only_most_recent \code{TRUE} return only the most recent year
#' @param return_message \code{TRUE} return a message after data import
#' @return nested \code{tibble} or \code{tibble} if \code{nest_data = FALSE}
#' @references \href{http://sec.gov}{The Securities and Exchange Commission}
#' @export
#' @import purrr stringr dplyr rvest formattable tidyr
#' @importFrom lubridate mdy
#' @importFrom readr read_csv
#' @family SEC
#' @family entity search
#' @family fund search
#' @examples
#' sec_investment_companies(nest_data = TRUE, return_message = TRUE)

sec_investment_companies <-
  function(nest_data = TRUE,
           return_message = TRUE) {
    df <-
      "https://www.sec.gov/open/datasets/investment_company_series_class.csv" %>%
      read_csv() %>%
      purrr::set_names(
        c(
          'idSEC',
          'idCIK',
          'nameManager',
          'idOrganizationType',
          'idSeries',
          'nameFund',
          'idClass',
          'nameClass',
          'idTicker',
          'addressStreet1Manager',
          'addressStreet2Manager',
          'cityManager',
          'stateManager',
          'zipcodeManager'
        )
      ) %>%
      mutate_all(funs(. %>% str_to_upper() %>% str_trim())) %>%
      mutate_at(.vars = c('idCIK', 'idOrganizationType'),
                funs(. %>% as.numeric())) %>%
      suppressMessages()
    df <-
      df %>%
      mutate(
        countCharAddress2 = addressStreet2Manager %>% nchar,
        addressStreet2Manager = ifelse(countCharAddress2 == 1, NA, addressStreet2Manager),
        addressStreet2Manager = ifelse(addressStreet2Manager == "[NULL]", NA, addressStreet2Manager),
        idTicker = ifelse(idTicker == "[NULL]", NA, idTicker)
      ) %>%
      select(-countCharAddress2) %>%
      mutate(
        addressStreetManager = ifelse(
          addressStreet2Manager %>% is.na(),
          addressStreet1Manager,
          paste(addressStreet1Manager, addressStreet2Manager)
        ),
        addressEntity = list(
          addressStreetManager,
          " ",
          cityManager,
          ", ",
          stateManager,
          " ",
          zipcodeManager
        ) %>% purrr::invoke(paste0, .)
      )

    df <-
      df %>%
      left_join(tibble(
        idOrganizationType = c(30, 31, 32, 33, 55, 75),
        typeOrganization = c(
          'Open-end Mutual Fund',
          'Open-end Insurance Separate Account',
          'Variable Annuity Separate Account',
          'Variable Life Separate Account',
          'Closed-end Fund',
          'Insurance Unit Investment Trust'
        )
      )) %>%
      mutate(typeOrganization = typeOrganization %>% stringr::str_to_upper()) %>%
      select(idSEC:idOrganizationType, typeOrganization, everything()) %>%
      suppressMessages()

    if (return_message) {
      list(
        "Acquired ",
        df %>% nrow() %>% formattable::comma(digits = 0),
        ' SEC registered investment funds'
      ) %>%
        purrr::invoke(paste0, .) %>%
        cat(fill = T)
    }

    if (nest_data) {
      df <-
        df %>%
        nest(-nameManager, .key = dataManager)
    }

    return(df)

  }


# money_market_funds ------------------------------------------------------

.get_mmf_url_df <-
  function() {
    page <-
      "https://www.sec.gov/opa/data/opendatasets-mmfhtml.html" %>%
      read_html()

    slugs <-
      page %>%
      html_nodes('.views-field-field-display-title a') %>%
      html_attr('href')

    urls_csv <-
      slugs[slugs %>% str_detect(".csv")] %>%
      paste0('https://www.sec.gov', .)

    urls_csv %>% str_extract("(\\d)")

    dates <- urls_csv %>% str_split("/mmf|/mm-") %>% flatten_chr()
    dates <-
      dates[dates %>% str_detect(".csv")] %>% str_remove_all(".csv|\\-|\\_0")

    dates <-
      case_when(dates == "" ~ "201301",
              TRUE ~ dates) %>%
      str_c("01") %>%
      ymd()

    dates <- dates %m+% months(1) - 1
    tibble(dateData = dates,
           urlData = urls_csv) %>%
      mutate(
        yearData = dateData %>% lubridate::year(),
        monthData = dateData %>% lubridate::month()
      )

  }

.parse_mmf_url <-
  function(url = "https://www.sec.gov/open/datasets/mmf-2016-09.csv",
           return_message = TRUE) {
    df <-
      url %>%
      read_csv() %>%
      mutate_all(str_to_upper) %>%
      purrr::set_names(
        c(
          'dateData',
          'idCIK',
          'nameFiler',
          'nameIssuer',
          'idSeries',
          'typeFund',
          'nameClass',
          'idClass',
          'nameManager',
          'idTicker',
          'nameSubAdviser'
        )
      ) %>%
      suppressMessages() %>%
      suppressWarnings()

    if (df$dateData[[1]] %>% is.na()) {
      df <-
        df %>%
        slice(-(1:3))
    }

    is_dmy <-
      !df$dateData[[1]] %>% dmy() %>% is.na() %>%
      suppressWarnings()

    if (is_dmy) {
      df <-
        df %>%
        mutate(dateData = dateData %>% lubridate::dmy(),
               urlSEC = url) %>%
        suppressMessages() %>%
        suppressWarnings()
    }

    is_mdy <-
      !df$dateData[[1]] %>% mdy() %>% is.na() %>%
      suppressWarnings()

    if (is_mdy) {
      df <-
        df %>%
        mutate(dateData = dateData %>% lubridate::mdy(),
               urlSEC = url) %>%
        suppressMessages() %>%
        suppressWarnings()
    }

    is_ymd <-
      !df$dateData[[1]] %>% ymd() %>% is.na() %>%
      suppressWarnings()

    if (is_ymd) {
      df <-
        df %>%
        mutate(dateData = dateData %>% lubridate::ymd(),
               urlSEC = url) %>%
        suppressMessages() %>%
        suppressWarnings()
    }

    df <-
      df %>%
      mutate(idCIK = idCIK %>% as.numeric()) %>%
      suppressWarnings()

    if (return_message) {
      list("Parsed: ", url) %>%
        purrr::invoke(paste0, .) %>%
        cat(fill = T)
    }

    return(df)

  }

#' SEC registered money market funds
#'
#' This function returns information about SEC registered
#' money market funds for specified months and years from
#' January 2013 onwards.
#'

#' @param years years to include starting in 2013
#' @param months months to include
#' @param only_most_recent \code{TRUE} search only the most recent period
#' @param return_message \code{TRUE} return a message after data import
#' @param nest_data \code{TRUE} return nested data frame
#' @return nested \code{tibble} or \code{tibble} if \code{nest_data = FALSE}
#' @references \href{http://sec.gov}{The Securities and Exchange Commission}
#' @import purrr stringr dplyr rvest formattable lubridate readr
#' @return
#' @family SEC
#' @family entity search
#' @family fund data
#' @export
#'
#' @examples
#' \dontrun{
#' sec_money_market_funds(only_most_recent = TRUE, nest_data = FALSE)
#' sec_money_market_funds(only_most_recent = FALSE, years = 2013:2016)
#' }
sec_money_market_funds <-
  function(only_most_recent = TRUE,
           years = NULL,
           months = NULL,
           nest_data = F,
           return_message = TRUE) {
    missing <-
      only_most_recent == F & (length(years) ==0)
    if (missing) {
      stop("Please enter a years or only most recent")
    }

    parse_mmf_url_safe <-
      purrr::possibly(.parse_mmf_url, tibble())

    mmf_url_df <-
      .get_mmf_url_df()

    if (only_most_recent) {
      urls <-
        mmf_url_df %>%
        slice(1) %>%
        .$urlData
    }

    if (years %>% length() > 1) {
      only_most_recent <-
        FALSE
    }

    if (!only_most_recent) {
      possibly_years <-
        (mmf_url_df$yearData %>% min()):(mmf_url_df$yearData %>% max())

      if (years %in% possibly_years %>% sum()  == 0) {
        stop("Only possible years are " %>% paste0(paste0(years, collapse = ', ')))
      }

      url_df <-
        mmf_url_df %>%
        filter(yearData %in% years)

      if (length(months) > 0) {
        url_df <-
          url_df %>%
          filter(monthData %in% months)
      }

      urls <-
        url_df %>%
        .$urlData
    }

    all_data <-
      urls %>%
      future_map_dfr(function(x) {
        parse_mmf_url_safe(url = x, return_message = return_message)
      })

    has_missing_cik <-
      all_data %>%
      filter(idCIK %>% is.na()) %>%
      nrow() > 0

    if (has_missing_cik) {
      all_data <-
        all_data %>%
        select(-idCIK) %>%
        left_join(all_data %>%
                    filter(!idCIK %>% is.na()) %>%
                    select(idSeries, idCIK) %>%
                    distinct()) %>%
        select(dateData, idCIK, everything()) %>%
        suppressMessages()
    }

    all_data <-
      all_data %>%
      mutate(
        urlRankandFiled = list('http://rankandfiled.com/', '#/filers/', idCIK, '/table') %>% purrr::invoke(paste0, .)
      )

    if (return_message) {
      list(
        "You acquired ",
        all_data %>% select(idCIK, nameIssuer) %>% distinct() %>% nrow() %>% formattable::comma(digits = 0),
        " money market funds"
      ) %>%
        purrr::invoke(paste0, .) %>%
        cat(fill = T)
    }

    if (nest_data) {
      if (only_most_recent) {
        all_data <-
          all_data %>%
          nest(-nameFiler, .key = dataMutualFund)
      } else {
        all_data <-
          all_data %>%
          nest(-c(nameFiler, dateData), .key = dataMutualFund)
      }
    }

    return(all_data)
  }


# bankruptcies ------------------------------------------------------------

.parse_bankruptcy_url <-
  function(url = "https://www.sec.gov/open/datasets/public_company_bankruptcy_cases.csv",
           return_message = TRUE) {
    df <-
      url %>%
      read_csv() %>%
      mutate_all(str_to_upper) %>%
      suppressWarnings() %>%
      purrr::set_names(
        c(
          'idDistrictBankruptcy',
          'stateBankruptcy',
          'nameEntity',
          'amountAssets',
          'amountLiabilities'
        )
      ) %>%
      mutate(urlData = url) %>%
      suppressMessages() %>%
      suppressWarnings()

    df <-
      df %>%
      mutate_at(c('amountAssets', 'amountLiabilities'),
                funs(. %>% as.numeric() * 1000000)) %>%
      mutate(amountEquity = amountAssets - amountLiabilities) %>%
      select(idDistrictBankruptcy:amountLiabilities,
             amountEquity,
             everything()) %>%
      suppressWarnings()

    if (return_message) {
      list("Parsed: ", url) %>%
        purrr::invoke(paste0, .) %>%
        cat(fill = T)
    }

    return(df)
  }

#' SEC public filing bankruptcies
#'
#' This function returns information containing
#' bankruptcies of once public companies from 2009 to 2011
#'
#' @param return_message \code{TRUE} return a message after data import
#' @param nest_data \code{TRUE} return nested data frame
#' @return nested \code{tibble} or \code{tibble} if \code{nest_data = FALSE}
#' @references \href{http://sec.gov}{The Securities and Exchange Commission}
#' @export
#' @import purrr stringr dplyr rvest formattable
#' @family SEC
#' @family entity search
#' @examples
#' \dontrun{
#' sec_bankruptcies(nest_data = FALSE, return_message = TRUE)
#' }
sec_bankruptcies <-
  function(nest_data = TRUE,
           return_message = TRUE) {
    page <-
      "https://www.sec.gov/opa/data/opendatasetsshtmlbankruptcy.html" %>%
      read_html()

    url_data <-
      page %>%
      html_nodes('.views-field-field-display-title a') %>%
      html_attr('href') %>%
      paste0("https://www.sec.gov", .)

    years <-
      url_data %>%
      str_replace_all("https://www.sec.gov/open/datasets/public_company_bankruptcy_cases|.csv",
                      '') %>%
      readr::parse_number()

    years[[1]] <-
      2009

    url_df <-
      tibble(yearData = years,
                 urlData = url_data)

    parse_bankruptcy_url_safe <-
      purrr::possibly(.parse_bankruptcy_url, tibble())

    all_data <-
      url_df$urlData %>%
      future_map_dfr(function(x) {
        parse_bankruptcy_url_safe(url = x, return_message = return_message)
      })


    all_data <-
      all_data %>%
      left_join(url_df) %>%
      select(yearData, everything()) %>%
      mutate_at(
        .vars = c('amountAssets', 'amountLiabilities', 'amountEquity'),
        funs(. %>% formattable::currency(digits = 0))
      ) %>%
      suppressWarnings() %>%
      suppressMessages()

    if (return_message) {
      list(
        "You acquired ",
        all_data %>% nrow() %>% formattable::comma(digits = 0),
        " Public Company Bankruptcies from ",
        all_data$yearData %>% min(),
        ' to ',
        all_data$yearData %>% max(),
        " totaling ",
        all_data$amountLiabilities %>% sum(na.rm = TRUE) %>% formattable::currency(digits = 0),
        ' in Liabilities'
      ) %>%
        purrr::invoke(paste0, .) %>%
        cat(fill = T)
    }

    if (nest_data) {
      all_data <-
        all_data %>%
        nest(-yearData, .key = dataBankruptcies)
    }

    return(all_data)
  }

# broker_dealers ----------------------------------------------------------

.get_broker_data_urls <-
  function() {
    page <-
      "https://www.sec.gov/foia/docs/bd-archive.htm" %>%
      read_html()

    slugs <-
      page %>%
      html_nodes('a') %>%
      html_attr('href')

    slugs <- slugs[slugs %>% str_detect("bd")]
    slugs <- slugs %>%
      discard(function(x){
        is.na(x)
      })

    url_data <-
      'https://www.sec.gov' %>%
      paste0(slugs)

    urls <- url_data[url_data %>% str_detect(".txt")]

    periods <-
      slugs %>%
      str_split("/bd") %>%
      flatten_chr()

    periods <-
      periods[periods %>% str_detect(".txt")] %>%
      str_remove_all(".txt|_0")


    data <-
      tibble(dateData = periods,
             urlData = urls) %>%
      mutate(
        chars = nchar(dateData),
        dateData = case_when(chars == 7 ~ dateData %>% substr(1, 6),
                             TRUE ~ dateData),
        chars = nchar(dateData),
        month = dateData %>% substr(1, 2),
        day =  dateData %>% substr(3, 4),
        year = dateData %>% substr(5, 6),
        year = case_when(year == "" ~ lag(year, 1),
                         TRUE ~ year)
      ) %>%
      unite(date, year, month, day, sep = "-") %>%
      mutate(date = ymd(date)) %>%
      select(-dateData) %>%
      rename(dateData = date) %>%
      select(dateData, urlData) %>%
      mutate(
        yearData = dateData %>% lubridate::year(),
        monthData = dateData %>% lubridate::month()
      ) %>%
      filter(!is.na(dateData))

    data
  }

.parse_brokers_url <-
  function(url = "https://www.sec.gov/foia/bdreports/bd120116.txt",
           return_message = TRUE) {
    df <-
      url %>%
      read_tsv(col_names = FALSE) %>%
      select(-dplyr::matches("X9")) %>%
      mutate_all(str_to_upper) %>%
      purrr::set_names(
        c(
          'idCIK',
          'nameEntity',
          'idReportingFilingNumber',
          'addressStreet1Entity',
          'addressStreet2Entity',
          'cityEntity',
          'stateEntity',
          'zipcodeEntity'
        )
      ) %>%
      mutate(
        typeFiler = 'Registered Broker Dealer',
        addressStreetEntity = ifelse(
          addressStreet2Entity %>% is.na(),
          addressStreet1Entity,
          paste(addressStreet1Entity, addressStreet2Entity)
        ),
        addressEntity = list(
          addressStreetEntity,
          " ",
          cityEntity,
          ", ",
          stateEntity,
          " ",
          zipcodeEntity
        ) %>% purrr::invoke(paste0, .),
        urlData = url
      ) %>%
      mutate_at(.vars = c('idCIK', 'idReportingFilingNumber'),
                funs(. %>% as.numeric())) %>%
      suppressWarnings() %>%
      suppressMessages() %>%
      select(idCIK:idReportingFilingNumber, addressEntity, everything())

    if (return_message) {
      list("Parsed: ", url) %>%
        purrr::invoke(paste0, .) %>%
        cat(fill = T)
    }

    return(df)
  }

#' SEC registered broker-dealers
#'
#' This function returns information about SEC registered
#' broker dealers for specified periods from March 2007 onwards.
#'
#' @param years years to include starting in 2007
#' @param months months to include
#' @param only_most_recent \code{TRUE} search only the most recent period
#' @param return_message \code{TRUE} return a message after data import
#' @param nest_data \code{TRUE} return nested data frame
#' @return nested \code{tibble} or \code{tibble} if \code{nest_data = FALSE}
#' @references \href{http://sec.gov}{The Securities and Exchange Commission}
#' @import purrr stringr dplyr rvest formattable readr
#' @export
#'
#' @examples
#' @family SEC
#' @family entity search
#' @family broker dealers
#' \dontrun{
#' sec_broker_dealers(only_most_recent = TRUE)
#' sec_broker_dealers(only_most_recent = FALSE, years = 2016:2017, nest_data = FALSE)
#' }
sec_broker_dealers <-
  function(only_most_recent = TRUE,
           years = NULL,
           months = NULL,
           nest_data = FALSE,
           return_message = TRUE) {
    missing <-
      only_most_recent == F & (length(years) == 0)
    if (missing) {
      stop("Please enter a years or only most recent")
    }

    parse_brokers_url_safe <-
      purrr::possibly(.parse_brokers_url, tibble())

    broker_df <-
      .get_broker_data_urls()

    if (only_most_recent) {
      urls <-
        broker_df %>%
        slice(1) %>%
        .$urlData
    }

    if (years %>% length() > 1) {
      only_most_recent <-
        FALSE
    }

    if (!only_most_recent) {
      possibly_years <-
        (broker_df$yearData %>% min()):(broker_df$yearData %>% max())

      if (years %in% possibly_years %>% sum()  == 0) {
        stop("Only possible years are " %>% paste0(paste0(years, collapse = ', ')))
      }

      url_df <-
        broker_df %>%
        filter(yearData %in% years)

      if (length(months) > 0) {
        url_df <-
          url_df %>%
          filter(monthData %in% months)
      }

      urls <-
        url_df %>%
        .$urlData
    }

    all_data <-
      urls %>%
      future_map_dfr(function(x) {
        parse_brokers_url_safe(url = x, return_message = return_message)
      })

    all_data <-
      all_data %>%
      left_join(broker_df %>% select(dateData, urlData)) %>%
      suppressWarnings() %>%
      select(dateData, everything()) %>%
      suppressWarnings() %>%
      suppressMessages()

    if (return_message) {
      list(
        "You acquired ",
        all_data %>% select(nameEntity, idCIK) %>% distinct() %>% nrow() %>% formattable::comma(digits = 0),
        ' SEC registered broker dealers from ',
        all_data$dateData %>% min(),
        ' to ',
        all_data$dateData %>% max()
      ) %>%
        purrr::invoke(paste0, .) %>%
        cat(fill = T)
    }

    if (nest_data) {
      all_data <-
        all_data %>%
        nest(-dateData, .key = dataBrokers)
    }

    return(all_data)

  }


# municipal_dealers -------------------------------------------------------

.parse_municpal_dealer_url <-
  function(url = "https://www.sec.gov/foia/muniadvisors/ma120116.zip",
           return_message = TRUE) {
    df <-
      url %>%
      rio::import() %>%
      data.frame(stringsAsFactors = FALSE) %>%
      as_tibble() %>%
      mutate_all(str_to_upper) %>%
      suppressWarnings() %>%
      slice(-c(1:3)) %>%
      purrr::set_names(c('nameEntity', 'idReportingFilingNumber', 'idCIK')) %>%
      mutate(idCIK = idCIK %>% as.numeric()) %>%
      mutate(typeFiler = 'Registered Municipal Advisors',
             urlData = url)

    if (return_message) {
      list("Parsed: ", url) %>%
        purrr::invoke(paste0, .) %>%
        cat(fill = T)
    }

    return(df)

  }

.get_muni_advisor_urls <-
  function() {
    page <-
      "https://www.sec.gov/foia/docs/muniadvisors-archive.htm" %>%
      read_html()

    slugs <-
      page %>%
      html_nodes('#block-secgov-content a') %>%
      html_attr('href')

    slugs <-
      slugs[slugs %>% str_detect(".zip")]

    url_data <-
      'https://www.sec.gov' %>%
      paste0(slugs)

    periods <-
      slugs %>%
      str_replace_all('/foia/muniadvisors/ma|.zip', '') %>%
      lubridate::mdy()

    tibble(dateData = periods,
               urlData = url_data) %>%
      mutate(
        yearData = dateData %>% lubridate::year(),
        monthData = dateData %>% lubridate::month()
      )

  }

#' SEC registered municipal advisors
#'
#' This function returns data on SEC registered
#' municipal advisors from March 2015 onwards.
#'
#' @param years years to include starting in 2015
#' @param months months to include
#' @param only_most_recent \code{TRUE} search only the most recent period
#' @param return_message \code{TRUE} return a message after data import
#' @param nest_data \code{TRUE} return nested data frame
#' @return nested \code{tibble} or \code{tibble} if \code{nest_data = FALSE}
#' @references \href{http://sec.gov}{The Securities and Exchange Commission}
#' @export
#' @import purrr stringr dplyr rvest formattable lubridate readr
#' @importFrom rio import
#' @family SEC
#' @family broker dealers
#' @family entity search
#' @examples
#' \dontrun{
#' sec_municipal_advisors(only_most_recent = TRUE, nest_data = FALSE)
#' sec_municipal_advisors(only_most_recent = FALSE, years = 2015:2017, nest_data = TRUE)
#' }
sec_municipal_advisors <-
  function(only_most_recent = TRUE,
           years = NULL,
           months = NULL,
           nest_data = FALSE,
           return_message = TRUE) {
    missing <-
      only_most_recent == F & (length(years) == 0)
    if (missing) {
      stop("Please enter a years or only most recent")
    }

    parse_municpal_dealer_url_safe <-
      purrr::possibly(.parse_municpal_dealer_url, tibble())

    muni_df <-
      .get_muni_advisor_urls()

    if (only_most_recent) {
      urls <-
        muni_df %>%
        slice(1) %>%
        .$urlData
    }

    if (years %>% length() > 1) {
      only_most_recent <-
        FALSE
    }

    if (!only_most_recent) {
      possibly_years <-
        (muni_df$yearData %>% min()):(muni_df$yearData %>% max())

      if (years %in% possibly_years %>% sum()  == 0) {
        stop("Only possible years are " %>% paste0(paste0(years, collapse = ', ')))
      }

      url_df <-
        muni_df %>%
        filter(yearData %in% years)

      if (length(months) > 0) {
        url_df <-
          url_df %>%
          filter(monthData %in% months)
      }

      urls <-
        url_df %>%
        .$urlData
    }

    all_data <-
      urls %>%
      future_map_dfr(function(x) {
        parse_municpal_dealer_url_safe(url = x, return_message = return_message)
      })

    all_data <-
      all_data %>%
      left_join(muni_df %>% select(dateData, urlData)) %>%
      suppressWarnings() %>%
      select(dateData, everything()) %>%
      suppressWarnings() %>%
      suppressMessages()

    if (return_message) {
      list(
        "You acquired ",
        all_data %>% select(nameEntity, idCIK) %>% distinct() %>% nrow() %>% formattable::comma(digits = 0),
        ' SEC registered broker dealers from ',
        all_data$dateData %>% min(),
        ' to ',
        all_data$dateData %>% max()
      ) %>%
        purrr::invoke(paste0, .) %>%
        cat(fill = T)
    }

    if (nest_data) {
      all_data <-
        all_data %>%
        nest(-dateData, .key = dataMunicipalDealers)
    }

    return(all_data)

  }




# fail_to_deliver ---------------------------------------------------------

.parse_fail_to_deliver_url <-
  function(url = "https://www.sec.gov/foia/failsreports/cnsfails201611b.zip",
           return_message = TRUE) {
    td <-
      tempdir()
    tf <-
      tempfile(tmpdir = td, fileext = ".zip")

    url %>%
      curl::curl_download(destfile = tf)

    con <-
      unzip(tf, exdir = td)

    df <-
      con %>%
      read_delim(delim = '|', col_names = FALSE) %>%
      slice(-1) %>%
      purrr::set_names(
        c(
          'dateSettlement',
          'idCUSIP',
          'idTicker',
          'countShares',
          'descriptionSecurity',
          'amountPrice'
        )
      ) %>%
      mutate_all(str_to_upper) %>%
      mutate(
        dateSettlement = dateSettlement %>% lubridate::ymd(),
        amountPrice = amountPrice %>% readr::parse_number(),
        countShares = countShares %>% readr::parse_number(),
        amountTransaction = amountPrice * countShares,
        urlData = url
      ) %>%
      suppressWarnings() %>%
      suppressMessages()

    con %>%
      unlink()

    if (return_message) {
      list("Parsed: ", url) %>%
        purrr::invoke(paste0, .) %>%
        cat(fill = T)
    }

    return(df)
  }

.get_fail_to_deliver_urls <-
  function() {
    page <-
      "https://www.sec.gov/data/foiadocsfailsdatahtm" %>%
      read_html()

    slugs <-
      page %>%
      html_nodes('.views-field-field-display-title a') %>%
      html_attr('href')

    urls <-
      slugs %>% paste0('https://www.sec.gov', .)

    periods <-
      slugs %>%
      str_split("/") %>%
      flatten_chr()

    periods <- periods[periods %>% str_detect("zip")] %>%
      str_remove_all(".zip") %>%
      str_remove_all("_|cnsfails|cnspsecfails|cnspsecfails") %>%
      str_remove_all("cnspsecfails")

    periods <-
      periods %>%
      str_replace_all("q1", "03") %>%
      str_replace_all("q2", "06") %>%
      str_replace_all("q3", "09") %>%
      str_replace_all("q4", "12")


    periods <- case_when(
      periods %>% str_detect("b$") ~ periods %>%
        str_remove_all("b$") %>%
        str_c("01"),
      periods %>% str_detect("a$|a0$") ~ periods %>%
        str_remove_all("a$|a0$") %>%
        str_c("15"),
      TRUE ~ periods %>% str_c("01")
    )

     periods <- periods %>%  ymd()


    tibble(dateData = periods, urlData = urls) %>%
      mutate(
        dateData = dateData %>% lubridate::ymd(),
        monthData = dateData %>% lubridate::month(),
        yearData = dateData %>% lubridate::year()
      )

  }

#' SEC failed to deliver securities
#'
#' This function returns informato on equities
#' that failed to clear as reported to the National Securities Clearing Corporation
#' from  2004 onward.
#'
#' @param years years to include starting in 2004
#' @param months months to include
#' @param only_most_recent \code{TRUE} search only the most recent period
#' @param return_message \code{TRUE} return a message after data import
#' @param nest_data \code{TRUE} return nested data frame
#' @return nested \code{tibble} or \code{tibble} if \code{nest_data = FALSE}
#' @references \href{http://sec.gov}{The Securities and Exchange Commission}
#' @export
#' @import purrr stringr dplyr rvest formattable readr xml2
#' @importFrom curl curl_download
#' @family SEC
#' @family public company data
#' @examples
#' \dontrun{
#' sec_failed_to_deliver_securities(nest_data = TRUE)
#' sec_failed_to_deliver_securities(years = 2004:2016, nest_data = TRUE)
#' }
sec_failed_to_deliver_securities <-
  function(only_most_recent = TRUE,
           years = 2016,
           months = NULL,
           nest_data = F,
           return_message = TRUE) {
    missing <-
      only_most_recent == F & (length(years) == 0)
    if (missing) {
      stop("Please enter a years or only most recent")
    }

    parse_fail_to_deliver_url_safe <-
      purrr::possibly(.parse_fail_to_deliver_url, tibble())

    urls_df <-
      .get_fail_to_deliver_urls()

    if (only_most_recent) {
      urls <-
        urls_df %>%
        slice(1) %>%
        .$urlData
    }

    if (years %>% length() > 1) {
      only_most_recent <-
        FALSE
    }

    if (!only_most_recent) {
      possibly_years <-
        (urls_df$yearData %>% min()):(urls_df$yearData %>% max())

      if (years %in% possibly_years %>% sum()  == 0) {
        stop("Only possible years are " %>% paste0(paste0(years, collapse = ', ')))
      }

      url_df <-
        urls_df %>%
        filter(yearData %in% years)

      if (!months %>% is_null()) {
        url_df <-
          url_df %>%
          filter(monthData %in% months)
      }

      urls <-
        url_df %>%
        .$urlData
    }

    all_data <-
      urls %>%
      future_map_dfr(function(x) {
        parse_fail_to_deliver_url_safe(url = x, return_message = return_message)
      })

    all_data <-
      all_data %>%
      left_join(urls_df %>% select(dateData, urlData)) %>%
      suppressWarnings() %>%
      select(dateData, everything()) %>%
      suppressWarnings() %>%
      suppressMessages()

    if (return_message) {
      list(
        "You acquired ",
        all_data %>% nrow() %>% formattable::comma(digits = 0),
        ' trades that failed to deliver from ',
        all_data$dateData %>% min(),
        ' to ',
        all_data$dateData %>% max()
      ) %>%
        purrr::invoke(paste0, .) %>%
        cat(fill = T)
    }

    if (nest_data) {
      all_data <-
        all_data %>%
        nest(-dateData, .key = dataFailedToDeliverSecurities)
    }

    return(all_data)

  }


# market_structure --------------------------------------------------------

.get_market_structure_url_df <-
  function() {
    page <-
      "https://www.sec.gov/opa/data/market-structure/marketstructuredownloadshtml-by_security.html" %>%
      read_html()

    slugs <-
      page %>%
      html_nodes('a') %>%
      html_attr('href')

    slugs <-slugs[slugs %>% str_detect("zip")] %>%
      discard(function(x){
        is.na(x)
      })

    urls <-
      slugs %>%
      str_c("https://www.sec.gov",.)

    periods <-
      slugs %>%
      str_split("/") %>%
      flatten_chr() %>%
      keep(function(x){
        x %>% str_detect("zip")
      }) %>%
      str_remove_all("individual_security_|.zip")


    data <-
      tibble(period = periods, urlData = urls) %>%
      separate(period,
               sep = "\\_",
               into = c("yearData", "quarterData")) %>%
      mutate(
        yearData = as.numeric(yearData),
        quarterData = quarterData %>% str_replace_all("q10", "q1") %>% str_to_upper,
        dateData = case_when(
          quarterData == "Q1" ~ glue::glue("{yearData}-03-31"),
          quarterData == "Q2" ~ glue::glue("{yearData}-06-30"),
          quarterData == "Q3" ~ glue::glue("{yearData}-09-30"),
          quarterData == "Q4" ~ glue::glue("{yearData}-12-31"),
        ) %>% ymd()
      )

    data
  }

.parse_market_structure_url <-
  function(url = "https://www.sec.gov/files/node/add/data_distribution/individual_security_2019_q4.zip",
           return_message = TRUE) {
    td <-
      tempdir()
    tf <-
      tempfile(tmpdir = td, fileext = ".zip")

    url %>%
      curl::curl_download(destfile = tf)

    con <-
      unzip(tf, exdir = td)

    csv_loc <-
      con %>%
      keep(function(x){
        x %>% str_detect("csv")
      })

    df <-
      csv_loc %>%
      read_csv() %>%
      purrr::set_names(
        c(
          "dateTrading",
          "typeSecurity",
          "idTicker",
          "rankMCAP",
          "rankTurn",
          "rankVolatility",
          "rankPrice",
          "volumeLIT",
          "volumeOrders",
          "countHidden",
          "countTradesForHidden",
          "volumeHidden",
          "volumeTradeForHidden",
          "countCancels",
          "countLitTrades",
          "countOddLots",
          "countTradesForOddLots",
          "volumeOddLot",
          "volumeTradeVolForOddLots"
        )
      ) %>%
      mutate_at(.vars = c(8, 9, 12, 13, 18, 19),
                funs(. %>% as.numeric() * 1000)) %>%
      mutate_at(
        .vars = c('typeSecurity', 'idTicker'),
        .funs = funs(. %>% str_to_upper())
      ) %>%
      mutate(urlData = url,
             dateTrading = dateTrading %>% lubridate::ymd())

    con %>%
      unlink()

    if (return_message) {
      list("Parsed: ", url) %>%
        purrr::invoke(paste0, .) %>%
        cat(fill = T)
    }

   df
  }

#' SEC quarterly securities transaction metrics
#'
#' This function returns a variety of metrics surrounding
#' transactions for individual securities.  The quarterly data
#' starts in 2012.
#'
#' @param years years to include
#' @param months months to include
#' @param only_most_recent \code{TRUE} search only the most recent period
#' @param return_message \code{TRUE} return a message after data import
#' @param nest_data \code{TRUE} return nested data frame
#' @return nested \code{tibble} or \code{tibble} if \code{nest_data = FALSE}
#' @references \href{http://sec.gov}{The Securities and Exchange Commission}
#' @import purrr stringr dplyr rvest formattable readr
#' @importFrom curl curl_download
#' @export
#' @family SEC
#' @family transaction data
#' @family public company data
#' @examples
#' \dontrun{
#' sec_securities_metrics_by_exchange(only_most_recent = TRUE)
#' sec_securities_metrics_by_exchange(years = 2016)
#' }
sec_securities_metrics_by_exchange <-
  function(only_most_recent = T,
           years = NULL,
           months = NULL,
           nest_data = F,
           return_message = TRUE) {
    missing <-
      only_most_recent == F & (length(years) == 0)
    if (missing) {
      stop("Please enter a years or only most recent")
    }

    parse_market_structure_url_safe <-
      purrr::possibly(.parse_market_structure_url, tibble())

    urls_df <-
      .get_market_structure_url_df() %>%
      mutate(
        yearData = dateData %>% lubridate::year(),
        monthData = dateData %>% lubridate::month()
      )

    if (!only_most_recent) {
      possibly_years <-
        (urls_df$yearData %>% min()):(urls_df$yearData %>% max())

      if (years %in% possibly_years %>% sum()  == 0) {
        stop("Only possible years are " %>% paste0(paste0(years, collapse = ', ')))
      }

      if (years %>% length() > 1) {
        only_most_recent <-
          FALSE
      }

      if (only_most_recent) {
        urls <-
          urls_df %>%
          slice(1) %>%
          .$urlData
      }


      url_df <-
        urls_df %>%
        filter(yearData %in% years)

      if (length(months) > 0) {
        url_df <-
          url_df %>%
          filter(monthData %in% months)
      }

      urls <-
        url_df %>%
        .$urlData
    }

    if (only_most_recent) {
      urls_df <-
        .get_market_structure_url_df()

      urls <-
        urls_df %>% slice(1) %>% .$urlData
    }

    all_data <-
      urls %>%
      future_map_dfr(function(x) {
        parse_market_structure_url_safe(url = x, return_message = return_message)
      })

    all_data <-
      all_data %>%
      left_join(urls_df %>% select(dateData, urlData)) %>%
      suppressWarnings() %>%
      select(dateData, everything()) %>%
      suppressWarnings() %>%
      suppressMessages()

    all_data <-
      all_data %>%
      mutate_at(.vars = all_data %>% select(dplyr::matches("^count|^volume")) %>% names(),
                funs(. %>% formattable::comma(digits = 0)))

    if (return_message) {
      list(
        "You acquired ",
        all_data$volumeOrders %>% sum(na.rm = TRUE) %>% formattable::comma(digits = 0),
        ' traded shares from ',
        all_data$dateData %>% min(),
        ' to ',
        all_data$dateData %>% max()
      ) %>%
        purrr::invoke(paste0, .) %>%
        cat(fill = T)
    }

    if (nest_data) {
      all_data <-
        all_data %>%
        nest(-dateData, .key = dataQuarterlyMetrics)
    }

    return(all_data)

  }


# xbrl --------------------------------------------------------------------

# Note: The SEC website folder http://www.sec.gov/Archives/edgar/data/{cik}/{accession}/ will always contain all the files for a given submission, where {accession} is the adsh with the ‘-‘characters removed.
.get_xbrl_name_df <-
  function() {
    tibble(
      nameSEC = c(
        "adsh",
        "report",
        "line",
        "stmt",
        "inpth",
        "rfile",
        "tag",
        "version",
        "plabel",
        "negating",
        "coreg",
        "ddate",
        "qtrs",
        "uom",
        "value",
        "footnote",
        "cik",
        "name",
        "sic",
        "countryba",
        "stprba",
        "cityba",
        "zipba",
        "bas1",
        "bas2",
        "baph",
        "countryma",
        "stprma",
        "cityma",
        "zipma",
        "mas1",
        "mas2",
        "countryinc",
        "stprinc",
        "ein",
        "former",
        "changed",
        "afs",
        "wksi",
        "fye",
        "form",
        "period",
        "fy",
        "fp",
        "filed",
        "accepted",
        "prevrpt",
        "detail",
        "instance",
        "nciks",
        "aciks",
        "pubfloatusd",
        "floatdate",
        "custom",
        "abstract",
        "datatype",
        "iord",
        "crdr",
        "tlabel",
        "doc"
      ),
      nameActual = c(
        "idEDGARAccession",
        "idReport",
        "idLine",
        "idFinancialStatementLocation",
        "isValueNet",
        "idDataFileType",
        "itemTag",
        "idVersion",
        "labelPreferred",
        "isNegating",
        "nameCoRegistrant",
        "datePeriodEnd",
        "durationQuarters",
        "typeUOM",
        "valueItem",
        "descriptionFootnote",
        "idCIK",
        "nameFiler",
        "idSIC",
        "countryFilerBusiness",
        "stateprovinceFilerBusiness",
        "cityFilerBusiness",
        "zipcodeFilerBusiness",
        "addressStreet1FilerBusiness",
        "addressStreet2FilerBusiness",
        "phoneFilerBusiness",
        "countryFilerMailing",
        "stateprovinceFilerMailing",
        "cityFilerMailing",
        "zipcodeFilerMailing",
        "addressStreet1FilerMailing",
        "addressStreet2FilerMailing",
        "countryIncorporation",
        "stateprovinceIncorporation",
        "idEIN",
        "nameFilerFormer",
        "dateNameChanged",
        "idAFS",
        "isWellKnownSeasonedIssuer",
        "monthdayFiscalYearEnd",
        "idForm",
        "dateBalanceSheet",
        "yearFiscalEnd",
        "idFiscalPeriod",
        "dateFiled",
        "datetimeAccepted",
        "isAmmendedFiling",
        "hasQuantitativeFootnotes",
        "slugXBRLInstance",
        "countCIKFiler",
        "additionalCIKs",
        "amountFloatUSD",
        "dateFloatUSD",
        "isTagCustom",
        "isTagNonNumeric",
        "typeData",
        "idValueType",
        "idAccountingType",
        "labelTaxonomy",
        "descriptionItem"
      )

    )
  }

.get_xbrl_url_df <-
  function() {
    page <-
      "https://www.sec.gov/dera/data/financial-statement-data-sets.html" %>%
      read_html()

    slugs <-
      page %>%
      html_nodes('.views-field-field-display-title a') %>%
      html_attr('href') %>%
      str_replace_all('http://www.sec.gov', '')


    periods <-
      slugs %>%
      str_replace_all('/files/dera/data/financial-statement-data-sets/|.zip', '')

    urls <-
      'http://www.sec.gov' %>%
      paste0(slugs)

    url_df <-
      tibble(idPeriod = periods, urlSEC = urls) %>%
      separate(
        idPeriod,
        into = c('yearData', 'quarterData'),
        sep = 'q',
        remove = FALSE
      ) %>%
      mutate_at(.vars = c('yearData', 'quarterData'),
                funs(. %>% as.numeric()))

    url_df
  }

.read_xbrl_file <-
  function(path = './tag.txt') {
    path %>%
      readr::read_tsv() %>%
      suppressWarnings() %>%
      suppressMessages()
  }

.parse_xbrl_url <-
  function(url = "http://www.sec.gov/data/financial-statements/2016q3.zip",
           only_all = TRUE,
           return_message = TRUE) {
    options(scipen = 999999)

    period_data <-
      url %>% str_replace_all("http://www.sec.gov/data/financial-statements/|.zip|/files/dera/data/financial-statement-data-sets/", '')

    year_data <-
      period_data %>%
      str_split('\\q') %>%
      flatten_chr() %>%
      .[[1]] %>%
      as.numeric()

    quarter_data <-
      period_data %>%
      str_split('\\q') %>%
      flatten_chr() %>%
      .[[2]] %>%
      as.numeric()

    date_data <-
      tibble(yearData = year_data,
                 quarterData = quarter_data) %>%
      left_join(tibble(
        quarterData = 1:4,
        monthday = c('0331', '0630', '0930', '1231')
      )) %>%
      tidyr::unite(dateData, yearData, monthday, sep = '') %>%
      .$dateData %>% lubridate::ymd() %>%
      suppressMessages()

    read_xbrl_file_safe <-
      purrr::possibly(.read_xbrl_file, tibble())

    tmp <-
      tempfile()
    url %>%
      curl::curl_download(url = ., tmp)
    con <-
      unzip(tmp)

    name_xbrl <-
      .get_xbrl_name_df()

    con <- con[con %>% str_detect("txt")]
    sub <-
      con[con %>% str_detect('sub')] %>%
      .read_xbrl_file

    sub_names <-
      name_xbrl %>%
      filter(nameSEC %in% names(sub)) %>%
      .$nameActual

    sub <-
      sub %>%
      purrr::set_names(sub_names)

    sub <-
      sub %>%
      mutate_at(
        sub %>% select(dplyr::matches("^date[A-Z]")) %>% select(-dplyr::matches("datetime")) %>% names(),
        funs(. %>% as.numeric() %>% lubridate::ymd())
      ) %>%
      mutate_at(sub %>% select(dplyr::matches("^idCIK|idSIC|idEIN|^amount[A-Z]")) %>% names(),
                funs(. %>% as.numeric())) %>%
      mutate_at(sub %>% select(dplyr::matches("^is[A-Z]|^has[A-Z]")) %>% names(),
                funs(. %>% as.logical())) %>%
      mutate_at(sub %>% select(dplyr::matches("^amount")) %>% names(),
                funs(. %>% formattable::currency(digits = 0))) %>%
      suppressWarnings() %>%
      mutate(
        urlSECXBRLFiling =
          list(
            'http://www.sec.gov/Archives/edgar/data/',
            idCIK,
            '/' ,
            idEDGARAccession %>% str_replace_all('\\-', ''),
            '/',
            slugXBRLInstance
          ) %>% purrr::invoke(paste0, .),
        addressStreetFilerBusiness = ifelse(
          addressStreet2FilerBusiness %>% is.na(),
          addressStreet1FilerBusiness,
          paste(addressStreet1FilerBusiness,
                addressStreet2FilerBusiness)
        ),
        addressFilerBusiness = list(
          addressStreetFilerBusiness,
          " ",
          cityFilerBusiness,
          ", ",
          stateprovinceFilerBusiness,
          " ",
          zipcodeFilerBusiness
        ) %>% purrr::invoke(paste0, .),
        addressStreetFilerMailing = ifelse(
          addressStreet2FilerMailing %>% is.na(),
          addressStreet1FilerMailing,
          paste(addressStreet1FilerMailing,
                addressStreet2FilerMailing)
        ),
        addressFilerMailing = list(
          addressStreetFilerMailing,
          " ",
          cityFilerMailing,
          ", ",
          stateprovinceFilerMailing,
          " ",
          zipcodeFilerMailing
        ) %>% purrr::invoke(paste0, .)
      ) %>%
      left_join(tibble(
        idAFS = c("1-LAF", "2-ACC", "3-SRA", "4-NON", "5-SML", NA),
        typeAFS = c(
          'Large Accelerated',
          'Accelerated',
          'Smaller Reporting Accelerated',
          'Non-Accelerated',
          'Smaller Reporting Filer',
          'Not Assigned'
        )
      )) %>%
      suppressWarnings() %>%
      suppressMessages()

    pre <-
      con[con %>% str_detect('pre')] %>%
      .read_xbrl_file()

    pre_names <-
      name_xbrl %>%
      filter(nameSEC %in% names(pre)) %>%
      .$nameActual

    pre <-
      pre %>%
      purrr::set_names(pre_names)

    pre <-
      pre %>%
      mutate_at(
        pre %>% select(dplyr::matches("^date[A-Z]")) %>% select(-dplyr::matches("datetime")) %>% names(),
        funs(. %>% as.numeric() %>% lubridate::ymd())
      ) %>%
      mutate_at(pre %>% select(dplyr::matches("^idCIK|idSIC|idEIN|^amount[A-Z]")) %>% names(),
                funs(. %>% as.numeric())) %>%
      mutate_at(pre %>% select(dplyr::matches("^is[A-Z]|^has[A-Z]")) %>% names(),
                funs(. %>% as.logical())) %>%
      mutate_at(pre %>% select(dplyr::matches("^amount")) %>% names(),
                funs(. %>% formattable::currency(digits = 0))) %>%
      suppressWarnings() %>%
      unite(idReportLine,
            idReport,
            idLine,
            sep = ".",
            remove = FALSE) %>%
      mutate(idReportLine = idReportLine %>% as.numeric()) %>%
      left_join(
        tibble(
          idFinancialStatementLocation = c("BS", "IS", "CF", "EQ", "CI", "UN"),
          nameFinancialStatementLocation = c(
            "Balance Sheet",
            "Income Statement",
            "Cash Flow",
            "Equity",
            "Comprehensive Income",
            "Unclassifiable Statement"
          )
        )
      ) %>%
      left_join(tibble(
        idDataFileType = c("H", "X"),
        typeDataFile = c("HTML", "XML")
      )) %>%
      suppressMessages()

    num <-
      con[con %>% str_detect('num')] %>%
      .read_xbrl_file()

    num_names <-
      name_xbrl %>%
      filter(nameSEC %in% names(num)) %>%
      .$nameActual

    num <-
      num %>%
      purrr::set_names(num_names)

    num <-
      num %>%
      mutate_at(
        num %>% select(dplyr::matches("^date[A-Z]")) %>% select(-dplyr::matches("datetime")) %>% names(),
        funs(. %>% as.numeric() %>% lubridate::ymd())
      ) %>%
      mutate_at(num %>% select(dplyr::matches("^idCIK|idSIC|idEIN|^amount[A-Z]")) %>% names(),
                funs(. %>% as.numeric())) %>%
      mutate_at(num %>% select(dplyr::matches("^is[A-Z]|^has[A-Z]")) %>% names(),
                funs(. %>% as.logical())) %>%
      mutate_at(num %>% select(dplyr::matches("^amount")) %>% names(),
                funs(. %>% formattable::currency(digits = 0))) %>%
      left_join(tibble(
        typeUOM = c("AUD", "CAD", "CHF", "EUR", "JPY", "shares", "USD"),
        itemUOM = c(
          "amountAUD",
          "amountCAD",
          "amountCHF",
          "amountEUR",
          "amountJPY",
          "amuntShares",
          "amountUSD"
        )
      )) %>%
      select(-typeUOM) %>%
      select(idEDGARAccession:durationQuarters, itemUOM, everything()) %>%
      suppressMessages() %>%
      suppressWarnings()

    tag <-
      con[con %>% str_detect('tag')] %>%
      .read_xbrl_file()

    tag_names <-
      name_xbrl %>%
      filter(nameSEC %in% names(tag)) %>%
      .$nameActual

    tag <-
      tag %>%
      purrr::set_names(tag_names)

    tag <-
      tag %>%
      mutate_at(
        tag %>% select(dplyr::matches("^date[A-Z]")) %>% select(-dplyr::matches("datetime")) %>% names(),
        funs(. %>% as.numeric() %>% lubridate::ymd())
      ) %>%
      mutate_at(tag %>% select(dplyr::matches("^idCIK|idSIC|idEIN|^amount[A-Z]")) %>% names(),
                funs(. %>% as.numeric())) %>%
      mutate_at(tag %>% select(dplyr::matches("^is[A-Z]|^has[A-Z]")) %>% names(),
                funs(. %>% as.logical())) %>%
      mutate_at(tag %>% select(dplyr::matches("^amount")) %>% names(),
                funs(. %>% formattable::currency(digits = 0))) %>%
      suppressWarnings() %>%
      left_join(tibble(
        idValueType = c("I", "D"),
        typeValue = c("Point-in Time", "Duration")
      )) %>%
      left_join(tibble(
        idAccountingType = c("C", "D"),
        typeAccounting = c("Credit", "Debit")
      )) %>%
      suppressMessages()
    con %>%
      unlink()
    if (only_all) {
      all <-
        sub %>%
        inner_join(num) %>%
        inner_join(tag) %>%
        left_join(pre) %>%
        select(
          idEDGARAccession,
          idCIK,
          nameFiler:typeAFS,
          idReport,
          idLine,
          idFinancialStatementLocation,
          idAccountingType,
          typeData,
          itemTag,
          labelTaxonomy,
          labelPreferred,
          everything()
        ) %>%
        distinct() %>%
        suppressWarnings() %>%
        suppressMessages()

      all_data <-
        tibble(nameTable = c('All Data'),
                   dataTable = list(all)) %>%
        mutate(
          periodData = period_data,
          yearData = year_data,
          quarterData = quarter_data,
          urlData = url
        ) %>%
        select(periodData:urlData, nameTable, dataTable)

      return(all_data)
    }

    all_data <-
      tibble(
        nameTable = c('Presentation', 'Values', 'Filers', 'Taxonomy'),
        dataTable = list(pre, num, sub, tag)
      ) %>%
      mutate(
        periodData = period_data,
        yearData = year_data,
        quarterData = quarter_data,
        urlData = url
      ) %>%
      select(periodData:urlData, nameTable, dataTable)
    if (return_message) {
      list("Parsed XBRL ",
           sub %>% nrow() %>% comma(digits = 0),
           " filers for ",
           period_data) %>%
        purrr::invoke(paste0, .) %>%
        cat(fill = T)
    }

    return(all_data)
  }

#' SEC quarterly XBRL data
#'
#' This function returns quarterly full \href{https://en.wikipedia.org/wiki/XBRL}{XBRL} dumps
#' for all SEC registered public companies begining in 2009.
#'
#' @param years years to include starting in 2009
#' @param quarters quarters to search \itemize{
#' \item \code{NULL}(default): all quarters,
#' \item \code{1}: Q1
#' \item \code{2}: Q2
#' \item \code{3}: Q3
#' \item \code{4}: Q4
#' }
#' @param only_most_recent \code{TRUE} search only the most recent period
#' @param return_message \code{TRUE} return a message after data import
#' @param tables underlying XBRL tables you want to include \itemize{
#' \item \code{NULL}: all tables (default)
#' \item \code{Presentation}: presentation data
#' \item \code{Values}: XBRL value data
#' \item \code{Filers}: filer data
#' \item \code{Taxonomy}: taxonomy data
#' }
#' @param only_all \code{TRUE} returns a joined file of the four tables
#' @param assign_to_environment \code{TRUE} assign the results into your environment
#'
#' @return nested \code{tibble} or \code{tibble}
#' @references \href{http://sec.gov}{The Securities and Exchange Commission}
#' @export
#' @import purrr stringr dplyr rvest formattable
#' @importFrom curl curl_download
#' @examples
#' @family SEC
#' @family public company data
#' @family entity search
#' @examples
#' \dontrun{
#' sec_xbrl_periods(only_most_recent = TRUE, only_all = TRUE, assign_to_environment = TRUE, return_message = TRUE)
#' }
sec_xbrl_periods <-
  function(only_most_recent = TRUE,
           years = NULL,
           quarters = NULL,
           tables = NULL,
           only_all = TRUE,
           assign_to_environment = TRUE,
           return_message = TRUE) {
    url_df <-
      .get_xbrl_url_df() %>%
      arrange(desc(yearData), desc(quarterData))

    has_years <-
      (length(years) > 0)
    if (has_years) {
      url_df <-
        url_df %>%
        filter(yearData %in% years)
    }

    has_quarter <-
      (length(quarters) > 0)
    if (has_quarter) {
      url_df <-
        url_df %>%
        filter(quarterData %in% quarters)
    }

    if (only_most_recent) {
      url_df <-
        .get_xbrl_url_df() %>%
        arrange(desc(yearData), desc(quarterData)) %>%
        slice(1)
    }

    urls <-
      url_df$urlSEC

    parse_xbrl_url_safe <-
      purrr::possibly(.parse_xbrl_url, tibble())

    all_data <-
      urls %>%
      future_map_dfr(function(x) {
        parse_xbrl_url_safe(url = x,
                            only_all = only_all,
                            return_message = return_message)
      }) %>%
      suppressWarnings()

    if (assign_to_environment) {
      table_name_df <-
        all_data %>%
        select(nameTable) %>%
        distinct() %>%
        mutate(nameDF =
                 list('xbrl', nameTable %>% str_replace_all('\\ ', '')) %>% purrr::invoke(paste0, .))

      if (length(tables) > 0) {
        all_data <-
          all_data %>%
          filter(nameTable %>% str_detect(tables))
      }

      1:nrow(all_data) %>%
        walk(function(x) {
          df_name <-
            table_name_df %>%
            filter(nameTable == all_data %>% slice(x) %>% .$nameTable) %>%
            .$nameDF

          df_data <-
            all_data %>%
            slice(x) %>%
            select(yearData, quarterData, periodData, dataTable) %>%
            unnest()

          df_data <-
            df_data %>%
            mutate_at(.vars =
                        df_data %>% select(dplyr::matches("^amount|^price|^value")) %>% names(),
                      funs(. %>% formattable::currency(digits = 2))) %>%
            mutate_at(
              .vars =
                df_data %>% select(dplyr::matches("^count[A-Z]")) %>% select(-dplyr::matches("country")) %>% names(),
              funs(. %>% formattable::comma(digits = 0))
            )

          assign(x = df_name, eval(df_data), envir = .GlobalEnv)

        })
    }

    if (!only_all) {
      return(all_data)
    }

  }
abresler/fundManageR documentation built on April 1, 2024, 5:46 p.m.